3 # PSPP - a program for statistical analysis.
4 # Copyright (C) 2017, 2018, 2019 Free Software Foundation, Inc.
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 sys.stderr.write("%s:%d: %s\n" % (file_name, line_number, msg))
40 line = input_file.readline()
45 return s.isdigit() or (s[0] == '-' and s[1].isdigit())
48 xdigits = "0123456789abcdefABCDEF"
58 fatal("syntax error expecting %s" % type)
79 if token == ('eof', ):
80 fatal("unexpected end of input")
88 elif not line[0].isspace():
98 elif line[0] in '[]()?|*':
101 elif line.startswith('=>'):
104 elif line.startswith('...'):
107 elif line[0].isalnum() or line[0] == '-':
109 while n < len(line) and (line[n].isalnum() or line[n] == '-'):
114 if prev[0] == '*' and is_num(s):
115 token = ('number', int(s, 10))
116 elif len(s) == 2 and is_xdigits(s):
117 token = ('bytes', struct.pack('B', int(s, 16)))
118 elif s[0] == 'i' and is_num(s[1:]):
119 token = ('bytes', struct.pack('<i', int(s[1:])))
120 elif s[:2] == 'ib' and is_num(s[2:]):
121 token = ('bytes', struct.pack('>i', int(s[2:])))
123 token = ('nonterminal', s)
124 elif s in ('bool', 'int16', 'int32', 'int64', 'be16', 'be32', 'be64',
125 'string', 'bestring', 'byte', 'float', 'double',
126 'count', 'becount', 'v1', 'v3', 'vAF', 'vB0',
132 fatal("unknown character %c" % line[0])
136 argv0 = os.path.basename(sys.argv[0])
138 %(argv0)s, parser generator for SPV binary members
139 usage: %(argv0)s GRAMMAR header
140 %(argv0)s GRAMMAR code HEADER_NAME
141 where GRAMMAR contains grammar definitions\
142 ''' % {"argv0": argv0})
147 def __init__(self, type_, name, n, content):
151 self.content = content
153 if self.type_ == 'constant':
154 return ' '.join(['%02x' % maybe_ord(x) for x in self.content])
156 return "%s(%s)" % (self.type_, self.content)
167 elif t[0] in ('bool', 'byte',
168 'int16', 'int32', 'int64',
169 'be16', 'be32', 'be64',
170 'string', 'bestring',
172 'nonterminal', '...'):
176 if t[0] == 'nonterminal':
177 name = name_to_id(content[1])
178 elif t[0] in ('v1', 'v3', 'vAF', 'vB0', 'count', 'becount'):
182 content = parse_choice()
188 content = parse_choice()
192 fatal('syntax error expecting item')
197 if token[0] == 'number':
205 if n.startswith('n-'):
208 fatal('expecting quantity')
214 if type_ == 'constant' and not optional:
215 fatal("%s: cannot name a constant" % token[1])
221 if type_ == 'constant':
225 item = Item(type_, name, n, content)
227 item = Item('|', None, 1, [[item], []])
231 def parse_concatenation():
233 while token[0] not in (')', ';', '|', 'eof'):
235 if (item.type_ == 'constant'
237 and items[-1].type_ == 'constant'):
238 items[-1].content += item.content
245 sub = parse_concatenation()
251 choices.append(parse_concatenation())
253 return [Item('|', None, 1, choices)]
264 items = parse_concatenation()
267 or items[0].type_ != 'constant'
268 or len(items[0].content) != 1):
269 fatal("choice must begin with xx (or 'else')")
270 choice = '%02x' % maybe_ord(items[0].content)
272 if choice in choices:
273 fatal("duplicate choice %s" % choice)
274 choices[choice] = items
287 return Item('case', case_name, 1,
288 { '%s_%s' % (case_name, k) : v for k, v in choices.items() })
291 def parse_production():
292 expect('nonterminal')
296 return name, parse_choice()
299 def print_members(p, indent):
301 if item.type_ == 'variable' and item.name:
302 if item.content[0] == 'nonterminal':
303 typename = 'struct %s%s' % (prefix,
304 name_to_id(item.content[1]))
307 c_types = {'bool': ('bool', 0),
308 'byte': ('uint8_t', 0),
309 'int16': ('uint16_t', 0),
310 'int32': ('uint32_t', 0),
311 'int64': ('uint64_t', 0),
312 'be16': ('uint16_t', 0),
313 'be32': ('uint32_t', 0),
314 'be64': ('uint64_t', 0),
315 'string': ('char', 1),
316 'bestring': ('char', 1),
317 'float': ('double', 0),
318 'double': ('double', 0),
319 '...': ('uint8_t', 1)}
320 typename, n_stars = c_types[item.content[0]]
324 if isinstance(item.n, int):
326 array_suffix = '[%d]' % item.n
330 print("%s%s %s%s%s;" % (indent, typename, '*' * n_stars,
331 name_to_id(item.name),
333 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0',
334 'count', 'becount', '()'):
335 print_members(item.content, indent)
336 elif item.type_ == '|':
337 for choice in item.content:
338 print_members(choice, indent)
339 elif item.type_ == 'case':
340 print("%sint %s;" % (indent, item.name))
341 print("%sunion {" % indent)
342 for name, choice in sorted(item.content.items()):
343 print("%s struct {" % indent)
344 print_members(choice, indent + ' ' * 8)
345 print("%s } %s;" % (indent, name))
346 print("%s};" % indent)
347 elif item.type_ == 'constant':
349 print("%sbool %s;" % (indent, item.name))
350 elif item.type_ not in ("constant", "variable"):
351 fatal("unhandled type %s" % item.type_)
356 In Python 2, the elements of byte strings b'asdf' are char.
357 In Python 3, the elements are int.
358 This converts chars to ints.
360 return x if type(x) is int else ord(x)
364 return ''.join(['"'] + ["\\x%02x" % maybe_ord(x) for x in s] + ['"'])
367 class Parser_Context(object):
371 self.need_error_handler = False
def gen_name(self, prefix):
    """Return an identifier based on *prefix* that this context has not
    handed out before.

    The first request for a given prefix returns the prefix unchanged;
    every later request for the same prefix returns the prefix followed
    by the running request count (2, 3, ...).  The per-prefix count is
    stored in ``self.suffixes``.
    """
    uses = self.suffixes.get(prefix, 0) + 1
    self.suffixes[prefix] = uses
    # Only the second and later uses get a numeric suffix.
    if uses <= 1:
        return prefix
    return '%s%d' % (prefix, uses)
376 def save_pos(self, indent):
377 pos = self.gen_name('pos')
378 print("%sstruct spvbin_position %s = spvbin_position_save (input);" % (indent, pos))
380 def save_error(self, indent):
381 error = self.gen_name('save_n_errors')
382 print("%ssize_t %s = input->n_errors;" % (indent, error))
384 def parse_limit(self, endian, indent):
385 limit = self.gen_name('saved_limit')
387 %sstruct spvbin_limit %s;
388 %sif (!spvbin_limit_parse%s (&%s, input))
391 indent, '_be' if endian == 'big' else '', limit,
396 def print_parser_items(name, production, indent, accessor, ctx):
397 for item_idx in range(len(production)):
401 item = production[item_idx]
402 if item.type_ == 'constant':
403 print("""%sif (!spvbin_match_bytes (input, %s, %d))
405 indent, bytes_to_hex(item.content), len(item.content),
407 ctx.need_error_handler = True
409 print("%sp->%s = true;" % (indent, item.name))
410 elif item.type_ == 'variable':
411 if item.content[0] == 'nonterminal':
412 func = '%sparse_%s' % (prefix, name_to_id(item.content[1]))
414 func = 'spvbin_parse_%s' % item.content[0]
417 dst = "&p->%s%s" % (accessor, name_to_id(item.name))
421 print("""%sif (!%s (input, %s))
422 %s goto %s;""" % (indent, func, dst,
425 if item.content[0] != 'nonterminal' and item.name == 'version':
426 print("%sinput->version = p->%s%s;" % (
427 indent, accessor, name_to_id(item.name)))
429 if isinstance(item.n, int):
432 count = 'p->%s%s' % (accessor, name_to_id(item.n))
434 i_name = ctx.gen_name('i')
436 if not isinstance(item.n, int):
437 print("%sp->%s%s = xcalloc (%s, sizeof *p->%s%s);" % (
439 accessor, name_to_id(item.name), count,
440 accessor, name_to_id(item.name)))
441 dst += '[%s]' % i_name
442 print("%sfor (int %s = 0; %s < %s; %s++)" % (
443 indent, i_name, i_name, count, i_name))
444 print("""%s if (!%s (input, %s))
445 %s goto %s;""" % (indent, func, dst,
448 ctx.need_error_handler = True
449 elif item.type_ == '()':
451 # Not yet implemented
454 print_parser_items(name, item.content, indent, accessor, ctx)
455 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'):
457 # Not yet implemented
460 print("%sif (input->version == 0x%s) {" % (indent, item.type_[1:]))
461 print_parser_items(name, item.content, indent + ' ', accessor, ctx)
462 print("%s}" % indent)
463 elif item.type_ in ('count', 'becount'):
465 # Not yet implemented
468 pos = ctx.save_pos(indent)
469 endian = 'big' if item.type_ == 'becount' else 'little'
470 limit = ctx.parse_limit(endian, indent)
473 ctx.bail = ctx.gen_name('backtrack')
475 print("%sdo {" % indent)
478 and item.content[-1].type_ == 'variable'
479 and item.content[-1].content[0] == '...'):
480 content = item.content[:-1]
483 content = item.content
485 print_parser_items(name, content, indent, accessor, ctx)
488 print("%sinput->ofs = input->size;" % indent)
490 print("""%sif (!spvbin_input_at_end (input))
491 %s goto %s;""" % (indent,
493 print('%sspvbin_limit_pop (&%s, input);' % (indent, limit))
494 print('%sbreak;' % indent)
496 print('%s%s:' % (indent[4:], ctx.bail))
497 # In theory, we should emit code to clear whatever we're
498 # backtracking from. In practice, it's not important to
500 print("%sspvbin_position_restore (&%s, input);" % (indent, pos))
501 print('%sspvbin_limit_pop (&%s, input);' % (indent, limit))
502 print('%sgoto %s;' % (indent, save_bail))
504 print("%s} while (0);" % indent)
507 elif item.type_ == '|':
510 print("%sdo {" % indent)
512 pos = ctx.save_pos(indent)
513 error = ctx.save_error(indent)
515 for choice in item.content:
517 print("%sspvbin_position_restore (&%s, input);" % (indent, pos))
518 print("%sinput->n_errors = %s;" % (indent, error))
521 if i != len(item.content):
522 ctx.bail = ctx.gen_name('backtrack')
525 print_parser_items(name, choice, indent, accessor, ctx)
526 print("%sbreak;" % indent)
527 if i != len(item.content):
529 print('%s%s:' % (indent[4:], ctx.bail))
530 # In theory, we should emit code to clear whatever we're
531 # backtracking from. In practice, it's not important to
534 print("%s} while (0);" % indent)
535 elif item.type_ == 'case':
537 for choice_name, choice in sorted(item.content.items()):
538 if choice_name.endswith('else'):
539 print("%s} else {" % indent)
540 print("%s p->%s%s = -1;"
541 % (indent, accessor, item.name))
544 print("%s%sif (spvbin_match_byte (input, 0x%s)) {" % (
545 indent, '} else ' if i else '', choice_name[-2:]))
546 print("%s p->%s%s = 0x%s;" % (
547 indent, accessor, item.name, choice_name[-2:]))
551 print_parser_items(name, choice, indent + ' ',
552 accessor + choice_name + '.', ctx)
554 print("%s}" % indent)
560 def print_parser(name, production, indent):
563 %(prefix)sparse_%(name)s (struct spvbin_input *input, struct %(prefix)s%(name)s **p_)
566 struct %(prefix)s%(name)s *p = xzalloc (sizeof *p);
567 p->start = input->ofs;
568 ''' % {'prefix': prefix,
569 'name': name_to_id(name)})
571 ctx = Parser_Context()
572 print_parser_items(name, production, indent, '', ctx)
575 p->len = input->ofs - p->start;
579 if ctx.need_error_handler:
582 spvbin_error (input, "%s", p->start);
584 return false;""" % (name, prefix, name_to_id(name)))
588 def print_free_items(name, production, indent, accessor, ctx):
589 for item in production:
590 if item.type_ == 'constant':
592 elif item.type_ == 'variable':
596 if item.content[0] == 'nonterminal':
597 free_func = '%sfree_%s' % (prefix, name_to_id(item.content[1]))
598 elif item.content[0] in ('string', 'bestring', '...'):
603 dst = "p->%s%s" % (accessor, name_to_id(item.name))
607 print("%s%s (%s);" % (indent, free_func, dst))
609 if isinstance(item.n, int):
612 count = 'p->%s%s' % (accessor, name_to_id(item.n))
614 i_name = ctx.gen_name('i')
616 print("%sfor (int %s = 0; %s < %s; %s++)" % (
617 indent, i_name, i_name, count, i_name))
618 print("%s %s (%s[%s]);" % (
619 indent, free_func, dst, i_name))
620 if not isinstance(item.n, int):
621 print("%sfree (p->%s%s);" % (
622 indent, accessor, name_to_id(item.name)))
623 elif item.type_ in ('()', 'v1', 'v3', 'vAF', 'vB0',
626 # Not yet implemented
629 print_free_items(name, item.content, indent, accessor, ctx)
630 elif item.type_ == '|':
631 for choice in item.content:
632 print_free_items(name, choice, indent, accessor, ctx)
633 elif item.type_ == 'case':
635 for choice_name, choice in sorted(item.content.items()):
636 if choice_name.endswith('else'):
639 value_name = '0x%s' % choice_name[-2:]
641 print('%s%sif (p->%s%s == %s) {' % (
642 indent, '} else ' if i else '', accessor, item.name,
645 print_free_items(name, choice, indent + ' ',
646 accessor + choice_name + '.', ctx)
648 print("%s}" % indent)
653 def print_free(name, production, indent):
656 %(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *p)
660 ''' % {'prefix': prefix,
661 'name': name_to_id(name)})
663 print_free_items(name, production, indent, '', Parser_Context())
668 def print_print_items(name, production, indent, accessor, ctx):
669 for item_idx in range(len(production)):
673 item = production[item_idx]
674 if item.type_ == 'constant':
676 print('%sspvbin_print_presence ("%s", indent + 1, p->%s);' % (
677 indent, item.name, item.name))
678 elif item.type_ == 'variable':
682 if item.content[0] == 'nonterminal':
683 func = '%sprint_%s' % (prefix, name_to_id(item.content[1]))
685 c_types = {'bool': 'bool',
694 'bestring': 'string',
697 '...': ('uint8_t', 1)}
698 func = 'spvbin_print_%s' % c_types[item.content[0]]
700 dst = "p->%s%s" % (accessor, name_to_id(item.name))
702 print('%s%s ("%s", indent + 1, %s);' % (indent, func,
705 if isinstance(item.n, int):
708 count = 'p->%s%s' % (accessor, name_to_id(item.n))
710 i_name = ctx.gen_name('i')
711 elem_name = ctx.gen_name('elem_name')
712 dst += '[%s]' % i_name
714 %(indent)sfor (int %(index)s = 0; %(index)s < %(count)s; %(index)s++) {
715 %(indent)s char *%(elem_name)s = xasprintf ("%(item.name)s[%%d]", %(index)s);
716 %(indent)s %(func)s (%(elem_name)s, indent + 1, %(dst)s);
717 %(indent)s free (%(elem_name)s);
718 %(indent)s}""" % {'indent': indent,
721 'elem_name' : elem_name,
722 'item.name': item.name,
725 elif item.type_ == '()':
727 # Not yet implemented
730 print_print_items(name, item.content, indent, accessor, ctx)
731 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'):
733 # Not yet implemented
736 print_print_items(name, item.content, indent, accessor, ctx)
737 elif item.type_ in ('count', 'becount'):
739 # Not yet implemented
744 and item.content[-1].type_ == 'variable'
745 and item.content[-1].content[0] == '...'):
746 content = item.content[:-1]
748 content = item.content
749 print_print_items(name, content, indent, accessor, ctx)
750 elif item.type_ == '|':
751 for choice in item.content:
752 print_print_items(name, choice, indent, accessor, ctx)
753 elif item.type_ == 'case':
756 %sspvbin_print_case ("%s", indent + 1, p->%s%s);""" % (
757 indent, item.name, accessor, name_to_id(item.name)))
758 for choice_name, choice in sorted(item.content.items()):
759 if choice_name.endswith('else'):
762 value_name = '0x%s' % choice_name[-2:]
764 print('%s%sif (p->%s%s == %s) {' % (
765 indent, '} else ' if i else '', accessor, item.name,
768 print_print_items(name, choice, indent + ' ',
769 accessor + choice_name + '.', ctx)
771 print("%s}" % indent)
777 def print_print(name, production, indent):
780 %(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *p)
782 spvbin_print_header (title, p ? p->start : -1, p ? p->len : -1, indent);
788 ''' % {'prefix': prefix,
790 'name': name_to_id(name)})
792 ctx = Parser_Context()
793 print_print_items(name, production, indent, '', ctx)
798 return s[0].lower() + ''.join(['_%c' % x.lower() if x.isupper() else x
799 for x in s[1:]]).replace('-', '_')
802 if __name__ == "__main__":
805 options, args = getopt.gnu_getopt(sys.argv[1:], 'h', ['help'])
806 except getopt.GetoptError as e:
807 sys.stderr.write("%s: %s\n" % (argv0, e.msg))
810 for key, value in options:
811 if key in ['-h', '--help']:
817 sys.stderr.write("%s: bad usage (use --help for help)\n" % argv0)
821 file_name, output_type, prefix = args[:3]
822 input_file = open(file_name)
824 prefix = '%s_' % prefix
839 if token[0] == 'eof':
842 name, production = parse_production()
843 if name in productions:
844 fatal("%s: duplicate production" % name)
845 productions[name] = production
847 print('/* Generated automatically -- do not modify! -*- buffer-read-only: t -*- */')
848 if output_type == 'code' and len(args) == 4:
849 header_name = args[3]
856 #include "libpspp/str.h"
857 #include "gl/xalloc.h"\
859 for name, production in productions.items():
860 print_parser(name, production, ' ' * 4)
861 print_free(name, production, ' ' * 4)
862 print_print(name, production, ' ' * 4)
863 elif output_type == 'header' and len(args) == 3:
865 #ifndef %(PREFIX)sPARSER_H
866 #define %(PREFIX)sPARSER_H
871 #include "output/spv/spvbin-helpers.h"\
872 """ % {'PREFIX': prefix.upper()})
873 for name, production in productions.items():
874 print('\nstruct %s%s {' % (prefix, name_to_id(name)))
875 print(" size_t start, len;")
876 print_members(production, ' ' * 4)
878 bool %(prefix)sparse_%(name)s (struct spvbin_input *, struct %(prefix)s%(name)s **);
879 void %(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *);
880 void %(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *);\
881 ''' % {'prefix': prefix,
882 'name': name_to_id(name)})
885 #endif /* %(PREFIX)sPARSER_H */""" % {'PREFIX': prefix.upper()})
887 sys.stderr.write("%s: bad usage (use --help for help)" % argv0)