3 # PSPP - a program for statistical analysis.
4 # Copyright (C) 2017, 2018, 2019 Free Software Foundation, Inc.
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 sys.stderr.write("%s:%d: %s\n" % (file_name, line_number, msg))
40 line = input_file.readline()
45 return s.isdigit() or (s[0] == '-' and s[1].isdigit())
48 xdigits = "0123456789abcdefABCDEF"
58 fatal("syntax error expecting %s" % type)
79 if token == ('eof', ):
80 fatal("unexpected end of input")
88 elif not line[0].isspace():
98 elif line[0] in '[]()?|*':
101 elif line.startswith('=>'):
104 elif line.startswith('...'):
107 elif line[0].isalnum() or line[0] == '-':
109 while n < len(line) and (line[n].isalnum() or line[n] == '-'):
114 if prev[0] == '*' and is_num(s):
115 token = ('number', int(s, 10))
116 elif len(s) == 2 and is_xdigits(s):
117 token = ('bytes', struct.pack('B', int(s, 16)))
118 elif s[0] == 'i' and is_num(s[1:]):
119 token = ('bytes', struct.pack('<i', int(s[1:])))
120 elif s[:2] == 'ib' and is_num(s[2:]):
121 token = ('bytes', struct.pack('>i', int(s[2:])))
123 token = ('nonterminal', s)
124 elif s in ('bool', 'int16', 'int32', 'int64', 'be16', 'be32', 'be64',
125 'string', 'bestring', 'byte', 'float', 'double',
126 'count', 'becount', 'v1', 'v3', 'vAF', 'vB0',
132 fatal("unknown character %c" % line[0])
136 argv0 = os.path.basename(sys.argv[0])
138 %(argv0)s, parser generator for SPV binary members
139 usage: %(argv0)s GRAMMAR header
140 %(argv0)s GRAMMAR code HEADER_NAME
141 where GRAMMAR contains grammar definitions\
142 ''' % {"argv0": argv0})
147 def __init__(self, type_, name, n, content):
151 self.content = content
153 if self.type_ == 'constant':
154 return ' '.join(['%02x' % ord(x) for x in self.content])
156 return "%s(%s)" % (self.type_, self.content)
167 elif t[0] in ('bool', 'byte',
168 'int16', 'int32', 'int64',
169 'be16', 'be32', 'be64',
170 'string', 'bestring',
172 'nonterminal', '...'):
176 if t[0] == 'nonterminal':
177 name = name_to_id(content[1])
178 elif t[0] in ('v1', 'v3', 'vAF', 'vB0', 'count', 'becount'):
182 content = parse_choice()
188 content = parse_choice()
192 fatal('syntax error expecting item')
197 if token[0] == 'number':
205 if n.startswith('n-'):
208 fatal('expecting quantity')
214 if type_ == 'constant' and not optional:
215 fatal("%s: cannot name a constant" % token[1])
221 if type_ == 'constant':
225 item = Item(type_, name, n, content)
227 item = Item('|', None, 1, [[item], []])
231 def parse_concatenation():
233 while token[0] not in (')', ';', '|', 'eof'):
235 if (item.type_ == 'constant'
237 and items[-1].type_ == 'constant'):
238 items[-1].content += item.content
245 sub = parse_concatenation()
251 choices.append(parse_concatenation())
253 return [Item('|', None, 1, choices)]
264 items = parse_concatenation()
267 or items[0].type_ != 'constant'
268 or len(items[0].content) != 1):
269 fatal("choice must begin with xx (or 'else')")
270 choice = '%02x' % ord(items[0].content)
272 if choice in choices:
273 fatal("duplicate choice %s" % choice)
274 choices[choice] = items
287 return Item('case', case_name, 1,
288 { '%s_%s' % (case_name, k) : v for k, v in choices.items() })
291 def parse_production():
292 expect('nonterminal')
296 return name, parse_choice()
299 def print_members(p, indent):
301 if item.type_ == 'variable' and item.name:
302 if item.content[0] == 'nonterminal':
303 typename = 'struct %s%s' % (prefix,
304 name_to_id(item.content[1]))
307 c_types = {'bool': ('bool', 0),
308 'byte': ('uint8_t', 0),
309 'int16': ('uint16_t', 0),
310 'int32': ('uint32_t', 0),
311 'int64': ('uint64_t', 0),
312 'be16': ('uint16_t', 0),
313 'be32': ('uint32_t', 0),
314 'be64': ('uint64_t', 0),
315 'string': ('char', 1),
316 'bestring': ('char', 1),
317 'float': ('double', 0),
318 'double': ('double', 0),
319 '...': ('uint8_t', 1)}
320 typename, n_stars = c_types[item.content[0]]
324 if isinstance(item.n, int):
326 array_suffix = '[%d]' % item.n
330 print "%s%s %s%s%s;" % (indent, typename, '*' * n_stars,
331 name_to_id(item.name),
333 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0',
334 'count', 'becount', '()'):
335 print_members(item.content, indent)
336 elif item.type_ == '|':
337 for choice in item.content:
338 print_members(choice, indent)
339 elif item.type_ == 'case':
340 print "%sint %s;" % (indent, item.name)
341 print "%sunion {" % indent
342 for name, choice in sorted(item.content.items()):
343 print "%s struct {" % indent
344 print_members(choice, indent + ' ' * 8)
345 print "%s } %s;" % (indent, name)
346 print "%s};" % indent
347 elif item.type_ == 'constant':
349 print "%sbool %s;" % (indent, item.name)
350 elif item.type_ not in ("constant", "variable"):
351 fatal("unhandled type %s" % item.type_)
355 return ''.join(['"'] + ["\\x%02x" % ord(x) for x in s] + ['"'])
358 class Parser_Context(object):
362 self.need_error_handler = False
def gen_name(self, prefix):
    """Return a fresh name derived from PREFIX.

    self.suffixes records, per prefix, how many names have been handed
    out so far.  The first request for a given prefix returns the bare
    prefix; each later request returns the prefix followed by the running
    count (PREFIX2, PREFIX3, ...).
    """
    count = self.suffixes.get(prefix, 0) + 1
    self.suffixes[prefix] = count
    if count > 1:
        return '%s%d' % (prefix, count)
    return prefix
367 def save_pos(self, indent):
368 pos = self.gen_name('pos')
369 print "%sstruct spvbin_position %s = spvbin_position_save (input);" % (indent, pos)
371 def save_error(self, indent):
372 error = self.gen_name('save_n_errors')
373 print "%ssize_t %s = input->n_errors;" % (indent, error)
375 def parse_limit(self, endian, indent):
376 limit = self.gen_name('saved_limit')
378 %sstruct spvbin_limit %s;
379 %sif (!spvbin_limit_parse%s (&%s, input))
382 indent, '_be' if endian == 'big' else '', limit,
387 def print_parser_items(name, production, indent, accessor, ctx):
388 for item_idx in range(len(production)):
392 item = production[item_idx]
393 if item.type_ == 'constant':
394 print """%sif (!spvbin_match_bytes (input, %s, %d))
396 indent, bytes_to_hex(item.content), len(item.content),
398 ctx.need_error_handler = True
400 print "%sp->%s = true;" % (indent, item.name)
401 elif item.type_ == 'variable':
402 if item.content[0] == 'nonterminal':
403 func = '%sparse_%s' % (prefix, name_to_id(item.content[1]))
405 func = 'spvbin_parse_%s' % item.content[0]
408 dst = "&p->%s%s" % (accessor, name_to_id(item.name))
412 print """%sif (!%s (input, %s))
413 %s goto %s;""" % (indent, func, dst,
416 if item.content[0] != 'nonterminal' and item.name == 'version':
417 print "%sinput->version = p->%s%s;" % (
418 indent, accessor, name_to_id(item.name))
420 if isinstance(item.n, int):
423 count = 'p->%s%s' % (accessor, name_to_id(item.n))
425 i_name = ctx.gen_name('i')
427 if not isinstance(item.n, int):
428 print "%sp->%s%s = xcalloc (%s, sizeof *p->%s%s);" % (
430 accessor, name_to_id(item.name), count,
431 accessor, name_to_id(item.name))
432 dst += '[%s]' % i_name
433 print "%sfor (int %s = 0; %s < %s; %s++)" % (
434 indent, i_name, i_name, count, i_name)
435 print """%s if (!%s (input, %s))
436 %s goto %s;""" % (indent, func, dst,
439 ctx.need_error_handler = True
440 elif item.type_ == '()':
442 # Not yet implemented
445 print_parser_items(name, item.content, indent, accessor, ctx)
446 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'):
448 # Not yet implemented
451 print "%sif (input->version == 0x%s) {" % (indent, item.type_[1:])
452 print_parser_items(name, item.content, indent + ' ', accessor, ctx)
454 elif item.type_ in ('count', 'becount'):
456 # Not yet implemented
459 pos = ctx.save_pos(indent)
460 endian = 'big' if item.type_ == 'becount' else 'little'
461 limit = ctx.parse_limit(endian, indent)
464 ctx.bail = ctx.gen_name('backtrack')
466 print "%sdo {" % indent
469 and item.content[-1].type_ == 'variable'
470 and item.content[-1].content[0] == '...'):
471 content = item.content[:-1]
474 content = item.content
476 print_parser_items(name, content, indent, accessor, ctx)
479 print "%sinput->ofs = input->size;" % indent
481 print """%sif (!spvbin_input_at_end (input))
482 %s goto %s;""" % (indent,
484 print '%sspvbin_limit_pop (&%s, input);' % (indent, limit)
485 print '%sbreak;' % indent
487 print '%s%s:' % (indent[4:], ctx.bail)
488 # In theory, we should emit code to clear whatever we're
489 # backtracking from. In practice, it's not important to
491 print "%sspvbin_position_restore (&%s, input);" % (indent, pos)
492 print '%sspvbin_limit_pop (&%s, input);' % (indent, limit)
493 print '%sgoto %s;' % (indent, save_bail)
495 print "%s} while (0);" % indent
498 elif item.type_ == '|':
501 print "%sdo {" % indent
503 pos = ctx.save_pos(indent)
504 error = ctx.save_error(indent)
506 for choice in item.content:
508 print "%sspvbin_position_restore (&%s, input);" % (indent, pos)
509 print "%sinput->n_errors = %s;" % (indent, error)
512 if i != len(item.content):
513 ctx.bail = ctx.gen_name('backtrack')
516 print_parser_items(name, choice, indent, accessor, ctx)
517 print "%sbreak;" % indent
518 if i != len(item.content):
520 print '%s%s:' % (indent[4:], ctx.bail)
521 # In theory, we should emit code to clear whatever we're
522 # backtracking from. In practice, it's not important to
525 print "%s} while (0);" % indent
526 elif item.type_ == 'case':
528 for choice_name, choice in sorted(item.content.items()):
529 if choice_name.endswith('else'):
530 print "%s} else {" % indent
531 print "%s p->%s%s = -1;" % (indent, accessor, item.name)
534 print "%s%sif (spvbin_match_byte (input, 0x%s)) {" % (
535 indent, '} else ' if i else '', choice_name[-2:])
536 print "%s p->%s%s = 0x%s;" % (
537 indent, accessor, item.name, choice_name[-2:])
541 print_parser_items(name, choice, indent + ' ',
542 accessor + choice_name + '.', ctx)
550 def print_parser(name, production, indent):
553 %(prefix)sparse_%(name)s (struct spvbin_input *input, struct %(prefix)s%(name)s **p_)
556 struct %(prefix)s%(name)s *p = xzalloc (sizeof *p);
557 p->start = input->ofs;
558 ''' % {'prefix': prefix,
559 'name': name_to_id(name)}
561 ctx = Parser_Context()
562 print_parser_items(name, production, indent, '', ctx)
565 p->len = input->ofs - p->start;
569 if ctx.need_error_handler:
572 spvbin_error (input, "%s", p->start);
574 return false;""" % (name, prefix, name_to_id(name))
578 def print_free_items(name, production, indent, accessor, ctx):
579 for item in production:
580 if item.type_ == 'constant':
582 elif item.type_ == 'variable':
586 if item.content[0] == 'nonterminal':
587 free_func = '%sfree_%s' % (prefix, name_to_id(item.content[1]))
588 elif item.content[0] in ('string', 'bestring', '...'):
593 dst = "p->%s%s" % (accessor, name_to_id(item.name))
597 print "%s%s (%s);" % (indent, free_func, dst)
599 if isinstance(item.n, int):
602 count = 'p->%s%s' % (accessor, name_to_id(item.n))
604 i_name = ctx.gen_name('i')
606 print "%sfor (int %s = 0; %s < %s; %s++)" % (
607 indent, i_name, i_name, count, i_name)
608 print "%s %s (%s[%s]);" % (
609 indent, free_func, dst, i_name)
610 if not isinstance(item.n, int):
611 print "%sfree (p->%s%s);" % (
612 indent, accessor, name_to_id(item.name))
613 elif item.type_ in ('()', 'v1', 'v3', 'vAF', 'vB0',
616 # Not yet implemented
619 print_free_items(name, item.content, indent, accessor, ctx)
620 elif item.type_ == '|':
621 for choice in item.content:
622 print_free_items(name, choice, indent, accessor, ctx)
623 elif item.type_ == 'case':
625 for choice_name, choice in sorted(item.content.items()):
626 if choice_name.endswith('else'):
629 value_name = '0x%s' % choice_name[-2:]
631 print '%s%sif (p->%s%s == %s) {' % (
632 indent, '} else ' if i else '', accessor, item.name,
635 print_free_items(name, choice, indent + ' ',
636 accessor + choice_name + '.', ctx)
643 def print_free(name, production, indent):
646 %(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *p)
650 ''' % {'prefix': prefix,
651 'name': name_to_id(name)}
653 print_free_items(name, production, indent, '', Parser_Context())
658 def print_print_items(name, production, indent, accessor, ctx):
659 for item_idx in range(len(production)):
663 item = production[item_idx]
664 if item.type_ == 'constant':
666 print '%sspvbin_print_presence ("%s", indent + 1, p->%s);' % (
667 indent, item.name, item.name)
668 elif item.type_ == 'variable':
672 if item.content[0] == 'nonterminal':
673 func = '%sprint_%s' % (prefix, name_to_id(item.content[1]))
675 c_types = {'bool': 'bool',
684 'bestring': 'string',
687 '...': ('uint8_t', 1)}
688 func = 'spvbin_print_%s' % c_types[item.content[0]]
690 dst = "p->%s%s" % (accessor, name_to_id(item.name))
692 print '%s%s ("%s", indent + 1, %s);' % (indent, func,
695 if isinstance(item.n, int):
698 count = 'p->%s%s' % (accessor, name_to_id(item.n))
700 i_name = ctx.gen_name('i')
701 elem_name = ctx.gen_name('elem_name')
702 dst += '[%s]' % i_name
704 %(indent)sfor (int %(index)s = 0; %(index)s < %(count)s; %(index)s++) {
705 %(indent)s char *%(elem_name)s = xasprintf ("%(item.name)s[%%d]", %(index)s);
706 %(indent)s %(func)s (%(elem_name)s, indent + 1, %(dst)s);
707 %(indent)s free (%(elem_name)s);
708 %(indent)s}""" % {'indent': indent,
711 'elem_name' : elem_name,
712 'item.name': item.name,
715 elif item.type_ == '()':
717 # Not yet implemented
720 print_print_items(name, item.content, indent, accessor, ctx)
721 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'):
723 # Not yet implemented
726 print_print_items(name, item.content, indent, accessor, ctx)
727 elif item.type_ in ('count', 'becount'):
729 # Not yet implemented
734 and item.content[-1].type_ == 'variable'
735 and item.content[-1].content[0] == '...'):
736 content = item.content[:-1]
738 content = item.content
739 print_print_items(name, content, indent, accessor, ctx)
740 elif item.type_ == '|':
741 for choice in item.content:
742 print_print_items(name, choice, indent, accessor, ctx)
743 elif item.type_ == 'case':
746 %sspvbin_print_case ("%s", indent + 1, p->%s%s);""" % (
747 indent, item.name, accessor, name_to_id(item.name))
748 for choice_name, choice in sorted(item.content.items()):
749 if choice_name.endswith('else'):
752 value_name = '0x%s' % choice_name[-2:]
754 print '%s%sif (p->%s%s == %s) {' % (
755 indent, '} else ' if i else '', accessor, item.name,
758 print_print_items(name, choice, indent + ' ',
759 accessor + choice_name + '.', ctx)
767 def print_print(name, production, indent):
770 %(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *p)
772 spvbin_print_header (title, p ? p->start : -1, p ? p->len : -1, indent);
778 ''' % {'prefix': prefix,
780 'name': name_to_id(name)}
782 ctx = Parser_Context()
783 print_print_items(name, production, indent, '', ctx)
788 return s[0].lower() + ''.join(['_%c' % x.lower() if x.isupper() else x
789 for x in s[1:]]).replace('-', '_')
792 if __name__ == "__main__":
795 options, args = getopt.gnu_getopt(sys.argv[1:], 'h', ['help'])
796 except getopt.GetoptError as e:
797 sys.stderr.write("%s: %s\n" % (argv0, e.msg))
800 for key, value in options:
801 if key in ['-h', '--help']:
807 sys.stderr.write("%s: bad usage (use --help for help)\n" % argv0)
811 file_name, output_type, prefix = args[:3]
812 input_file = open(file_name)
814 prefix = '%s_' % prefix
829 if token[0] == 'eof':
832 name, production = parse_production()
833 if name in productions:
834 fatal("%s: duplicate production" % name)
835 productions[name] = production
837 print '/* Generated automatically -- do not modify! -*- buffer-read-only: t -*- */'
838 if output_type == 'code' and len(args) == 4:
839 header_name = args[3]
846 #include "libpspp/str.h"
847 #include "gl/xalloc.h"\
849 for name, production in productions.items():
850 print_parser(name, production, ' ' * 4)
851 print_free(name, production, ' ' * 4)
852 print_print(name, production, ' ' * 4)
853 elif output_type == 'header' and len(args) == 3:
855 #ifndef %(PREFIX)sPARSER_H
856 #define %(PREFIX)sPARSER_H
861 #include "output/spv/spvbin-helpers.h"\
862 """ % {'PREFIX': prefix.upper()}
863 for name, production in productions.items():
864 print '\nstruct %s%s {' % (prefix, name_to_id(name))
865 print " size_t start, len;"
866 print_members(production, ' ' * 4)
868 bool %(prefix)sparse_%(name)s (struct spvbin_input *, struct %(prefix)s%(name)s **);
869 void %(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *);
870 void %(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *);\
871 ''' % {'prefix': prefix,
872 'name': name_to_id(name)}
875 #endif /* %(PREFIX)sPARSER_H */""" % {'PREFIX': prefix.upper()}
877 sys.stderr.write("%s: bad usage (use --help for help)" % argv0)