3 # PSPP - a program for statistical analysis.
4 # Copyright (C) 2017, 2018, 2019 Free Software Foundation, Inc.
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 sys.stderr.write("%s:%d: %s\n" % (file_name, line_number, msg))
40 line = input_file.readline()
45 return s.isdigit() or (s[0] == '-' and s[1].isdigit())
48 xdigits = "0123456789abcdefABCDEF"
58 fatal("syntax error expecting %s" % type)
79 if token == ('eof', ):
80 fatal("unexpected end of input")
88 elif not line[0].isspace():
98 elif line[0] in '[]()?|*':
101 elif line.startswith('=>'):
104 elif line.startswith('...'):
107 elif line.startswith('"'):
109 while n < len(line) and (line[n] != '"'):
111 s = line[1:n].encode()
113 token = ('bytes', struct.pack('<h', len(s)) + s)
114 elif line[0].isalnum() or line[0] == '-':
116 while n < len(line) and (line[n].isalnum() or line[n] == '-'):
121 if prev[0] == '*' and is_num(s):
122 token = ('number', int(s, 10))
123 elif len(s) == 2 and is_xdigits(s):
124 token = ('bytes', struct.pack('B', int(s, 16)))
125 elif s[0] == 'i' and is_num(s[1:]):
126 token = ('bytes', struct.pack('<i', int(s[1:])))
127 elif s[:2] == 'ib' and is_num(s[2:]):
128 token = ('bytes', struct.pack('>i', int(s[2:])))
130 token = ('nonterminal', s)
131 elif s in ('bool', 'int16', 'int32', 'int64', 'be16', 'be32', 'be64',
132 'string', 'bestring', 'byte', 'float', 'double',
133 'count', 'becount', 'v1', 'v3', 'vAF', 'vB0',
139 fatal("unknown character %c" % line[0])
143 argv0 = os.path.basename(sys.argv[0])
145 %(argv0)s, parser generator for SPV binary members
146 usage: %(argv0)s GRAMMAR header PREFIX
147 %(argv0)s GRAMMAR code PREFIX HEADER_NAME
148 where GRAMMAR contains grammar definitions,
149 PREFIX is the identifier prefix to use,
150 and HEADER_NAME is the name of the header to include.
151 ''' % {"argv0": argv0})
156 def __init__(self, type_, name, n, content):
160 self.content = content
162 if self.type_ == 'constant':
163 return ' '.join(['%02x' % maybe_ord(x) for x in self.content])
165 return "%s(%s)" % (self.type_, self.content)
176 elif t[0] in ('bool', 'byte',
177 'int16', 'int32', 'int64',
178 'be16', 'be32', 'be64',
179 'string', 'bestring',
181 'nonterminal', '...'):
185 if t[0] == 'nonterminal':
186 name = name_to_id(content[1])
187 elif t[0] in ('v1', 'v3', 'vAF', 'vB0', 'count', 'becount'):
191 content = parse_choice()
197 content = parse_choice()
201 fatal('syntax error expecting item')
206 if token[0] == 'number':
214 if n.startswith('n-'):
217 fatal('expecting quantity')
223 if type_ == 'constant' and not optional:
224 fatal("%s: cannot name a constant" % token[1])
230 if type_ == 'constant':
234 item = Item(type_, name, n, content)
236 item = Item('|', None, 1, [[item], []])
240 def parse_concatenation():
242 while token[0] not in (')', ';', '|', 'eof'):
244 if (item.type_ == 'constant'
246 and items[-1].type_ == 'constant'):
247 items[-1].content += item.content
254 sub = parse_concatenation()
260 choices.append(parse_concatenation())
262 return [Item('|', None, 1, choices)]
273 items = parse_concatenation()
276 or items[0].type_ != 'constant'
277 or len(items[0].content) != 1):
278 fatal("choice must begin with xx (or 'else')")
279 choice = '%02x' % maybe_ord(items[0].content)
281 if choice in choices:
282 fatal("duplicate choice %s" % choice)
283 choices[choice] = items
296 return Item('case', case_name, 1,
297 { '%s_%s' % (case_name, k) : v for k, v in choices.items() })
300 def parse_production():
301 expect('nonterminal')
305 return name, parse_choice()
308 def print_members(p, indent):
310 if item.type_ == 'variable' and item.name:
311 if item.content[0] == 'nonterminal':
312 typename = 'struct %s%s' % (prefix,
313 name_to_id(item.content[1]))
316 c_types = {'bool': ('bool', 0),
317 'byte': ('uint8_t', 0),
318 'int16': ('uint16_t', 0),
319 'int32': ('uint32_t', 0),
320 'int64': ('uint64_t', 0),
321 'be16': ('uint16_t', 0),
322 'be32': ('uint32_t', 0),
323 'be64': ('uint64_t', 0),
324 'string': ('char', 1),
325 'bestring': ('char', 1),
326 'float': ('double', 0),
327 'double': ('double', 0),
328 '...': ('uint8_t', 1)}
329 typename, n_stars = c_types[item.content[0]]
333 if isinstance(item.n, int):
335 array_suffix = '[%d]' % item.n
339 print("%s%s %s%s%s;" % (indent, typename, '*' * n_stars,
340 name_to_id(item.name),
342 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0',
343 'count', 'becount', '()'):
344 print_members(item.content, indent)
345 elif item.type_ == '|':
346 for choice in item.content:
347 print_members(choice, indent)
348 elif item.type_ == 'case':
349 print("%sint %s;" % (indent, item.name))
350 print("%sunion {" % indent)
351 for name, choice in sorted(item.content.items()):
352 print("%s struct {" % indent)
353 print_members(choice, indent + ' ' * 8)
354 print("%s } %s;" % (indent, name))
355 print("%s};" % indent)
356 elif item.type_ == 'constant':
358 print("%sbool %s;" % (indent, item.name))
359 elif item.type_ not in ("constant", "variable"):
360 fatal("unhandled type %s" % item.type_)
365 In Python 2, the elements of a byte string such as b'asdf' are
366 one-character strings; in Python 3, the elements are ints.
367 This returns ints unchanged and converts anything else with ord().
369 return x if type(x) is int else ord(x)
373 return ''.join(['"'] + ["\\x%02x" % maybe_ord(x) for x in s] + ['"'])
376 class Parser_Context(object):
380 self.need_error_handler = False
381 def gen_name(self, prefix):
382 n = self.suffixes.get(prefix, 0) + 1
383 self.suffixes[prefix] = n
384 return '%s%d' % (prefix, n) if n > 1 else prefix
385 def save_pos(self, indent):
386 pos = self.gen_name('pos')
387 print("%sstruct spvbin_position %s = spvbin_position_save (input);" % (indent, pos))
389 def save_error(self, indent):
390 error = self.gen_name('save_n_errors')
391 print("%ssize_t %s = input->n_errors;" % (indent, error))
393 def parse_limit(self, endian, indent):
394 limit = self.gen_name('saved_limit')
396 %sstruct spvbin_limit %s;
397 %sif (!spvbin_limit_parse%s (&%s, input))
400 indent, '_be' if endian == 'big' else '', limit,
405 def print_parser_items(name, production, indent, accessor, ctx):
406 for item_idx in range(len(production)):
410 item = production[item_idx]
411 if item.type_ == 'constant':
412 print("""%sif (!spvbin_match_bytes (input, %s, %d))
414 indent, bytes_to_hex(item.content), len(item.content),
416 ctx.need_error_handler = True
418 print("%sp->%s = true;" % (indent, item.name))
419 elif item.type_ == 'variable':
420 if item.content[0] == 'nonterminal':
421 func = '%sparse_%s' % (prefix, name_to_id(item.content[1]))
423 func = 'spvbin_parse_%s' % item.content[0]
426 dst = "&p->%s%s" % (accessor, name_to_id(item.name))
430 print("""%sif (!%s (input, %s))
431 %s goto %s;""" % (indent, func, dst,
434 if item.content[0] != 'nonterminal' and item.name == 'version':
435 print("%sinput->version = p->%s%s;" % (
436 indent, accessor, name_to_id(item.name)))
438 if isinstance(item.n, int):
441 count = 'p->%s%s' % (accessor, name_to_id(item.n))
443 i_name = ctx.gen_name('i')
445 if not isinstance(item.n, int):
446 print("%sp->%s%s = xcalloc (%s, sizeof *p->%s%s);" % (
448 accessor, name_to_id(item.name), count,
449 accessor, name_to_id(item.name)))
450 dst += '[%s]' % i_name
451 print("%sfor (int %s = 0; %s < %s; %s++)" % (
452 indent, i_name, i_name, count, i_name))
453 print("""%s if (!%s (input, %s))
454 %s goto %s;""" % (indent, func, dst,
457 ctx.need_error_handler = True
458 elif item.type_ == '()':
460 # Not yet implemented
463 print_parser_items(name, item.content, indent, accessor, ctx)
464 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'):
466 # Not yet implemented
469 print("%sif (input->version == 0x%s) {" % (indent, item.type_[1:]))
470 print_parser_items(name, item.content, indent + ' ', accessor, ctx)
471 print("%s}" % indent)
472 elif item.type_ in ('count', 'becount'):
474 # Not yet implemented
477 pos = ctx.save_pos(indent)
478 endian = 'big' if item.type_ == 'becount' else 'little'
479 limit = ctx.parse_limit(endian, indent)
482 ctx.bail = ctx.gen_name('backtrack')
484 print("%sdo {" % indent)
487 and item.content[-1].type_ == 'variable'
488 and item.content[-1].content[0] == '...'):
489 content = item.content[:-1]
492 content = item.content
494 print_parser_items(name, content, indent, accessor, ctx)
497 print("%sinput->ofs = input->size;" % indent)
499 print("""%sif (!spvbin_input_at_end (input))
500 %s goto %s;""" % (indent,
502 print('%sspvbin_limit_pop (&%s, input);' % (indent, limit))
503 print('%sbreak;' % indent)
505 print('%s%s:' % (indent[4:], ctx.bail))
506 # In theory, we should emit code to clear whatever we're
507 # backtracking from. In practice, it's not important to
509 print("%sspvbin_position_restore (&%s, input);" % (indent, pos))
510 print('%sspvbin_limit_pop (&%s, input);' % (indent, limit))
511 print('%sgoto %s;' % (indent, save_bail))
513 print("%s} while (0);" % indent)
516 elif item.type_ == '|':
519 print("%sdo {" % indent)
521 pos = ctx.save_pos(indent)
522 error = ctx.save_error(indent)
524 for choice in item.content:
526 print("%sspvbin_position_restore (&%s, input);" % (indent, pos))
527 print("%sinput->n_errors = %s;" % (indent, error))
530 if i != len(item.content):
531 ctx.bail = ctx.gen_name('backtrack')
534 print_parser_items(name, choice, indent, accessor, ctx)
535 print("%sbreak;" % indent)
536 if i != len(item.content):
538 print('%s%s:' % (indent[4:], ctx.bail))
539 # In theory, we should emit code to clear whatever we're
540 # backtracking from. In practice, it's not important to
543 print("%s} while (0);" % indent)
544 elif item.type_ == 'case':
546 for choice_name, choice in sorted(item.content.items()):
547 if choice_name.endswith('else'):
548 print("%s} else {" % indent)
549 print("%s p->%s%s = -1;"
550 % (indent, accessor, item.name))
553 print("%s%sif (spvbin_match_byte (input, 0x%s)) {" % (
554 indent, '} else ' if i else '', choice_name[-2:]))
555 print("%s p->%s%s = 0x%s;" % (
556 indent, accessor, item.name, choice_name[-2:]))
560 print_parser_items(name, choice, indent + ' ',
561 accessor + choice_name + '.', ctx)
563 print("%s}" % indent)
569 def print_parser(name, production, indent):
572 %(prefix)sparse_%(name)s (struct spvbin_input *input, struct %(prefix)s%(name)s **p_)
575 struct %(prefix)s%(name)s *p = xzalloc (sizeof *p);
576 p->start = input->ofs;
577 ''' % {'prefix': prefix,
578 'name': name_to_id(name)})
580 ctx = Parser_Context()
581 print_parser_items(name, production, indent, '', ctx)
584 p->len = input->ofs - p->start;
588 if ctx.need_error_handler:
591 spvbin_error (input, "%s", p->start);
593 return false;""" % (name, prefix, name_to_id(name)))
597 def print_free_items(name, production, indent, accessor, ctx):
598 for item in production:
599 if item.type_ == 'constant':
601 elif item.type_ == 'variable':
605 if item.content[0] == 'nonterminal':
606 free_func = '%sfree_%s' % (prefix, name_to_id(item.content[1]))
607 elif item.content[0] in ('string', 'bestring', '...'):
612 dst = "p->%s%s" % (accessor, name_to_id(item.name))
616 print("%s%s (%s);" % (indent, free_func, dst))
618 if isinstance(item.n, int):
621 count = 'p->%s%s' % (accessor, name_to_id(item.n))
623 i_name = ctx.gen_name('i')
625 print("%sfor (int %s = 0; %s < %s; %s++)" % (
626 indent, i_name, i_name, count, i_name))
627 print("%s %s (%s[%s]);" % (
628 indent, free_func, dst, i_name))
629 if not isinstance(item.n, int):
630 print("%sfree (p->%s%s);" % (
631 indent, accessor, name_to_id(item.name)))
632 elif item.type_ in ('()', 'v1', 'v3', 'vAF', 'vB0',
635 # Not yet implemented
638 print_free_items(name, item.content, indent, accessor, ctx)
639 elif item.type_ == '|':
640 for choice in item.content:
641 print_free_items(name, choice, indent, accessor, ctx)
642 elif item.type_ == 'case':
644 for choice_name, choice in sorted(item.content.items()):
645 if choice_name.endswith('else'):
648 value_name = '0x%s' % choice_name[-2:]
650 print('%s%sif (p->%s%s == %s) {' % (
651 indent, '} else ' if i else '', accessor, item.name,
654 print_free_items(name, choice, indent + ' ',
655 accessor + choice_name + '.', ctx)
657 print("%s}" % indent)
662 def print_free(name, production, indent):
665 %(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *p)
669 ''' % {'prefix': prefix,
670 'name': name_to_id(name)})
672 print_free_items(name, production, indent, '', Parser_Context())
677 def print_print_items(name, production, indent, accessor, ctx):
678 for item_idx in range(len(production)):
682 item = production[item_idx]
683 if item.type_ == 'constant':
685 print('%sspvbin_print_presence ("%s", indent + 1, p->%s);' % (
686 indent, item.name, item.name))
687 elif item.type_ == 'variable':
691 if item.content[0] == 'nonterminal':
692 func = '%sprint_%s' % (prefix, name_to_id(item.content[1]))
694 c_types = {'bool': 'bool',
703 'bestring': 'string',
706 '...': ('uint8_t', 1)}
707 func = 'spvbin_print_%s' % c_types[item.content[0]]
709 dst = "p->%s%s" % (accessor, name_to_id(item.name))
711 print('%s%s ("%s", indent + 1, %s);' % (indent, func,
714 if isinstance(item.n, int):
717 count = 'p->%s%s' % (accessor, name_to_id(item.n))
719 i_name = ctx.gen_name('i')
720 elem_name = ctx.gen_name('elem_name')
721 dst += '[%s]' % i_name
723 %(indent)sfor (int %(index)s = 0; %(index)s < %(count)s; %(index)s++) {
724 %(indent)s char *%(elem_name)s = xasprintf ("%(item.name)s[%%d]", %(index)s);
725 %(indent)s %(func)s (%(elem_name)s, indent + 1, %(dst)s);
726 %(indent)s free (%(elem_name)s);
727 %(indent)s}""" % {'indent': indent,
730 'elem_name' : elem_name,
731 'item.name': item.name,
734 elif item.type_ == '()':
736 # Not yet implemented
739 print_print_items(name, item.content, indent, accessor, ctx)
740 elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'):
742 # Not yet implemented
745 print_print_items(name, item.content, indent, accessor, ctx)
746 elif item.type_ in ('count', 'becount'):
748 # Not yet implemented
753 and item.content[-1].type_ == 'variable'
754 and item.content[-1].content[0] == '...'):
755 content = item.content[:-1]
757 content = item.content
758 print_print_items(name, content, indent, accessor, ctx)
759 elif item.type_ == '|':
760 for choice in item.content:
761 print_print_items(name, choice, indent, accessor, ctx)
762 elif item.type_ == 'case':
765 %sspvbin_print_case ("%s", indent + 1, p->%s%s);""" % (
766 indent, item.name, accessor, name_to_id(item.name)))
767 for choice_name, choice in sorted(item.content.items()):
768 if choice_name.endswith('else'):
771 value_name = '0x%s' % choice_name[-2:]
773 print('%s%sif (p->%s%s == %s) {' % (
774 indent, '} else ' if i else '', accessor, item.name,
777 print_print_items(name, choice, indent + ' ',
778 accessor + choice_name + '.', ctx)
780 print("%s}" % indent)
786 def print_print(name, production, indent):
789 %(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *p)
791 spvbin_print_header (title, p ? p->start : -1, p ? p->len : -1, indent);
797 ''' % {'prefix': prefix,
799 'name': name_to_id(name)})
801 ctx = Parser_Context()
802 print_print_items(name, production, indent, '', ctx)
807 return s[0].lower() + ''.join(['_%c' % x.lower() if x.isupper() else x
808 for x in s[1:]]).replace('-', '_')
811 if __name__ == "__main__":
814 options, args = getopt.gnu_getopt(sys.argv[1:], 'h', ['help'])
815 except getopt.GetoptError as e:
816 sys.stderr.write("%s: %s\n" % (argv0, e.msg))
819 for key, value in options:
820 if key in ['-h', '--help']:
826 sys.stderr.write("%s: bad usage (use --help for help)\n" % argv0)
830 file_name, output_type, prefix = args[:3]
831 input_file = open(file_name)
833 prefix = '%s_' % prefix
848 if token[0] == 'eof':
851 name, production = parse_production()
852 if name in productions:
853 fatal("%s: duplicate production" % name)
854 productions[name] = production
856 print('/* Generated automatically -- do not modify! -*- buffer-read-only: t -*- */')
857 if output_type == 'code' and len(args) == 4:
858 header_name = args[3]
865 #include "libpspp/str.h"
866 #include "gl/xalloc.h"\
868 for name, production in productions.items():
869 print_parser(name, production, ' ' * 4)
870 print_free(name, production, ' ' * 4)
871 print_print(name, production, ' ' * 4)
872 elif output_type == 'header' and len(args) == 3:
874 #ifndef %(PREFIX)sPARSER_H
875 #define %(PREFIX)sPARSER_H
880 #include "output/spv/spvbin-helpers.h"\
881 """ % {'PREFIX': prefix.upper()})
882 for name, production in productions.items():
883 print('\nstruct %s%s {' % (prefix, name_to_id(name)))
884 print(" size_t start, len;")
885 print_members(production, ' ' * 4)
887 bool %(prefix)sparse_%(name)s (struct spvbin_input *, struct %(prefix)s%(name)s **);
888 void %(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *);
889 void %(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *);\
890 ''' % {'prefix': prefix,
891 'name': name_to_id(name)})
894 #endif /* %(PREFIX)sPARSER_H */""" % {'PREFIX': prefix.upper()})
896 sys.stderr.write("%s: bad usage (use --help for help)" % argv0)