X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Foutput%2Fspv%2Fbinary-parser-generator;fp=src%2Foutput%2Fspv%2Fbinary-parser-generator;h=dd2f56ce8184a033adc98b555a51d2dcb74cf289;hb=bcaaee5f0bd21f443c8dcb5f67114e63d43673af;hp=0000000000000000000000000000000000000000;hpb=1abd7f599dd0d773add0a98fa3b612bc15aaf422;p=pspp diff --git a/src/output/spv/binary-parser-generator b/src/output/spv/binary-parser-generator new file mode 100644 index 0000000000..dd2f56ce81 --- /dev/null +++ b/src/output/spv/binary-parser-generator @@ -0,0 +1,861 @@ +#! /usr/bin/python + +import getopt +import os +import struct +import sys + +n_errors = 0 + +def error(msg): + global n_errors + sys.stderr.write("%s:%d: %s\n" % (file_name, line_number, msg)) + n_errors += 1 + + +def fatal(msg): + error(msg) + sys.exit(1) + + +def get_line(): + global line + global line_number + line = input_file.readline() + line_number += 1 + + +def is_num(s): + return s.isdigit() or (s[0] == '-' and s[1].isdigit()) + + +xdigits = "0123456789abcdefABCDEF" +def is_xdigits(s): + for c in s: + if c not in xdigits: + return False + return True + + +def expect(type): + if token[0] != type: + fatal("syntax error expecting %s" % type) + + +def match(type): + if token[0] == type: + get_token() + return True + else: + return False + + +def must_match(type): + expect(type) + get_token() + + +def get_token(): + global token + global line + prev = token + if line == "": + if token == ('eof', ): + fatal("unexpected end of input") + get_line() + if not line: + token = ('eof', ) + return + elif line == '\n': + token = (';', ) + return + elif not line[0].isspace(): + token = (';', ) + return + + line = line.lstrip() + if line == "": + get_token() + elif line[0] == '#': + line = '' + get_token() + elif line[0] in '[]()?|*': + token = (line[0], ) + line = line[1:] + elif line.startswith('=>'): + token = (line[:2], ) + line = line[2:] + elif line.startswith('...'): + token = (line[:3], ) + line = line[3:] + elif line[0].isalnum() or line[0] == '-': + n = 1 + while n < len(line) and (line[n].isalnum() or line[n] == '-'): + n += 1 + s = line[:n] + line = line[n:] + + if prev[0] == '*' and is_num(s): + token = ('number', int(s, 10)) + elif len(s) == 2 and is_xdigits(s): + token = ('bytes', struct.pack('B', int(s, 16))) + elif s[0] == 'i' and is_num(s[1:]): + token = ('bytes', struct.pack('i', int(s[2:]))) + elif s[0].isupper(): + token = ('nonterminal', s) + elif s in ('bool', 'int16', 'int32', 'int64', 'be16', 'be32', 'be64', + 'string', 'bestring', 'byte', 'float', 'double', + 'count', 'becount', 'v1', 'v3', 'vAF', 'vB0', + 'case', 'else'): + token = (s, ) + else: + token = ('id', s) + else: + fatal("unknown character %c" % line[0]) + + +def usage(): + argv0 = os.path.basename(sys.argv[0]) + print('''\ +%(argv0)s, parser generator for SPV binary members +usage: %(argv0)s GRAMMAR header + %(argv0)s GRAMMAR code HEADER_NAME + where GRAMMAR contains grammar definitions\ +''' % {"argv0": argv0}) + sys.exit(0) + + +class Item(object): + def __init__(self, type_, name, n, content): + self.type_ = type_ + self.name = name + self.n = n + self.content = content + def __repr__(self): + if self.type_ == 'constant': + return ' '.join(['%02x' % ord(x) for x in self.content]) + elif self.content: + return "%s(%s)" % (self.type_, self.content) + else: + return self.type_ + +def parse_item(): + t = token + name = None + if t[0] == 'bytes': + type_ = 'constant' + content = t[1] + get_token() + elif t[0] in ('bool', 'byte', + 'int16', 'int32', 'int64', + 'be16', 'be32', 'be64', + 'string', 'bestring', + 'float', 'double', + 'nonterminal', '...'): + type_ = 'variable' + content = t + get_token() + if t[0] == 'nonterminal': + name = name_to_id(content[1]) + elif t[0] in ('v1', 'v3', 'vAF', 'vB0', 'count', 'becount'): + type_ = t[0] + get_token() + must_match('(') + content = parse_choice() + must_match(')') + elif match('case'): + return parse_case() + elif match('('): + type_ = '()' + content = parse_choice() + must_match(')') + else: + print token + fatal('syntax error expecting item') + + n = 1 + optional = False + if match('*'): + if token[0] == 'number': + n = token[1] + get_token() + elif match('['): + expect('id') + n = token[1] + get_token() + must_match(']') + if n.startswith('n-'): + name = n[2:] + else: + fatal('expecting quantity') + elif match('?'): + optional = True + + if match('['): + expect('id') + if type_ == 'constant' and not optional: + fatal("%s: cannot name a constant" % token[1]) + + name = token[1] + get_token() + must_match(']') + + if type_ == 'constant': + content *= n + n = 1 + + item = Item(type_, name, n, content) + if optional: + item = Item('|', None, 1, [[item], []]) + return item + + +def parse_concatenation(): + items = [] + while token[0] not in (')', ';', '|', 'eof'): + item = parse_item() + if (item.type_ == 'constant' + and items + and items[-1].type_ == 'constant'): + items[-1].content += item.content + else: + items.append(item) + return items + + +def parse_choice(): + sub = parse_concatenation() + if token[0] != '|': + return sub + + choices = [sub] + while match('|'): + choices.append(parse_concatenation()) + + return [Item('|', None, 1, choices)] + + +def parse_case(): + must_match('(') + choices = {} + while True: + choice = None + if match('else'): + choice = 'else' + + items = parse_concatenation() + if choice is None: + if (not items + or items[0].type_ != 'constant' + or len(items[0].content) != 1): + fatal("choice must begin with xx (or 'else')") + choice = '%02x' % ord(items[0].content) + + if choice in choices: + fatal("duplicate choice %s" % choice) + choices[choice] = items + + if match(')'): + break + must_match('|') + + case_name = None + if match('['): + expect('id') + case_name = token[1] + get_token() + must_match(']') + + return Item('case', case_name, 1, + { '%s_%s' % (case_name, k) : v for k, v in choices.items() }) + + +def parse_production(): + expect('nonterminal') + name = token[1] + get_token() + must_match('=>') + return name, parse_choice() + + +def print_members(p, indent): + for item in p: + if item.type_ == 'variable' and item.name: + if item.content[0] == 'nonterminal': + typename = 'struct %s%s' % (prefix, + name_to_id(item.content[1])) + n_stars = 1 + else: + c_types = {'bool': ('bool', 0), + 'byte': ('uint8_t', 0), + 'int16': ('uint16_t', 0), + 'int32': ('uint32_t', 0), + 'int64': ('uint64_t', 0), + 'be16': ('uint16_t', 0), + 'be32': ('uint32_t', 0), + 'be64': ('uint64_t', 0), + 'string': ('char', 1), + 'bestring': ('char', 1), + 'float': ('double', 0), + 'double': ('double', 0), + '...': ('uint8_t', 1)} + typename, n_stars = c_types[item.content[0]] + + array_suffix = '' + if item.n: + if isinstance(item.n, int): + if item.n > 1: + array_suffix = '[%d]' % item.n + else: + n_stars += 1 + + print "%s%s %s%s%s;" % (indent, typename, '*' * n_stars, + name_to_id(item.name), + array_suffix) + elif item.type_ in ('v1', 'v3', 'vAF', 'vB0', + 'count', 'becount', '()'): + print_members(item.content, indent) + elif item.type_ == '|': + for choice in item.content: + print_members(choice, indent) + elif item.type_ == 'case': + print "%sint %s;" % (indent, item.name) + print "%sunion {" % indent + for name, choice in sorted(item.content.items()): + print "%s struct {" % indent + print_members(choice, indent + ' ' * 8) + print "%s } %s;" % (indent, name) + print "%s};" % indent + elif item.type_ == 'constant': + if item.name: + print "%sbool %s;" % (indent, item.name) + elif item.type_ not in ("constant", "variable"): + fatal("unhandled type %s" % item.type_) + + +def bytes_to_hex(s): + return ''.join(['"'] + ["\\x%02x" % ord(x) for x in s] + ['"']) + + +class Parser_Context(object): + def __init__(self): + self.suffixes = {} + self.bail = 'error' + self.need_error_handler = False + def gen_name(self, prefix): + n = self.suffixes.get(prefix, 0) + 1 + self.suffixes[prefix] = n + return '%s%d' % (prefix, n) if n > 1 else prefix + def save_pos(self, indent): + pos = self.gen_name('pos') + print "%sstruct spvbin_position %s = spvbin_position_save (input);" % (indent, pos) + return pos + def save_error(self, indent): + error = self.gen_name('save_n_errors') + print "%ssize_t %s = input->n_errors;" % (indent, error) + return error + def parse_limit(self, endian, indent): + limit = self.gen_name('saved_limit') + print """\ +%sstruct spvbin_limit %s; +%sif (!spvbin_limit_parse%s (&%s, input)) +%s goto %s;""" % ( + indent, limit, + indent, '_be' if endian == 'big' else '', limit, + indent, self.bail) + return limit + + +def print_parser_items(name, production, indent, accessor, ctx): + for item_idx in range(len(production)): + if item_idx > 0: + print + + item = production[item_idx] + if item.type_ == 'constant': + print """%sif (!spvbin_match_bytes (input, %s, %d)) +%s goto %s;""" % ( + indent, bytes_to_hex(item.content), len(item.content), + indent, ctx.bail) + ctx.need_error_handler = True + if item.name: + print "%sp->%s = true;" % (indent, item.name) + elif item.type_ == 'variable': + if item.content[0] == 'nonterminal': + func = '%sparse_%s' % (prefix, name_to_id(item.content[1])) + else: + func = 'spvbin_parse_%s' % item.content[0] + + if item.name: + dst = "&p->%s%s" % (accessor, name_to_id(item.name)) + else: + dst = "NULL" + if item.n == 1: + print """%sif (!%s (input, %s)) +%s goto %s;""" % (indent, func, dst, + indent, ctx.bail) + + if item.content[0] != 'nonterminal' and item.name == 'version': + print "%sinput->version = p->%s%s;" % ( + indent, accessor, name_to_id(item.name)) + else: + if isinstance(item.n, int): + count = item.n + else: + count = 'p->%s%s' % (accessor, name_to_id(item.n)) + + i_name = ctx.gen_name('i') + if item.name: + if not isinstance(item.n, int): + print "%sp->%s%s = xcalloc (%s, sizeof *p->%s%s);" % ( + indent, + accessor, name_to_id(item.name), count, + accessor, name_to_id(item.name)) + dst += '[%s]' % i_name + print "%sfor (int %s = 0; %s < %s; %s++)" % ( + indent, i_name, i_name, count, i_name) + print """%s if (!%s (input, %s)) +%s goto %s;""" % (indent, func, dst, + indent, ctx.bail) + + ctx.need_error_handler = True + elif item.type_ == '()': + if item.n != 1: + # Not yet implemented + raise AssertionError + + print_parser_items(name, item.content, indent, accessor, ctx) + elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'): + if item.n != 1: + # Not yet implemented + raise AssertionError + + print "%sif (input->version == 0x%s) {" % (indent, item.type_[1:]) + print_parser_items(name, item.content, indent + ' ', accessor, ctx) + print "%s}" % indent + elif item.type_ in ('count', 'becount'): + if item.n != 1: + # Not yet implemented + raise AssertionError + + pos = ctx.save_pos(indent) + endian = 'big' if item.type_ == 'becount' else 'little' + limit = ctx.parse_limit(endian, indent) + + save_bail = ctx.bail + ctx.bail = ctx.gen_name('backtrack') + + print "%sdo {" % indent + indent += ' ' + if (item.content + and item.content[-1].type_ == 'variable' + and item.content[-1].content[0] == '...'): + content = item.content[:-1] + ellipsis = True + else: + content = item.content + ellipsis = False + print_parser_items(name, content, indent, accessor, ctx) + + if ellipsis: + print "%sinput->ofs = input->size;" % indent + else: + print """%sif (!spvbin_input_at_end (input)) +%s goto %s;""" % (indent, + indent, ctx.bail) + print '%sspvbin_limit_pop (&%s, input);' % (indent, limit) + print '%sbreak;' % indent + print + print '%s%s:' % (indent[4:], ctx.bail) + # In theory, we should emit code to clear whatever we're + # backtracking from. In practice, it's not important to + # do that. + print "%sspvbin_position_restore (&%s, input);" % (indent, pos) + print '%sspvbin_limit_pop (&%s, input);' % (indent, limit) + print '%sgoto %s;' % (indent, save_bail) + indent = indent[4:] + print "%s} while (0);" % indent + + ctx.bail = save_bail + elif item.type_ == '|': + save_bail = ctx.bail + + print "%sdo {" % indent + indent += ' ' + pos = ctx.save_pos(indent) + error = ctx.save_error(indent) + i = 0 + for choice in item.content: + if i: + print "%sspvbin_position_restore (&%s, input);" % (indent, pos) + print "%sinput->n_errors = %s;" % (indent, error) + i += 1 + + if i != len(item.content): + ctx.bail = ctx.gen_name('backtrack') + else: + ctx.bail = save_bail + print_parser_items(name, choice, indent, accessor, ctx) + print "%sbreak;" % indent + if i != len(item.content): + print + print '%s%s:' % (indent[4:], ctx.bail) + # In theory, we should emit code to clear whatever we're + # backtracking from. In practice, it's not important to + # do that. + indent = indent[4:] + print "%s} while (0);" % indent + elif item.type_ == 'case': + i = 0 + for choice_name, choice in sorted(item.content.items()): + if choice_name.endswith('else'): + print "%s} else {" % indent + print "%s p->%s%s = -1;" % (indent, accessor, item.name) + print + else: + print "%s%sif (spvbin_match_byte (input, 0x%s)) {" % ( + indent, '} else ' if i else '', choice_name[-2:]) + print "%s p->%s%s = 0x%s;" % ( + indent, accessor, item.name, choice_name[-2:]) + print + choice = choice[1:] + + print_parser_items(name, choice, indent + ' ', + accessor + choice_name + '.', ctx) + i += 1 + print "%s}" % indent + else: + # Not implemented + raise AssertionError + + +def print_parser(name, production, indent): + print ''' +bool +%(prefix)sparse_%(name)s (struct spvbin_input *input, struct %(prefix)s%(name)s **p_) +{ + *p_ = NULL; + struct %(prefix)s%(name)s *p = xzalloc (sizeof *p); + p->start = input->ofs; +''' % {'prefix': prefix, + 'name': name_to_id(name)} + + ctx = Parser_Context() + print_parser_items(name, production, indent, '', ctx) + + print ''' + p->len = input->ofs - p->start; + *p_ = p; + return true;''' + + if ctx.need_error_handler: + print """ +error: + spvbin_error (input, "%s", p->start); + %sfree_%s (p); + return false;""" % (name, prefix, name_to_id(name)) + + print "}" + +def print_free_items(name, production, indent, accessor, ctx): + for item in production: + if item.type_ == 'constant': + pass + elif item.type_ == 'variable': + if not item.name: + continue + + if item.content[0] == 'nonterminal': + free_func = '%sfree_%s' % (prefix, name_to_id(item.content[1])) + elif item.content[0] in ('string', 'bestring', '...'): + free_func = 'free' + else: + free_func = None + + dst = "p->%s%s" % (accessor, name_to_id(item.name)) + + if item.n == 1: + if free_func: + print "%s%s (%s);" % (indent, free_func, dst) + else: + if isinstance(item.n, int): + count = item.n + else: + count = 'p->%s%s' % (accessor, name_to_id(item.n)) + + i_name = ctx.gen_name('i') + if free_func: + print "%sfor (int %s = 0; %s < %s; %s++)" % ( + indent, i_name, i_name, count, i_name) + print "%s %s (%s[%s]);" % ( + indent, free_func, dst, i_name) + if not isinstance(item.n, int): + print "%sfree (p->%s%s);" % ( + indent, accessor, name_to_id(item.name)) + elif item.type_ in ('()', 'v1', 'v3', 'vAF', 'vB0', + 'count', 'becount'): + if item.n != 1: + # Not yet implemented + raise AssertionError + + print_free_items(name, item.content, indent, accessor, ctx) + elif item.type_ == '|': + for choice in item.content: + print_free_items(name, choice, indent, accessor, ctx) + elif item.type_ == 'case': + i = 0 + for choice_name, choice in sorted(item.content.items()): + if choice_name.endswith('else'): + value_name = '-1' + else: + value_name = '0x%s' % choice_name[-2:] + + print '%s%sif (p->%s%s == %s) {' % ( + indent, '} else ' if i else '', accessor, item.name, + value_name) + + print_free_items(name, choice, indent + ' ', + accessor + choice_name + '.', ctx) + i += 1 + print "%s}" % indent + else: + # Not implemented + raise AssertionError + +def print_free(name, production, indent): + print ''' +void +%(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *p) +{ + if (p == NULL) + return; +''' % {'prefix': prefix, + 'name': name_to_id(name)} + + print_free_items(name, production, indent, '', Parser_Context()) + + print " free (p);" + print "}" + +def print_print_items(name, production, indent, accessor, ctx): + for item_idx in range(len(production)): + if item_idx > 0: + print + + item = production[item_idx] + if item.type_ == 'constant': + if item.name: + print '%sspvbin_print_presence ("%s", indent + 1, p->%s);' % ( + indent, item.name, item.name) + elif item.type_ == 'variable': + if not item.name: + continue + + if item.content[0] == 'nonterminal': + func = '%sprint_%s' % (prefix, name_to_id(item.content[1])) + else: + c_types = {'bool': 'bool', + 'byte': 'byte', + 'int16': 'int16', + 'int32': 'int32', + 'int64': 'int64', + 'be16': 'int16', + 'be32': 'int32', + 'be64': 'int64', + 'string': 'string', + 'bestring': 'string', + 'float': 'double', + 'double': 'double', + '...': ('uint8_t', 1)} + func = 'spvbin_print_%s' % c_types[item.content[0]] + + dst = "p->%s%s" % (accessor, name_to_id(item.name)) + if item.n == 1: + print '%s%s ("%s", indent + 1, %s);' % (indent, func, + item.name, dst) + else: + if isinstance(item.n, int): + count = item.n + else: + count = 'p->%s%s' % (accessor, name_to_id(item.n)) + + i_name = ctx.gen_name('i') + elem_name = ctx.gen_name('elem_name') + dst += '[%s]' % i_name + print """\ +%(indent)sfor (int %(index)s = 0; %(index)s < %(count)s; %(index)s++) { +%(indent)s char *%(elem_name)s = xasprintf ("%(item.name)s[%%d]", %(index)s); +%(indent)s %(func)s (%(elem_name)s, indent + 1, %(dst)s); +%(indent)s free (%(elem_name)s); +%(indent)s}""" % {'indent': indent, + 'index': i_name, + 'count': count, + 'elem_name' : elem_name, + 'item.name': item.name, + 'func': func, + 'dst': dst} + elif item.type_ == '()': + if item.n != 1: + # Not yet implemented + raise AssertionError + + print_print_items(name, item.content, indent, accessor, ctx) + elif item.type_ in ('v1', 'v3', 'vAF', 'vB0'): + if item.n != 1: + # Not yet implemented + raise AssertionError + + print_print_items(name, item.content, indent, accessor, ctx) + elif item.type_ in ('count', 'becount'): + if item.n != 1: + # Not yet implemented + raise AssertionError + + indent += ' ' + if (item.content + and item.content[-1].type_ == 'variable' + and item.content[-1].content[0] == '...'): + content = item.content[:-1] + else: + content = item.content + print_print_items(name, content, indent, accessor, ctx) + elif item.type_ == '|': + for choice in item.content: + print_print_items(name, choice, indent, accessor, ctx) + elif item.type_ == 'case': + i = 0 + print """\ +%sspvbin_print_case ("%s", indent + 1, p->%s%s);""" % ( + indent, item.name, accessor, name_to_id(item.name)) + for choice_name, choice in sorted(item.content.items()): + if choice_name.endswith('else'): + value_name = '-1' + else: + value_name = '0x%s' % choice_name[-2:] + + print '%s%sif (p->%s%s == %s) {' % ( + indent, '} else ' if i else '', accessor, item.name, + value_name) + + print_print_items(name, choice, indent + ' ', + accessor + choice_name + '.', ctx) + i += 1 + print "%s}" % indent + else: + # Not implemented + raise AssertionError + + +def print_print(name, production, indent): + print ''' +void +%(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *p) +{ + spvbin_print_header (title, p ? p->start : -1, p ? p->len : -1, indent); + if (p == NULL) { + printf ("none\\n"); + return; + } + putchar ('\\n'); +''' % {'prefix': prefix, + 'rawname': name, + 'name': name_to_id(name)} + + ctx = Parser_Context() + print_print_items(name, production, indent, '', ctx) + + print "}" + +def name_to_id(s): + return s[0].lower() + ''.join(['_%c' % x.lower() if x.isupper() else x + for x in s[1:]]).replace('-', '_') + + +if __name__ == "__main__": + argv0 = sys.argv[0] + try: + options, args = getopt.gnu_getopt(sys.argv[1:], 'h', ['help']) + except getopt.GetoptError as e: + sys.stderr.write("%s: %s\n" % (argv0, e.msg)) + sys.exit(1) + + for key, value in options: + if key in ['-h', '--help']: + usage() + else: + sys.exit(0) + + if len(args) < 3: + sys.stderr.write("%s: bad usage (use --help for help)\n" % argv0) + sys.exit(1) + + global file_name + file_name, output_type, prefix = args[:3] + input_file = open(file_name) + + prefix = '%s_' % prefix + + global line + global line_number + line = "" + line_number = 0 + + productions = {} + + global token + token = ('start', ) + get_token() + while True: + while match(';'): + pass + if token[0] == 'eof': + break + + name, production = parse_production() + if name in productions: + fatal("%s: duplicate production" % name) + productions[name] = production + + print '/* Generated automatically -- do not modify! -*- buffer-read-only: t -*- */' + if output_type == 'code' and len(args) == 4: + header_name = args[3] + + print """\ +#include +#include %s +#include +#include +#include "libpspp/str.h" +#include "gl/xalloc.h"\ +""" % header_name + for name, production in productions.items(): + print_parser(name, production, ' ' * 4) + print_free(name, production, ' ' * 4) + print_print(name, production, ' ' * 4) + elif output_type == 'header' and len(args) == 3: + print """\ +#ifndef %(PREFIX)sPARSER_H +#define %(PREFIX)sPARSER_H + +#include +#include +#include +#include "output/spv/spvbin-helpers.h"\ +""" % {'PREFIX': prefix.upper()} + for name, production in productions.items(): + print '\nstruct %s%s {' % (prefix, name_to_id(name)) + print " size_t start, len;" + print_members(production, ' ' * 4) + print '''}; +bool %(prefix)sparse_%(name)s (struct spvbin_input *, struct %(prefix)s%(name)s **); +void %(prefix)sfree_%(name)s (struct %(prefix)s%(name)s *); +void %(prefix)sprint_%(name)s (const char *title, int indent, const struct %(prefix)s%(name)s *);\ +''' % {'prefix': prefix, + 'name': name_to_id(name)} + print """\ + +#endif /* %(PREFIX)sPARSER_H */""" % {'PREFIX': prefix.upper()} + else: + sys.stderr.write("%s: bad usage (use --help for help)" % argv0)