From 5604311ca7d0e6970c5449f225dd00c5804d91ef Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 13 Dec 2021 21:06:23 -0800 Subject: [PATCH] Rewrite code generator for expressions in Python. --- src/language/expressions/generate.pl | 2 +- src/language/expressions/generate.py | 978 +++++++++++++++++++++++++++ 2 files changed, 979 insertions(+), 1 deletion(-) create mode 100644 src/language/expressions/generate.py diff --git a/src/language/expressions/generate.pl b/src/language/expressions/generate.pl index 188d4eb11c..9355d7d374 100644 --- a/src/language/expressions/generate.pl +++ b/src/language/expressions/generate.pl @@ -792,7 +792,7 @@ sub generate_optimize_inc { push (@decls, "${ctype}*arg_$name = " . "get_$type->{ATOM}_args " - . " (node, $arg_idx, arg_$idx, e)"); + . "(node, $arg_idx, arg_$idx, e)"); } $arg_idx++; } diff --git a/src/language/expressions/generate.py b/src/language/expressions/generate.py new file mode 100644 index 0000000000..ef7e2ab7e7 --- /dev/null +++ b/src/language/expressions/generate.py @@ -0,0 +1,978 @@ +#! /usr/bin/python3 +# PSPP - a program for statistical analysis. +# Copyright (C) 2017, 2021 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +import getopt +import re +import sys + +argv0 = sys.argv[0] + +def init_all_types(): + """ + Defines all our types. + + Initializes 'types' global. +""" + + global types + types = {} + + # Common user-visible types used throughout evaluation trees. + init_type('number', 'any', C_TYPE='double', + ATOM='number', MANGLE='n', HUMAN_NAME='number', + STACK='ns', MISSING_VALUE='SYSMIS') + init_type('string', 'any', C_TYPE='struct substring', + ATOM='string', MANGLE='s', HUMAN_NAME='string', + STACK='ss', MISSING_VALUE='empty_string') + init_type('boolean', 'any', C_TYPE='double', + ATOM='number', MANGLE='n', HUMAN_NAME='boolean', + STACK='ns', MISSING_VALUE='SYSMIS') + + # Format types. + init_type('format', 'atom') + init_type('ni_format', 'leaf', C_TYPE='const struct fmt_spec *', + ATOM='format', MANGLE='f', + HUMAN_NAME='num_input_format') + init_type('no_format', 'leaf', C_TYPE='const struct fmt_spec *', + ATOM='format', MANGLE='f', + HUMAN_NAME='num_output_format') + + # Integer types. + init_type('integer', 'leaf', C_TYPE='int', + ATOM='integer', MANGLE='n', HUMAN_NAME='integer') + init_type('pos_int', 'leaf', C_TYPE='int', + ATOM='integer', MANGLE='n', + HUMAN_NAME='positive_integer_constant') + + # Variable names. + init_type('variable', 'atom') + init_type('num_var', 'leaf', C_TYPE='const struct variable *', + ATOM='variable', MANGLE='Vn', + HUMAN_NAME='num_variable') + init_type('str_var', 'leaf', C_TYPE='const struct variable *', + ATOM='variable', MANGLE='Vs', + HUMAN_NAME='string_variable') + init_type('var', 'leaf', C_TYPE='const struct variable *', + ATOM='variable', MANGLE='V', + HUMAN_NAME='variable') + + # Vectors. + init_type('vector', 'leaf', C_TYPE='const struct vector *', + ATOM='vector', MANGLE='v', HUMAN_NAME='vector') + + # Fixed types. + init_type('expression', 'fixed', C_TYPE='struct expression *', + FIXED_VALUE='e') + init_type('case', 'fixed', C_TYPE='const struct ccase *', + FIXED_VALUE='c') + init_type('case_idx', 'fixed', C_TYPE='size_t', + FIXED_VALUE='case_idx') + init_type('dataset', 'fixed', C_TYPE='struct dataset *', + FIXED_VALUE='ds') + + # One of these is emitted at the end of each expression as a sentinel + # that tells expr_evaluate() to return the value on the stack. + init_type('return_number', 'atom') + init_type('return_string', 'atom') + + # Used only for debugging purposes. + init_type('operation', 'atom') + +def init_type(name, role, **rest): + """ + init_type has 2 required arguments: + + NAME: Type name. + + 'name' is the type's name in operations.def. + + `OP_$name' is the terminal's type in operations.h. + + `expr_allocate_$name()' allocates a node of the given type. + + ROLE: How the type may be used: + + "any": Usable as operands and function arguments, and + function and operator results. + + "leaf": Usable as operands and function arguments, but + not function arguments or results. (Thus, they appear + only in leaf nodes in the parse type.) + + "fixed": Not allowed either as an operand or argument + type or a result type. Used only as auxiliary data. + + "atom": Not allowed anywhere; just adds the name to + the list of atoms. + + All types except those with "atom" as their role also require: + + C_TYPE: The C type that represents this abstract type. + + Types with "any" or "leaf" role require: + + ATOM: + + `$atom' is the `struct operation_data' member name. + + get_$atom_name() obtains the corresponding data from a + node. + + MANGLE: Short string for name mangling. Use identical strings + if two types should not be overloaded. + + HUMAN_NAME: Name for a type when we describe it to the user. + + Types with role "any" require: + + STACK: Name of the local variable in expr_evaluate(), used for + maintaining the stack for this type. + + MISSING_VALUE: Expression used for the missing value of this + type. + + Types with role "fixed" require: + + FIXED_VALUE: Expression used for the value of this type. +""" + global types + new_type = { 'NAME': name, 'ROLE': role } | rest + + need_keys = ['NAME', 'ROLE'] + if role == 'any': + need_keys += ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME', 'STACK', 'MISSING_VALUE'] + elif role == 'leaf': + need_keys += ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME'] + elif role == 'fixed': + need_keys += ['C_TYPE', 'FIXED_VALUE'] + elif role == 'atom': + pass + else: + sys.stderr.write("no role '%s'\n" % role) + sys.exit(1) + + for key in new_type.keys(): + if not key in new_type: + sys.stderr.write("%s lacks %s\n" % (name, key)) + sys.exit(1) + for key in need_keys: + if not key in need_keys: + sys.stderr.write("%s has superfluous key %s\n" % (name, key)) + sys.exit(1) + + types[name] = new_type + +# c_type(type). +# +def c_type(type_): + """Returns the C type of the given type as a string designed to be + prepended to a variable name to produce a declaration. (That + won't work in general but it works well enough for our types.) + """ + c_type = type_["C_TYPE"] + if not c_type.endswith('*'): + c_type += ' ' + return c_type + +# Input parsing. + +def parse_input(): + """Parses the entire input. + + Initializes ops, funcs, opers.""" + + global token + global toktype + global line_number + token = None + toktype = None + line_number = 0 + get_line() + get_token() + + global ops + global funcs + global opers + global order + ops = {} + funcs = [] + opers = [] + + while toktype != 'eof': + op = { + 'OPTIMIZABLE': True, + 'UNIMPLEMENTED': False, + 'EXTENSION': False, + 'PERM_ONLY': False, + 'ABSORB_MISS': False, + 'NO_ABBREV': False, + } + while True: + if match('extension'): + op['EXTENSION'] = True + elif match('no_opt'): + op['OPTIMIZABLE'] = False + elif match('absorb_miss'): + op['ABSORB_MISS'] = True + elif match('perm_only'): + op['PERM_ONLY'] = True + elif match('no_abbrev'): + op['NO_ABBREV'] = True + else: + break + + return_type = parse_type() + if return_type is None: + return_type = types['number'] + if return_type['NAME'] not in ['number', 'string', 'boolean']: + sys.stderr.write('%s is not a valid return type\n' % return_type['NAME']) + sys.exit(1) + op['RETURNS'] = return_type + + op['CATEGORY'] = token + if op['CATEGORY'] not in ['operator', 'function']: + sys.stderr.write("'operator' or 'function' expected at '%s'" % token) + sys.exit(1) + get_token() + + name = force('id') + if op['CATEGORY'] == 'function' and '_' in name: + sys.stderr.write("function name '%s' may not contain underscore\n" % name) + sys.exit(1) + elif op['CATEGORY'] == 'operator' and '.' in name: + sys.stderr.write("operator name '%s' may not contain period\n" % name) + sys.exit(1) + + m = re.match(r'(.*)\.(\d+)$', name) + if m: + prefix, suffix = m.groups() + name = prefix + op['MIN_VALID'] = int(suffix) + op['ABSORB_MISS'] = True + else: + op['MIN_VALID'] = 0 + op['NAME'] = name + + force_match('(') + op['ARGS'] = [] + while not match(')'): + arg = parse_arg() + op['ARGS'] += [arg] + if 'IDX' in arg: + if match(')'): + break + sys.stderr.write('array must be last argument\n') + sys.exit(1) + if not match(','): + force_match(')') + break + + for arg in op['ARGS']: + if 'CONDITION' in arg: + any_arg = '|'.join([a['NAME'] for a in op['ARGS']]) + arg['CONDITION'] = re.sub(r'\b(%s)\b' % any_arg, r'arg_\1', + arg['CONDITION']) + + opname = 'OP_' + op['NAME'] + opname = opname.replace('.', '_') + if op['CATEGORY'] == 'function': + print(op) + mangle = ''.join([a['TYPE']['MANGLE'] for a in op['ARGS']]) + op['MANGLE'] = mangle + opname += '_' + mangle + op['OPNAME'] = opname + + if op['MIN_VALID'] > 0: + aa = array_arg(op) + if aa is None: + sys.stderr.write("can't have minimum valid count without array arg\n") + sys.exit(1) + if aa['TYPE']['NAME'] != 'number': + sys.stderr.write('minimum valid count allowed only with double array\n') + sys.exit(1) + if aa['TIMES'] != 1: + sys.stderr.write("can't have minimu valid count if array has multiplication factor\n") + sys.exit(1) + + op['AUX'] = [] + while toktype == 'id': + type_ = parse_type() + if type_ is None: + sys.stderr.write('parse error\n') + sys.exit(1) + if type_['ROLE'] not in ['leaf', 'fixed']: + sys.stderr.write("'%s' is not allowed as auxiliary data\n" + % type_['NAME']) + sys.exit(1) + name = force('id') + op['AUX'] += [{'TYPE': type_, 'NAME': name}] + force_match(';') + + if op['OPTIMIZABLE']: + if op['NAME'].startswith('RV.'): + sys.stderr.write("random variate functions must be marked 'no_opt'\n") + sys.exit(1) + for key in ['CASE', 'CASE_IDX']: + if key in op['AUX']: + sys.stderr.write("operators with %s aux data must be marked 'no_opt'\n" % key) + sys.exit(1) + + if op['RETURNS']['NAME'] == 'string' and not op['ABSORB_MISS']: + for arg in op['ARGS']: + if arg['TYPE']['NAME'] in ['number', 'boolean']: + sys.stderr.write("'%s' returns string and has double or bool " + "argument, but is not marked ABSORB_MISS\n" + % op['NAME']) + sys.exit(1) + if 'CONDITION' in arg: + sys.stderr.write("'%s' returns string but has argument with condition\n") + sys.exit(1) + + if toktype == 'block': + op['BLOCK'] = force('block') + elif toktype == 'expression': + if token == 'unimplemented': + op['UNIMPLEMENTED'] = True + else: + op['EXPRESSION'] = token + get_token() + else: + sys.stderr.write("block or expression expected\n") + sys.exit(1) + + if opname in ops: + sys.stderr.write("duplicate operation name %s\n" % opname) + sys.exit(1) + ops[opname] = op + if op['CATEGORY'] == 'function': + funcs += [opname] + else: + opers += [opname] + in_file.close() + + funcs = sorted(funcs, key=lambda name: (ops[name]['NAME'], ops[name]['OPNAME'])) + opers = sorted(opers, key=lambda name: ops[name]['NAME']) + order = funcs + opers + +def get_token(): + """Reads the next token into 'token' and 'toktype'.""" + + global line + global token + global toktype + + lookahead() + if toktype == 'eof': + return + + print('%s %s' % (line, toktype)) + m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line) + if m: + token, line = m.groups() + toktype = 'id' + return + + m = re.match(r'([0-9]+)(.*)$', line) + if m: + token, line = m.groups() + token = int(token) + toktype = 'int' + return + + m = re.match(r'([][(),*;.])(.*)$', line) + if m: + token, line = m.groups() + toktype = 'punct' + return + + m = re.match(r'=\s*(.*)$', line) + if m: + toktype = 'expression' + line = m.group(1) + token = accumulate_balanced(';') + return + + m = re.match(r'{(.*)$', line) + if m: + toktype = 'block' + line = m.group(1) + token = accumulate_balanced('}') + token = token.rstrip('\n') + return + + sys.stderr.write("bad character '%s' in input\n" % line[0]) + sys.exit(1) + +def lookahead(): + """Skip whitespace.""" + global line + if line is None: + sys.stderr.write("unexpected end of file\n") + sys.exit(1) + + while True: + line = line.lstrip() + if line != "": + break + get_line() + if line is None: + global token + global toktype + token = 'eof' + toktype = 'eof' + return + +def accumulate_balanced(end, swallow_end=True): + """Accumulates input until a character in 'end' is encountered, + except that balanced pairs of (), [], or {} cause 'end' to be + ignored. Returns the input read. + """ + s = "" + nest = 0 + global line + print("line='%s'" % line) + while True: + print(type(line)) + for idx, c in enumerate(line): + print('nest=%s %s end=%s' % (nest, c, end)) + if c in end and nest == 0: + line = line[idx:] + if swallow_end: + line = line[1:] + s = s.strip('\r\n') + return s + elif c in "[({": + nest += 1 + elif c in "])}": + if nest > 0: + nest -= 1 + else: + sys.stderr.write('unbalanced parentheses\n') + sys.exit(1) + s += c + s += '\n' + get_line() + +def get_line(): + """Reads the next line from INPUT into 'line'.""" + global line + global line_number + line = in_file.readline() + line_number += 1 + print("%s\n" % line_number) + if line == '': + line = None + else: + line = line.rstrip('\r\n') + comment_ofs = line.find('//') + if comment_ofs >= 0: + line = line[:comment_ofs] + +def parse_type(): + """If the current token is an identifier that names a type, returns + the type and skips to the next token. Otherwise, returns + undef. + """ + if toktype == 'id': + for type_ in types.values(): + if type_.get("NAME") == token: + get_token() + return type_ + return None + +def force(type_): + """Makes sure that 'toktype' equals 'type', reads the next token, and + returns the previous 'token'. + + """ + if type_ != toktype: + sys.stderr.write("parse error at `%s' expecting %s\n" % (token, type_)) + sys.exit(1) + tok = token + get_token() + return tok + +def match(tok): + """If 'token' equals 'tok', reads the next token and returns true. + Otherwise, returns false.""" + if token == tok: + get_token() + return True + else: + return False + +def force_match(tok): + """If 'token' equals 'tok', reads the next token. Otherwise, flags an + error in the input. + """ + if not match(tok): + sys.stderr.write("parse error at `%s' expecting `%s'\n" % (token, tok)) + sys.exit(1) + +def parse_arg(): + """Parses and returns a function argument.""" + arg = {} + arg['TYPE'] = parse_type() + if arg['TYPE'] is None: + arg['TYPE'] = types['number'] + + if toktype != 'id': + sys.stderr.write("argument name expected at `%s'\n" % token) + sys.exit(1) + arg['NAME'] = token + + lookahead() + global line + print("line[0]=%s" % line[0]) + if line[0] in "[,)": + get_token() + print('token=%s toktype=%s' % (token, toktype)) + if match('['): + if arg['TYPE']['NAME'] not in ('number', 'string'): + sys.stderr.write('only double and string arrays supported\n') + sys.exit(1) + arg['IDX'] = force('id') + if match('*'): + arg['TIMES'] = force('int') + if arg['TIMES'] != 2: + sys.stderr.write('multiplication factor must be two\n') + sys.exit(1) + else: + arg['TIMES'] = 1 + force_match(']') + else: + arg['CONDITION'] = arg['NAME'] + ' ' + accumulate_balanced(',)', swallow_end=False) + get_token() + return arg + +def print_header(): + """Prints the output file header.""" + out_file.write("""\ +/* %s + Generated from %s by generate.py. + Do not modify! */ + +""" % (out_file_name, in_file_name)) + +def print_trailer(): + """Prints the output file trailer.""" + out_file.write("""\ + +/* + Local Variables: + mode: c + buffer-read-only: t + End: +*/ +""") + +def generate_evaluate_h(): + out_file.write("#include \"helpers.h\"\n\n") + + for opname in order: + op = ops[opname] + if op['UNIMPLEMENTED']: + continue + + args = [] + for arg in op['ARGS']: + if 'IDX' not in arg: + args += [c_type(arg['TYPE']) + arg['NAME']] + else: + args += [c_type(arg['TYPE']) + arg['NAME'] + '[]'] + args += ['size_t %s' % arg['IDX']] + for aux in op['AUX']: + args += [c_type(aux['TYPE']) + aux['NAME']] + if not args: + args += ['void'] + + if 'BLOCK' in op: + statements = op['BLOCK'] + '\n' + else: + statements = " return %s;\n" % op['EXPRESSION'] + + out_file.write("static inline %s\n" % c_type (op['RETURNS'])) + out_file.write("eval_%s (%s)\n" % (opname, ', '.join(args))) + out_file.write("{\n") + out_file.write(statements) + out_file.write("}\n\n") + +def generate_evaluate_inc(): + for opname in order: + op = ops[opname] + if op['UNIMPLEMENTED']: + out_file.write("case %s:\n" % opname) + out_file.write(" NOT_REACHED ();\n\n") + continue + + decls = [] + args = [] + for arg in op['ARGS']: + name = arg['NAME'] + type_ = arg['TYPE'] + ctype = c_type(type_) + args += ['arg_%s' % name] + if 'IDX' not in arg: + decl = '%sarg_%s' % (ctype, name) + if type_['ROLE'] == 'any': + decls = ['%s = *--%s' % (decl, type_['STACK'])] + decls + elif type_['ROLE'] == 'leaf': + decls += ['%s = op++->%s' % (decl, type_['ATOM'])] + else: + assert False + else: + idx = arg['IDX'] + stack = type_['STACK'] + decls = ['%s*arg_%s = %s -= arg_%s' % (ctype, name, stack, idx)] + decls + decls = ['size_t arg_%s = op++->integer' % idx] + decls + + idx = 'arg_%s' % idx + if arg['TIMES'] != 1: + idx += ' / %s' % arg['TIMES'] + args += [idx] + for aux in op['AUX']: + type_ = aux['TYPE'] + name = aux['NAME'] + if type_['ROLE'] == 'leaf': + ctype = c_type(type_) + decls += ['%saux_%s = op++->%s' % (ctype, name, type_['ATOM'])] + args += ['aux_%s' % name] + elif type_['ROLE'] == 'fixed': + args += [type_['FIXED_VALUE']] + + sysmis_cond = make_sysmis_decl(op, 'op++->integer') + if sysmis_cond is not None: + decls += [sysmis_cond] + + print(args) + result = 'eval_%s (%s)' % (op['OPNAME'], ', '.join(args)) + + stack = op['RETURNS']['STACK'] + + out_file.write("case %s:\n" % opname) + if decls: + out_file.write(" {\n") + for decl in decls: + out_file.write(" %s;\n" % decl) + if sysmis_cond is not None: + miss_ret = op['RETURNS']['MISSING_VALUE'] + out_file.write(" *%s++ = force_sysmis ? %s : %s;\n" % (stack, miss_ret, result)) + else: + out_file.write(" *%s++ = %s;\n" % (stack, result)) + out_file.write(" }\n") + else: + out_file.write(" *%s++ = %s;\n" % (stack, result)) + out_file.write(" break;\n\n") + +def generate_operations_h(): + out_file.write("#include \n") + out_file.write("#include \n\n") + + out_file.write("typedef enum") + out_file.write(" {\n") + atoms = [] + for type_ in types.values(): + if type_['ROLE'] != 'fixed': + atoms += ["OP_%s" % type_['NAME']] + + print_operations('atom', 1, atoms) + print_operations('function', "OP_atom_last + 1", funcs) + print_operations('operator', "OP_function_last + 1", opers) + print_range("OP_composite", "OP_function_first", "OP_operator_last") + out_file.write(",\n\n") + print_range("OP", "OP_atom_first", "OP_composite_last") + out_file.write("\n }\n") + out_file.write("operation_type, atom_type;\n") + + print_predicate('is_operation', 'OP') + for key in ('atom', 'composite', 'function', 'operator'): + print_predicate("is_%s" % key, "OP_%s" % key) + +def print_operations(type_, first, names): + out_file.write(" /* %s types. */\n" % type_.title()) + out_file.write(" %s = %s,\n" % (names[0], first)) + for name in names[1:]: + out_file.write(" %s,\n" % name) + print_range("OP_%s" % type_, names[0], names[len(names) - 1]) + out_file.write(",\n\n") + +def print_range(prefix, first, last): + out_file.write(" %s_first = %s,\n" % (prefix, first)) + out_file.write(" %s_last = %s,\n" % (prefix, last)) + out_file.write(" n_%s = %s_last - %s_first + 1" % (prefix, prefix, prefix)) + +def print_predicate(function, category): + assertion = "" + + out_file.write("\nstatic inline bool\n") + out_file.write("%s (operation_type op)\n" % function) + out_file.write("{\n") + if function != 'is_operation': + out_file.write(" assert (is_operation (op));\n") + out_file.write(" return op >= %s_first && op <= %s_last;\n" % (category, category)) + out_file.write("}\n") + +def generate_optimize_inc(): + for opname in order: + op = ops[opname] + + if not op['OPTIMIZABLE'] or op['UNIMPLEMENTED']: + out_file.write("case %s:\n" % opname) + out_file.write(" NOT_REACHED ();\n\n") + continue + + decls = [] + arg_idx = 0 + for arg in op['ARGS']: + name = arg['NAME'] + type_ = arg['TYPE'] + ctype = c_type(type_) + if not 'IDX' in arg: + func = "get_%s_arg" % type_['ATOM'] + decls += ["%sarg_%s = %s (node, %s)" % (ctype, name, func, arg_idx)] + else: + idx = arg['IDX'] + decl = "size_t arg_%s = node->n_args" % idx + if arg_idx > 0: + decl += " - %s" % arg_idx + decls += [decl] + + decls += ["%s*arg_%s = get_%s_args (node, %s, arg_%s, e)" % (ctype, name, type_['ATOM'], arg_idx, idx)] + arg_idx += 1 + + sysmis_cond = make_sysmis_decl (op, "node->min_valid") + if sysmis_cond is not None: + decls += [sysmis_cond] + + args = [] + for arg in op['ARGS']: + args += ["arg_%s" % arg['NAME']] + if 'IDX' in arg: + idx = 'arg_%s' % arg['IDX'] + if arg['TIMES'] != 1: + idx += " / %s" % arg['TIMES'] + args += [idx] + + for aux in op['AUX']: + type_ = aux['TYPE'] + if type_['ROLE'] == 'leaf': + func = "get_%s_arg" % type_['ATOM'] + args += "%s (node, %s)" % (func, arg_idx) + arg_idx += 1 + elif type_['ROLE'] == 'fixed': + args += [type_['FIXED_VALUE']] + else: + assert False + + result = "eval_%s (%s)" % (op['OPNAME'], ', '.join(args)) + if decls and sysmis_cond is not None: + miss_ret = op['RETURNS']['MISSING_VALUE'] + decls += ['%sresult = force_sysmis ? %s : %s' % (c_type(op['RETURNS']), miss_ret, result)] + result = 'result' + + out_file.write("case %s:\n" % opname) + alloc_func = "expr_allocate_%s" % op['RETURNS']['NAME'] + if decls: + out_file.write(" {\n") + for decl in decls: + out_file.write(" %s;\n" % decl) + out_file.write(" return %s (e, %s);\n" % (alloc_func, result)) + out_file.write(" }\n") + else: + out_file.write(" return %s (e, %s);\n" % (alloc_func, result)) + out_file.write("\n") + +def generate_parse_inc(): + members = ['""', '""', '0', '0', '0', "{}", '0', '0'] + out_file.write("{%s},\n" % ', '.join(members)) + + for type_ in types.values(): + if type_['ROLE'] != 'fixed': + human_name = type_.get('HUMAN_NAME', type_['NAME']) + members = ('"%s"' % type_['NAME'], '"%s"' % human_name, '0', "OP_%s" % type_['NAME'], '0', "{}", '0', '0') + out_file.write("{%s},\n" % ', '.join(members)) + + for opname in order: + op = ops[opname] + + members = [] + members += ['"%s"' % op['NAME']] + + if op['CATEGORY'] == 'function': + args = [] + opt_args = [] + for arg in op['ARGS']: + if 'IDX' not in arg: + args += [arg['TYPE']['HUMAN_NAME']] + + array = array_arg(op) + if array is not None: + if op['MIN_VALID'] == 0: + array_args = [] + for i in range(array['TIMES']): + array_args += [array['TYPE']['HUMAN_NAME']] + args += array_args + opt_args = array_args + else: + for i in range(op['MIN_VALID']): + args += [array['TYPE']['HUMAN_NAME']] + opt_args += [array['TYPE']['HUMAN_NAME']] + human = "%s(%s" % (op['NAME'], ', '.join(args)) + if opt_args: + human += '[, %s]...' % ', '.join(opt_args) + human += ')' + members += ['"%s"' % human] + else: + members += ['NULL'] + + flags = [] + if op['ABSORB_MISS']: + flags += ['OPF_ABSORB_MISS'] + if array_arg(op): + flags += ['OPF_ARRAY_OPERAND'] + if op['MIN_VALID'] > 0: + flags += ['OPF_MIN_VALID'] + if not op['OPTIMIZABLE']: + flags += ['OPF_NONOPTIMIZABLE'] + if op['EXTENSION']: + flags += ['OPF_EXTENSION'] + if op['UNIMPLEMENTED']: + flags += ['OPF_UNIMPLEMENTED'] + if op['PERM_ONLY']: + flags += ['OPF_PERM_ONLY'] + if op['NO_ABBREV']: + flags += ['OPF_NO_ABBREV'] + members += [' | '.join(flags) if flags else '0'] + + members += ['OP_%s' % op['RETURNS']['NAME']] + + members += ['%s' % len(op['ARGS'])] + + arg_types = ["OP_%s" % arg['TYPE']['NAME'] for arg in op['ARGS']] + members += ['{%s}' % ', '.join(arg_types)] + + members += ['%s' % op['MIN_VALID']] + + members += ['%s' % (array_arg(op)['TIMES'] if array_arg(op) else 0)] + + out_file.write('{%s},\n' % ', '.join(members)) + +# Utilities. + +def make_sysmis_decl(op, min_valid_src): + """Returns a declaration for a boolean variable called `force_sysmis', + which will be true when operation 'op' should be system-missing. + Returns None if there are no such circumstances. + + If 'op' has a minimum number of valid arguments, 'min_valid_src' + should be an an expression that evaluates to the minimum number of + valid arguments for 'op'. + + """ + sysmis_cond = [] + if not op['ABSORB_MISS']: + for arg in op['ARGS']: + arg_name = 'arg_%s' % arg['NAME'] + if 'IDX' not in arg: + if arg['TYPE']['NAME'] in ['number', 'boolean']: + sysmis_cond += ["!is_valid (%s)" % arg_name] + elif arg['TYPE']['NAME'] == 'number': + a = arg_name + n = 'arg_%s' % arg['IDX'] + sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)] + elif op['MIN_VALID'] > 0: + args = op['ARGS'] + arg = args[-1] + a = 'arg_%s' % arg['NAME'] + n = 'arg_%s' % arg['IDX'] + sysmis_cond += ["count_valid (%s, %s) < %s" % (a, n, min_valid_src)] + for arg in op['ARGS']: + if 'CONDITION' in arg: + sysmis_cond += ['!(%s)' % arg['CONDITION']] + if sysmis_cond: + return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond) + return None + +def array_arg(op): + """If 'op' has an array argument, returns it. Otherwise, returns + None.""" + args = op['ARGS'] + if not args: + return None + last_arg = args[-1] + if 'IDX' in last_arg: + return last_arg + return None + +def usage(): + print("""\ +%(argv0)s, for generating expression parsers and evaluators from definitions +usage: generate.pl -o OUTPUT [-i INPUT] [-h] + -i INPUT input file containing definitions (default: operations.def) + -o OUTPUT output file + -h display this help message +""" % {'argv0': argv0}) + sys.exit(0) + +if __name__ == "__main__": + try: + options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:', + ['input=s', + 'output=s', + 'help']) + except getopt.GetoptError as geo: + sys.stderr.write("%s: %s\n" % (argv0, geo.msg)) + sys.exit(1) + + in_file_name = 'operations.def' + out_file_name = None + for key, value in options: + if key in ['-h', '--help']: + usage() + elif key in ['-i', '--input']: + in_file_name = value + elif key in ['-o', '--output']: + out_file_name = value + else: + sys.exit(0) + + if out_file_name is None: + sys.stderr.write("%(argv0)s: output file must be specified " + "(use --help for help)\n" % {'argv0': argv0}) + sys.exit(1) + + in_file = open(in_file_name, 'r') + out_file = open(out_file_name, 'w') + + init_all_types() + parse_input() + + print_header() + if out_file_name.endswith('evaluate.h'): + generate_evaluate_h() + elif out_file_name.endswith('evaluate.inc'): + generate_evaluate_inc() + elif out_file_name.endswith('operations.h'): + generate_operations_h() + elif out_file_name.endswith('optimize.inc'): + generate_optimize_inc() + elif out_file_name.endswith('parse.inc'): + generate_parse_inc() + else: + sys.stderr.write("%(argv0)s: unknown output type\n") + sys.exit(1) + print_trailer() -- 2.30.2