From 068e39ec18fe1d17c9f339dab56c7fe0b9406b62 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 13 Dec 2021 21:41:51 -0800 Subject: [PATCH] Introduce class for Type. --- src/language/expressions/generate.py | 316 +++++++++++++-------------- 1 file changed, 148 insertions(+), 168 deletions(-) diff --git a/src/language/expressions/generate.py b/src/language/expressions/generate.py index 6cb6a9f257..4d87ce8f06 100644 --- a/src/language/expressions/generate.py +++ b/src/language/expressions/generate.py @@ -32,149 +32,132 @@ def init_all_types(): types = {} # Common user-visible types used throughout evaluation trees. - init_type('number', 'any', C_TYPE='double', - ATOM='number', MANGLE='n', HUMAN_NAME='number', - STACK='ns', MISSING_VALUE='SYSMIS') - init_type('string', 'any', C_TYPE='struct substring', - ATOM='string', MANGLE='s', HUMAN_NAME='string', - STACK='ss', MISSING_VALUE='empty_string') - init_type('boolean', 'any', C_TYPE='double', - ATOM='number', MANGLE='n', HUMAN_NAME='boolean', - STACK='ns', MISSING_VALUE='SYSMIS') + init_type(Type.new_any('number', 'double', 'number', 'n', 'number', 'ns', 'SYSMIS')) + init_type(Type.new_any('string', 'struct substring', 'string', 's', 'string', 'ss', 'empty_string')) + init_type(Type.new_any('boolean', 'double', 'number', 'n', 'boolean', 'ns', 'SYSMIS')) # Format types. - init_type('format', 'atom') - init_type('ni_format', 'leaf', C_TYPE='const struct fmt_spec *', - ATOM='format', MANGLE='f', - HUMAN_NAME='num_input_format') - init_type('no_format', 'leaf', C_TYPE='const struct fmt_spec *', - ATOM='format', MANGLE='f', - HUMAN_NAME='num_output_format') + init_type(Type.new_atom('format')) + init_type(Type.new_leaf('ni_format', 'const struct fmt_spec *', 'format', 'f', 'num_input_format')) + init_type(Type.new_leaf('no_format', 'const struct fmt_spec *', 'format', 'f', 'num_output_format')) # Integer types. - init_type('integer', 'leaf', C_TYPE='int', - ATOM='integer', MANGLE='n', HUMAN_NAME='integer') - init_type('pos_int', 'leaf', C_TYPE='int', - ATOM='integer', MANGLE='n', - HUMAN_NAME='positive_integer_constant') + init_type(Type.new_leaf('integer', 'int', 'integer', 'n', 'integer')) + init_type(Type.new_leaf('pos_int', 'int', 'integer', 'n', 'positive_integer_constant')) # Variable names. - init_type('variable', 'atom') - init_type('num_var', 'leaf', C_TYPE='const struct variable *', - ATOM='variable', MANGLE='Vn', - HUMAN_NAME='num_variable') - init_type('str_var', 'leaf', C_TYPE='const struct variable *', - ATOM='variable', MANGLE='Vs', - HUMAN_NAME='string_variable') - init_type('var', 'leaf', C_TYPE='const struct variable *', - ATOM='variable', MANGLE='V', - HUMAN_NAME='variable') + init_type(Type.new_atom('variable')) + init_type(Type.new_leaf('num_var', 'const struct variable *', 'variable', 'Vn', 'num_variable')) + init_type(Type.new_leaf('str_var', 'const struct variable *', 'variable', 'Vs', 'string_variable')) + init_type(Type.new_leaf('var', 'const struct variable *', 'variable', 'V', 'variable')) # Vectors. - init_type('vector', 'leaf', C_TYPE='const struct vector *', - ATOM='vector', MANGLE='v', HUMAN_NAME='vector') + init_type(Type.new_leaf('vector', 'const struct vector *', 'vector', 'v', 'vector')) # Fixed types. - init_type('expression', 'fixed', C_TYPE='struct expression *', - FIXED_VALUE='e') - init_type('case', 'fixed', C_TYPE='const struct ccase *', - FIXED_VALUE='c') - init_type('case_idx', 'fixed', C_TYPE='size_t', - FIXED_VALUE='case_idx') - init_type('dataset', 'fixed', C_TYPE='struct dataset *', - FIXED_VALUE='ds') + init_type(Type.new_fixed('expression', 'struct expression *', 'e')) + init_type(Type.new_fixed('case', 'const struct ccase *', 'c')) + init_type(Type.new_fixed('case_idx', 'size_t', 'case_idx')) + init_type(Type.new_fixed('dataset', 'struct dataset *', 'ds')) # One of these is emitted at the end of each expression as a sentinel # that tells expr_evaluate() to return the value on the stack. - init_type('return_number', 'atom') - init_type('return_string', 'atom') + init_type(Type.new_atom('return_number')) + init_type(Type.new_atom('return_string')) # Used only for debugging purposes. - init_type('operation', 'atom') + init_type(Type.new_atom('operation')) -def init_type(name, role, **rest): - """ - init_type has 2 required arguments: - - NAME: Type name. - - 'name' is the type's name in operations.def. - - `OP_$name' is the terminal's type in operations.h. - - `expr_allocate_$name()' allocates a node of the given type. - - ROLE: How the type may be used: - - "any": Usable as operands and function arguments, and - function and operator results. - - "leaf": Usable as operands and function arguments, but - not function arguments or results. (Thus, they appear - only in leaf nodes in the parse type.) - - "fixed": Not allowed either as an operand or argument - type or a result type. Used only as auxiliary data. - - "atom": Not allowed anywhere; just adds the name to - the list of atoms. - - All types except those with "atom" as their role also require: - - C_TYPE: The C type that represents this abstract type. - - Types with "any" or "leaf" role require: - - ATOM: - - `$atom' is the `struct operation_data' member name. - - get_$atom_name() obtains the corresponding data from a - node. - - MANGLE: Short string for name mangling. Use identical strings - if two types should not be overloaded. - - HUMAN_NAME: Name for a type when we describe it to the user. - - Types with role "any" require: - - STACK: Name of the local variable in expr_evaluate(), used for - maintaining the stack for this type. - - MISSING_VALUE: Expression used for the missing value of this - type. - - Types with role "fixed" require: - - FIXED_VALUE: Expression used for the value of this type. """ - global types - new_type = { 'NAME': name, 'ROLE': role } | rest - - need_keys = ['NAME', 'ROLE'] - if role == 'any': - need_keys += ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME', 'STACK', 'MISSING_VALUE'] - elif role == 'leaf': - need_keys += ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME'] - elif role == 'fixed': - need_keys += ['C_TYPE', 'FIXED_VALUE'] - elif role == 'atom': - pass - else: - sys.stderr.write("no role '%s'\n" % role) - sys.exit(1) +init_type has 2 required arguments: - for key in new_type.keys(): - if not key in new_type: - sys.stderr.write("%s lacks %s\n" % (name, key)) - sys.exit(1) - for key in need_keys: - if not key in need_keys: - sys.stderr.write("%s has superfluous key %s\n" % (name, key)) - sys.exit(1) + NAME: Type name. + + 'name' is the type's name in operations.def. + + `OP_$name' is the terminal's type in operations.h. + + `expr_allocate_$name()' allocates a node of the given type. + + ROLE: How the type may be used: + + "any": Usable as operands and function arguments, and + function and operator results. + + "leaf": Usable as operands and function arguments, but + not function arguments or results. (Thus, they appear + only in leaf nodes in the parse type.) + + "fixed": Not allowed either as an operand or argument + type or a result type. Used only as auxiliary data. + + "atom": Not allowed anywhere; just adds the name to + the list of atoms. + +All types except those with "atom" as their role also require: + + C_TYPE: The C type that represents this abstract type. + +Types with "any" or "leaf" role require: - types[name] = new_type + ATOM: + + `$atom' is the `struct operation_data' member name. + + get_$atom_name() obtains the corresponding data from a + node. + + MANGLE: Short string for name mangling. Use identical strings + if two types should not be overloaded. + + HUMAN_NAME: Name for a type when we describe it to the user. + +Types with role "any" require: + + STACK: Name of the local variable in expr_evaluate(), used for + maintaining the stack for this type. + + MISSING_VALUE: Expression used for the missing value of this + type. + +Types with role "fixed" require: + + FIXED_VALUE: Expression used for the value of this type. +""" +class Type: + def __init__(self, name, role, human_name): + self.name = name + self.role = role + self.human_name = human_name + + def new_atom(name): + return Type(name, 'atom', name) + + def new_any(name, c_type, atom, mangle, human_name, stack, missing_value): + new = Type(name, 'any', human_name) + new.c_type = c_type + new.atom = atom + new.mangle = mangle + new.stack = stack + new.missing_value = missing_value + return new + + def new_leaf(name, c_type, atom, mangle, human_name): + new = Type(name, 'leaf', human_name) + new.c_type = c_type + new.atom = atom + new.mangle = mangle + return new + + def new_fixed(name, c_type, fixed_value): + new = Type(name, 'fixed', name) + new.c_type = c_type + new.fixed_value = fixed_value + return new + +def init_type(type_): + global types + types[type_.name] = type_ # c_type(type). # @@ -183,7 +166,7 @@ def c_type(type_): prepended to a variable name to produce a declaration. (That won't work in general but it works well enough for our types.) """ - c_type = type_["C_TYPE"] + c_type = type_.c_type if not c_type.endswith('*'): c_type += ' ' return c_type @@ -238,7 +221,7 @@ def parse_input(): return_type = parse_type() if return_type is None: return_type = types['number'] - if return_type['NAME'] not in ['number', 'string', 'boolean']: + if return_type.name not in ['number', 'string', 'boolean']: sys.stderr.write('%s is not a valid return type\n' % return_type['NAME']) sys.exit(1) op['RETURNS'] = return_type @@ -289,7 +272,7 @@ def parse_input(): opname = 'OP_' + op['NAME'] opname = opname.replace('.', '_') if op['CATEGORY'] == 'function': - mangle = ''.join([a.type_['MANGLE'] for a in op['ARGS']]) + mangle = ''.join([a.type_.mangle for a in op['ARGS']]) op['MANGLE'] = mangle opname += '_' + mangle op['OPNAME'] = opname @@ -299,7 +282,7 @@ def parse_input(): if aa is None: sys.stderr.write("can't have minimum valid count without array arg\n") sys.exit(1) - if aa.type_['NAME'] != 'number': + if aa.type_.name != 'number': sys.stderr.write('minimum valid count allowed only with double array\n') sys.exit(1) if aa.times != 1: @@ -312,9 +295,8 @@ def parse_input(): if type_ is None: sys.stderr.write('parse error\n') sys.exit(1) - if type_['ROLE'] not in ['leaf', 'fixed']: - sys.stderr.write("'%s' is not allowed as auxiliary data\n" - % type_['NAME']) + if type_.role not in ['leaf', 'fixed']: + sys.stderr.write("'%s' is not allowed as auxiliary data\n" % type_.name) sys.exit(1) name = force('id') op['AUX'] += [{'TYPE': type_, 'NAME': name}] @@ -329,9 +311,9 @@ def parse_input(): sys.stderr.write("operators with %s aux data must be marked 'no_opt'\n" % key) sys.exit(1) - if op['RETURNS']['NAME'] == 'string' and not op['ABSORB_MISS']: + if op['RETURNS'].name == 'string' and not op['ABSORB_MISS']: for arg in op['ARGS']: - if arg.type_['NAME'] in ['number', 'boolean']: + if arg.type_.name in ['number', 'boolean']: sys.stderr.write("'%s' returns string and has double or bool " "argument, but is not marked ABSORB_MISS\n" % op['NAME']) @@ -482,7 +464,7 @@ def parse_type(): """ if toktype == 'id': for type_ in types.values(): - if type_.get("NAME") == token: + if type_.name == token: get_token() return type_ return None @@ -544,7 +526,7 @@ def parse_arg(): if line[0] in "[,)": get_token() if match('['): - if type_['NAME'] not in ('number', 'string'): + if type_.name not in ('number', 'string'): sys.stderr.write('only double and string arrays supported\n') sys.exit(1) idx = force('id') @@ -629,16 +611,15 @@ def generate_evaluate_inc(): args += ['arg_%s' % arg.name] if arg.idx is None: decl = '%sarg_%s' % (ctype, arg.name) - if type_['ROLE'] == 'any': - decls = ['%s = *--%s' % (decl, type_['STACK'])] + decls - elif type_['ROLE'] == 'leaf': - decls += ['%s = op++->%s' % (decl, type_['ATOM'])] + if type_.role == 'any': + decls = ['%s = *--%s' % (decl, type_.stack)] + decls + elif type_.role == 'leaf': + decls += ['%s = op++->%s' % (decl, type_.atom)] else: assert False else: idx = arg.idx - stack = type_['STACK'] - decls = ['%s*arg_%s = %s -= arg_%s' % (ctype, arg.name, stack, idx)] + decls + decls = ['%s*arg_%s = %s -= arg_%s' % (ctype, arg.name, type_.stack, idx)] + decls decls = ['size_t arg_%s = op++->integer' % idx] + decls idx = 'arg_%s' % idx @@ -648,12 +629,12 @@ def generate_evaluate_inc(): for aux in op['AUX']: type_ = aux['TYPE'] name = aux['NAME'] - if type_['ROLE'] == 'leaf': + if type_.role == 'leaf': ctype = c_type(type_) - decls += ['%saux_%s = op++->%s' % (ctype, name, type_['ATOM'])] + decls += ['%saux_%s = op++->%s' % (ctype, name, type_.atom)] args += ['aux_%s' % name] - elif type_['ROLE'] == 'fixed': - args += [type_['FIXED_VALUE']] + elif type_.role == 'fixed': + args += [type_.fixed_value] sysmis_cond = make_sysmis_decl(op, 'op++->integer') if sysmis_cond is not None: @@ -661,7 +642,7 @@ def generate_evaluate_inc(): result = 'eval_%s (%s)' % (op['OPNAME'], ', '.join(args)) - stack = op['RETURNS']['STACK'] + stack = op['RETURNS'].stack out_file.write("case %s:\n" % opname) if decls: @@ -669,7 +650,7 @@ def generate_evaluate_inc(): for decl in decls: out_file.write(" %s;\n" % decl) if sysmis_cond is not None: - miss_ret = op['RETURNS']['MISSING_VALUE'] + miss_ret = op['RETURNS'].missing_value out_file.write(" *%s++ = force_sysmis ? %s : %s;\n" % (stack, miss_ret, result)) else: out_file.write(" *%s++ = %s;\n" % (stack, result)) @@ -686,8 +667,8 @@ def generate_operations_h(): out_file.write(" {\n") atoms = [] for type_ in types.values(): - if type_['ROLE'] != 'fixed': - atoms += ["OP_%s" % type_['NAME']] + if type_.role != 'fixed': + atoms += ["OP_%s" % type_.name] print_operations('atom', 1, atoms) print_operations('function', "OP_atom_last + 1", funcs) @@ -742,7 +723,7 @@ def generate_optimize_inc(): type_ = arg.type_ ctype = c_type(type_) if arg.idx is None: - func = "get_%s_arg" % type_['ATOM'] + func = "get_%s_arg" % type_.atom decls += ["%sarg_%s = %s (node, %s)" % (ctype, name, func, arg_idx)] else: decl = "size_t arg_%s = node->n_args" % arg.idx @@ -750,7 +731,7 @@ def generate_optimize_inc(): decl += " - %s" % arg_idx decls += [decl] - decls += ["%s*arg_%s = get_%s_args (node, %s, arg_%s, e)" % (ctype, name, type_['ATOM'], arg_idx, arg.idx)] + decls += ["%s*arg_%s = get_%s_args (node, %s, arg_%s, e)" % (ctype, name, type_.atom, arg_idx, arg.idx)] arg_idx += 1 sysmis_cond = make_sysmis_decl (op, "node->min_valid") @@ -768,23 +749,23 @@ def generate_optimize_inc(): for aux in op['AUX']: type_ = aux['TYPE'] - if type_['ROLE'] == 'leaf': - func = "get_%s_arg" % type_['ATOM'] + if type_.role == 'leaf': + func = "get_%s_arg" % type_.atom args += "%s (node, %s)" % (func, arg_idx) arg_idx += 1 - elif type_['ROLE'] == 'fixed': - args += [type_['FIXED_VALUE']] + elif type_.role == 'fixed': + args += [type_.fixed_value] else: assert False result = "eval_%s (%s)" % (op['OPNAME'], ', '.join(args)) if decls and sysmis_cond is not None: - miss_ret = op['RETURNS']['MISSING_VALUE'] + miss_ret = op['RETURNS'].missing_value decls += ['%sresult = force_sysmis ? %s : %s' % (c_type(op['RETURNS']), miss_ret, result)] result = 'result' out_file.write("case %s:\n" % opname) - alloc_func = "expr_allocate_%s" % op['RETURNS']['NAME'] + alloc_func = "expr_allocate_%s" % op['RETURNS'].name if decls: out_file.write(" {\n") for decl in decls: @@ -800,9 +781,8 @@ def generate_parse_inc(): out_file.write("{%s},\n" % ', '.join(members)) for type_ in types.values(): - if type_['ROLE'] != 'fixed': - human_name = type_.get('HUMAN_NAME', type_['NAME']) - members = ('"%s"' % type_['NAME'], '"%s"' % human_name, '0', "OP_%s" % type_['NAME'], '0', "{}", '0', '0') + if type_.role != 'fixed': + members = ('"%s"' % type_.name, '"%s"' % type_.human_name, '0', "OP_%s" % type_.name, '0', "{}", '0', '0') out_file.write("{%s},\n" % ', '.join(members)) for opname in order: @@ -816,20 +796,20 @@ def generate_parse_inc(): opt_args = [] for arg in op['ARGS']: if arg.idx is None: - args += [arg.type_['HUMAN_NAME']] + args += [arg.type_.human_name] array = array_arg(op) if array is not None: if op['MIN_VALID'] == 0: array_args = [] for i in range(array.times): - array_args += [array.type_['HUMAN_NAME']] + array_args += [array.type_.human_name] args += array_args opt_args = array_args else: for i in range(op['MIN_VALID']): - args += [array.type_['HUMAN_NAME']] - opt_args += [array.type_['HUMAN_NAME']] + args += [array.type_.human_name] + opt_args += [array.type_.human_name] human = "%s(%s" % (op['NAME'], ', '.join(args)) if opt_args: human += '[, %s]...' % ', '.join(opt_args) @@ -857,11 +837,11 @@ def generate_parse_inc(): flags += ['OPF_NO_ABBREV'] members += [' | '.join(flags) if flags else '0'] - members += ['OP_%s' % op['RETURNS']['NAME']] + members += ['OP_%s' % op['RETURNS'].name] members += ['%s' % len(op['ARGS'])] - arg_types = ["OP_%s" % arg.type_['NAME'] for arg in op['ARGS']] + arg_types = ["OP_%s" % arg.type_.name for arg in op['ARGS']] members += ['{%s}' % ', '.join(arg_types)] members += ['%s' % op['MIN_VALID']] @@ -887,9 +867,9 @@ def make_sysmis_decl(op, min_valid_src): for arg in op['ARGS']: arg_name = 'arg_%s' % arg.name if arg.idx is None: - if arg.type_['NAME'] in ['number', 'boolean']: + if arg.type_.name in ['number', 'boolean']: sysmis_cond += ["!is_valid (%s)" % arg_name] - elif arg.type_['NAME'] == 'number': + elif arg.type_.name == 'number': a = arg_name n = 'arg_%s' % arg.idx sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)] -- 2.30.2