--- /dev/null
+#! /usr/bin/python3
+# PSPP - a program for statistical analysis.
+# Copyright (C) 2017, 2021 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+import getopt
+import re
+import sys
+
+argv0 = sys.argv[0]
+
+def init_all_types():
+ """
+ Defines all our types.
+
+ Initializes 'types' global.
+"""
+
+ global types
+ types = {}
+
+ # Common user-visible types used throughout evaluation trees.
+ init_type('number', 'any', C_TYPE='double',
+ ATOM='number', MANGLE='n', HUMAN_NAME='number',
+ STACK='ns', MISSING_VALUE='SYSMIS')
+ init_type('string', 'any', C_TYPE='struct substring',
+ ATOM='string', MANGLE='s', HUMAN_NAME='string',
+ STACK='ss', MISSING_VALUE='empty_string')
+ init_type('boolean', 'any', C_TYPE='double',
+ ATOM='number', MANGLE='n', HUMAN_NAME='boolean',
+ STACK='ns', MISSING_VALUE='SYSMIS')
+
+ # Format types.
+ init_type('format', 'atom')
+ init_type('ni_format', 'leaf', C_TYPE='const struct fmt_spec *',
+ ATOM='format', MANGLE='f',
+ HUMAN_NAME='num_input_format')
+ init_type('no_format', 'leaf', C_TYPE='const struct fmt_spec *',
+ ATOM='format', MANGLE='f',
+ HUMAN_NAME='num_output_format')
+
+ # Integer types.
+ init_type('integer', 'leaf', C_TYPE='int',
+ ATOM='integer', MANGLE='n', HUMAN_NAME='integer')
+ init_type('pos_int', 'leaf', C_TYPE='int',
+ ATOM='integer', MANGLE='n',
+ HUMAN_NAME='positive_integer_constant')
+
+ # Variable names.
+ init_type('variable', 'atom')
+ init_type('num_var', 'leaf', C_TYPE='const struct variable *',
+ ATOM='variable', MANGLE='Vn',
+ HUMAN_NAME='num_variable')
+ init_type('str_var', 'leaf', C_TYPE='const struct variable *',
+ ATOM='variable', MANGLE='Vs',
+ HUMAN_NAME='string_variable')
+ init_type('var', 'leaf', C_TYPE='const struct variable *',
+ ATOM='variable', MANGLE='V',
+ HUMAN_NAME='variable')
+
+ # Vectors.
+ init_type('vector', 'leaf', C_TYPE='const struct vector *',
+ ATOM='vector', MANGLE='v', HUMAN_NAME='vector')
+
+ # Fixed types.
+ init_type('expression', 'fixed', C_TYPE='struct expression *',
+ FIXED_VALUE='e')
+ init_type('case', 'fixed', C_TYPE='const struct ccase *',
+ FIXED_VALUE='c')
+ init_type('case_idx', 'fixed', C_TYPE='size_t',
+ FIXED_VALUE='case_idx')
+ init_type('dataset', 'fixed', C_TYPE='struct dataset *',
+ FIXED_VALUE='ds')
+
+ # One of these is emitted at the end of each expression as a sentinel
+ # that tells expr_evaluate() to return the value on the stack.
+ init_type('return_number', 'atom')
+ init_type('return_string', 'atom')
+
+ # Used only for debugging purposes.
+ init_type('operation', 'atom')
+
+def init_type(name, role, **rest):
+ """
+ init_type has 2 required arguments:
+
+ NAME: Type name.
+
+ 'name' is the type's name in operations.def.
+
+ `OP_$name' is the terminal's type in operations.h.
+
+ `expr_allocate_$name()' allocates a node of the given type.
+
+ ROLE: How the type may be used:
+
+ "any": Usable as operands and function arguments, and
+ function and operator results.
+
+ "leaf": Usable as operands and function arguments, but
+ not function arguments or results. (Thus, they appear
+ only in leaf nodes in the parse type.)
+
+ "fixed": Not allowed either as an operand or argument
+ type or a result type. Used only as auxiliary data.
+
+ "atom": Not allowed anywhere; just adds the name to
+ the list of atoms.
+
+ All types except those with "atom" as their role also require:
+
+ C_TYPE: The C type that represents this abstract type.
+
+ Types with "any" or "leaf" role require:
+
+ ATOM:
+
+ `$atom' is the `struct operation_data' member name.
+
+ get_$atom_name() obtains the corresponding data from a
+ node.
+
+ MANGLE: Short string for name mangling. Use identical strings
+ if two types should not be overloaded.
+
+ HUMAN_NAME: Name for a type when we describe it to the user.
+
+ Types with role "any" require:
+
+ STACK: Name of the local variable in expr_evaluate(), used for
+ maintaining the stack for this type.
+
+ MISSING_VALUE: Expression used for the missing value of this
+ type.
+
+ Types with role "fixed" require:
+
+ FIXED_VALUE: Expression used for the value of this type.
+"""
+ global types
+ new_type = { 'NAME': name, 'ROLE': role } | rest
+
+ need_keys = ['NAME', 'ROLE']
+ if role == 'any':
+ need_keys += ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME', 'STACK', 'MISSING_VALUE']
+ elif role == 'leaf':
+ need_keys += ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME']
+ elif role == 'fixed':
+ need_keys += ['C_TYPE', 'FIXED_VALUE']
+ elif role == 'atom':
+ pass
+ else:
+ sys.stderr.write("no role '%s'\n" % role)
+ sys.exit(1)
+
+ for key in new_type.keys():
+ if not key in new_type:
+ sys.stderr.write("%s lacks %s\n" % (name, key))
+ sys.exit(1)
+ for key in need_keys:
+ if not key in need_keys:
+ sys.stderr.write("%s has superfluous key %s\n" % (name, key))
+ sys.exit(1)
+
+ types[name] = new_type
+
+# c_type(type).
+#
+def c_type(type_):
+ """Returns the C type of the given type as a string designed to be
+ prepended to a variable name to produce a declaration. (That
+ won't work in general but it works well enough for our types.)
+ """
+ c_type = type_["C_TYPE"]
+ if not c_type.endswith('*'):
+ c_type += ' '
+ return c_type
+\f
+# Input parsing.
+
+def parse_input():
+ """Parses the entire input.
+
+ Initializes ops, funcs, opers."""
+
+ global token
+ global toktype
+ global line_number
+ token = None
+ toktype = None
+ line_number = 0
+ get_line()
+ get_token()
+
+ global ops
+ global funcs
+ global opers
+ global order
+ ops = {}
+ funcs = []
+ opers = []
+
+ while toktype != 'eof':
+ op = {
+ 'OPTIMIZABLE': True,
+ 'UNIMPLEMENTED': False,
+ 'EXTENSION': False,
+ 'PERM_ONLY': False,
+ 'ABSORB_MISS': False,
+ 'NO_ABBREV': False,
+ }
+ while True:
+ if match('extension'):
+ op['EXTENSION'] = True
+ elif match('no_opt'):
+ op['OPTIMIZABLE'] = False
+ elif match('absorb_miss'):
+ op['ABSORB_MISS'] = True
+ elif match('perm_only'):
+ op['PERM_ONLY'] = True
+ elif match('no_abbrev'):
+ op['NO_ABBREV'] = True
+ else:
+ break
+
+ return_type = parse_type()
+ if return_type is None:
+ return_type = types['number']
+ if return_type['NAME'] not in ['number', 'string', 'boolean']:
+ sys.stderr.write('%s is not a valid return type\n' % return_type['NAME'])
+ sys.exit(1)
+ op['RETURNS'] = return_type
+
+ op['CATEGORY'] = token
+ if op['CATEGORY'] not in ['operator', 'function']:
+ sys.stderr.write("'operator' or 'function' expected at '%s'" % token)
+ sys.exit(1)
+ get_token()
+
+ name = force('id')
+ if op['CATEGORY'] == 'function' and '_' in name:
+ sys.stderr.write("function name '%s' may not contain underscore\n" % name)
+ sys.exit(1)
+ elif op['CATEGORY'] == 'operator' and '.' in name:
+ sys.stderr.write("operator name '%s' may not contain period\n" % name)
+ sys.exit(1)
+
+ m = re.match(r'(.*)\.(\d+)$', name)
+ if m:
+ prefix, suffix = m.groups()
+ name = prefix
+ op['MIN_VALID'] = int(suffix)
+ op['ABSORB_MISS'] = True
+ else:
+ op['MIN_VALID'] = 0
+ op['NAME'] = name
+
+ force_match('(')
+ op['ARGS'] = []
+ while not match(')'):
+ arg = parse_arg()
+ op['ARGS'] += [arg]
+ if 'IDX' in arg:
+ if match(')'):
+ break
+ sys.stderr.write('array must be last argument\n')
+ sys.exit(1)
+ if not match(','):
+ force_match(')')
+ break
+
+ for arg in op['ARGS']:
+ if 'CONDITION' in arg:
+ any_arg = '|'.join([a['NAME'] for a in op['ARGS']])
+ arg['CONDITION'] = re.sub(r'\b(%s)\b' % any_arg, r'arg_\1',
+ arg['CONDITION'])
+
+ opname = 'OP_' + op['NAME']
+ opname = opname.replace('.', '_')
+ if op['CATEGORY'] == 'function':
+ print(op)
+ mangle = ''.join([a['TYPE']['MANGLE'] for a in op['ARGS']])
+ op['MANGLE'] = mangle
+ opname += '_' + mangle
+ op['OPNAME'] = opname
+
+ if op['MIN_VALID'] > 0:
+ aa = array_arg(op)
+ if aa is None:
+ sys.stderr.write("can't have minimum valid count without array arg\n")
+ sys.exit(1)
+ if aa['TYPE']['NAME'] != 'number':
+ sys.stderr.write('minimum valid count allowed only with double array\n')
+ sys.exit(1)
+ if aa['TIMES'] != 1:
+ sys.stderr.write("can't have minimu valid count if array has multiplication factor\n")
+ sys.exit(1)
+
+ op['AUX'] = []
+ while toktype == 'id':
+ type_ = parse_type()
+ if type_ is None:
+ sys.stderr.write('parse error\n')
+ sys.exit(1)
+ if type_['ROLE'] not in ['leaf', 'fixed']:
+ sys.stderr.write("'%s' is not allowed as auxiliary data\n"
+ % type_['NAME'])
+ sys.exit(1)
+ name = force('id')
+ op['AUX'] += [{'TYPE': type_, 'NAME': name}]
+ force_match(';')
+
+ if op['OPTIMIZABLE']:
+ if op['NAME'].startswith('RV.'):
+ sys.stderr.write("random variate functions must be marked 'no_opt'\n")
+ sys.exit(1)
+ for key in ['CASE', 'CASE_IDX']:
+ if key in op['AUX']:
+ sys.stderr.write("operators with %s aux data must be marked 'no_opt'\n" % key)
+ sys.exit(1)
+
+ if op['RETURNS']['NAME'] == 'string' and not op['ABSORB_MISS']:
+ for arg in op['ARGS']:
+ if arg['TYPE']['NAME'] in ['number', 'boolean']:
+ sys.stderr.write("'%s' returns string and has double or bool "
+ "argument, but is not marked ABSORB_MISS\n"
+ % op['NAME'])
+ sys.exit(1)
+ if 'CONDITION' in arg:
+ sys.stderr.write("'%s' returns string but has argument with condition\n")
+ sys.exit(1)
+
+ if toktype == 'block':
+ op['BLOCK'] = force('block')
+ elif toktype == 'expression':
+ if token == 'unimplemented':
+ op['UNIMPLEMENTED'] = True
+ else:
+ op['EXPRESSION'] = token
+ get_token()
+ else:
+ sys.stderr.write("block or expression expected\n")
+ sys.exit(1)
+
+ if opname in ops:
+ sys.stderr.write("duplicate operation name %s\n" % opname)
+ sys.exit(1)
+ ops[opname] = op
+ if op['CATEGORY'] == 'function':
+ funcs += [opname]
+ else:
+ opers += [opname]
+ in_file.close()
+
+ funcs = sorted(funcs, key=lambda name: (ops[name]['NAME'], ops[name]['OPNAME']))
+ opers = sorted(opers, key=lambda name: ops[name]['NAME'])
+ order = funcs + opers
+
+def get_token():
+ """Reads the next token into 'token' and 'toktype'."""
+
+ global line
+ global token
+ global toktype
+
+ lookahead()
+ if toktype == 'eof':
+ return
+
+ print('%s %s' % (line, toktype))
+ m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
+ if m:
+ token, line = m.groups()
+ toktype = 'id'
+ return
+
+ m = re.match(r'([0-9]+)(.*)$', line)
+ if m:
+ token, line = m.groups()
+ token = int(token)
+ toktype = 'int'
+ return
+
+ m = re.match(r'([][(),*;.])(.*)$', line)
+ if m:
+ token, line = m.groups()
+ toktype = 'punct'
+ return
+
+ m = re.match(r'=\s*(.*)$', line)
+ if m:
+ toktype = 'expression'
+ line = m.group(1)
+ token = accumulate_balanced(';')
+ return
+
+ m = re.match(r'{(.*)$', line)
+ if m:
+ toktype = 'block'
+ line = m.group(1)
+ token = accumulate_balanced('}')
+ token = token.rstrip('\n')
+ return
+
+ sys.stderr.write("bad character '%s' in input\n" % line[0])
+ sys.exit(1)
+
+def lookahead():
+ """Skip whitespace."""
+ global line
+ if line is None:
+ sys.stderr.write("unexpected end of file\n")
+ sys.exit(1)
+
+ while True:
+ line = line.lstrip()
+ if line != "":
+ break
+ get_line()
+ if line is None:
+ global token
+ global toktype
+ token = 'eof'
+ toktype = 'eof'
+ return
+
+def accumulate_balanced(end, swallow_end=True):
+ """Accumulates input until a character in 'end' is encountered,
+ except that balanced pairs of (), [], or {} cause 'end' to be
+ ignored. Returns the input read.
+ """
+ s = ""
+ nest = 0
+ global line
+ print("line='%s'" % line)
+ while True:
+ print(type(line))
+ for idx, c in enumerate(line):
+ print('nest=%s %s end=%s' % (nest, c, end))
+ if c in end and nest == 0:
+ line = line[idx:]
+ if swallow_end:
+ line = line[1:]
+ s = s.strip('\r\n')
+ return s
+ elif c in "[({":
+ nest += 1
+ elif c in "])}":
+ if nest > 0:
+ nest -= 1
+ else:
+ sys.stderr.write('unbalanced parentheses\n')
+ sys.exit(1)
+ s += c
+ s += '\n'
+ get_line()
+
+def get_line():
+ """Reads the next line from INPUT into 'line'."""
+ global line
+ global line_number
+ line = in_file.readline()
+ line_number += 1
+ print("%s\n" % line_number)
+ if line == '':
+ line = None
+ else:
+ line = line.rstrip('\r\n')
+ comment_ofs = line.find('//')
+ if comment_ofs >= 0:
+ line = line[:comment_ofs]
+
+def parse_type():
+ """If the current token is an identifier that names a type, returns
+ the type and skips to the next token. Otherwise, returns
+ undef.
+ """
+ if toktype == 'id':
+ for type_ in types.values():
+ if type_.get("NAME") == token:
+ get_token()
+ return type_
+ return None
+
+def force(type_):
+ """Makes sure that 'toktype' equals 'type', reads the next token, and
+ returns the previous 'token'.
+
+ """
+ if type_ != toktype:
+ sys.stderr.write("parse error at `%s' expecting %s\n" % (token, type_))
+ sys.exit(1)
+ tok = token
+ get_token()
+ return tok
+
+def match(tok):
+ """If 'token' equals 'tok', reads the next token and returns true.
+ Otherwise, returns false."""
+ if token == tok:
+ get_token()
+ return True
+ else:
+ return False
+
+def force_match(tok):
+ """If 'token' equals 'tok', reads the next token. Otherwise, flags an
+ error in the input.
+ """
+ if not match(tok):
+ sys.stderr.write("parse error at `%s' expecting `%s'\n" % (token, tok))
+ sys.exit(1)
+
+def parse_arg():
+ """Parses and returns a function argument."""
+ arg = {}
+ arg['TYPE'] = parse_type()
+ if arg['TYPE'] is None:
+ arg['TYPE'] = types['number']
+
+ if toktype != 'id':
+ sys.stderr.write("argument name expected at `%s'\n" % token)
+ sys.exit(1)
+ arg['NAME'] = token
+
+ lookahead()
+ global line
+ print("line[0]=%s" % line[0])
+ if line[0] in "[,)":
+ get_token()
+ print('token=%s toktype=%s' % (token, toktype))
+ if match('['):
+ if arg['TYPE']['NAME'] not in ('number', 'string'):
+ sys.stderr.write('only double and string arrays supported\n')
+ sys.exit(1)
+ arg['IDX'] = force('id')
+ if match('*'):
+ arg['TIMES'] = force('int')
+ if arg['TIMES'] != 2:
+ sys.stderr.write('multiplication factor must be two\n')
+ sys.exit(1)
+ else:
+ arg['TIMES'] = 1
+ force_match(']')
+ else:
+ arg['CONDITION'] = arg['NAME'] + ' ' + accumulate_balanced(',)', swallow_end=False)
+ get_token()
+ return arg
+
+def print_header():
+ """Prints the output file header."""
+ out_file.write("""\
+/* %s
+ Generated from %s by generate.py.
+ Do not modify! */
+
+""" % (out_file_name, in_file_name))
+
+def print_trailer():
+ """Prints the output file trailer."""
+ out_file.write("""\
+
+/*
+ Local Variables:
+ mode: c
+ buffer-read-only: t
+ End:
+*/
+""")
+
+def generate_evaluate_h():
+ out_file.write("#include \"helpers.h\"\n\n")
+
+ for opname in order:
+ op = ops[opname]
+ if op['UNIMPLEMENTED']:
+ continue
+
+ args = []
+ for arg in op['ARGS']:
+ if 'IDX' not in arg:
+ args += [c_type(arg['TYPE']) + arg['NAME']]
+ else:
+ args += [c_type(arg['TYPE']) + arg['NAME'] + '[]']
+ args += ['size_t %s' % arg['IDX']]
+ for aux in op['AUX']:
+ args += [c_type(aux['TYPE']) + aux['NAME']]
+ if not args:
+ args += ['void']
+
+ if 'BLOCK' in op:
+ statements = op['BLOCK'] + '\n'
+ else:
+ statements = " return %s;\n" % op['EXPRESSION']
+
+ out_file.write("static inline %s\n" % c_type (op['RETURNS']))
+ out_file.write("eval_%s (%s)\n" % (opname, ', '.join(args)))
+ out_file.write("{\n")
+ out_file.write(statements)
+ out_file.write("}\n\n")
+
+def generate_evaluate_inc():
+ for opname in order:
+ op = ops[opname]
+ if op['UNIMPLEMENTED']:
+ out_file.write("case %s:\n" % opname)
+ out_file.write(" NOT_REACHED ();\n\n")
+ continue
+
+ decls = []
+ args = []
+ for arg in op['ARGS']:
+ name = arg['NAME']
+ type_ = arg['TYPE']
+ ctype = c_type(type_)
+ args += ['arg_%s' % name]
+ if 'IDX' not in arg:
+ decl = '%sarg_%s' % (ctype, name)
+ if type_['ROLE'] == 'any':
+ decls = ['%s = *--%s' % (decl, type_['STACK'])] + decls
+ elif type_['ROLE'] == 'leaf':
+ decls += ['%s = op++->%s' % (decl, type_['ATOM'])]
+ else:
+ assert False
+ else:
+ idx = arg['IDX']
+ stack = type_['STACK']
+ decls = ['%s*arg_%s = %s -= arg_%s' % (ctype, name, stack, idx)] + decls
+ decls = ['size_t arg_%s = op++->integer' % idx] + decls
+
+ idx = 'arg_%s' % idx
+ if arg['TIMES'] != 1:
+ idx += ' / %s' % arg['TIMES']
+ args += [idx]
+ for aux in op['AUX']:
+ type_ = aux['TYPE']
+ name = aux['NAME']
+ if type_['ROLE'] == 'leaf':
+ ctype = c_type(type_)
+ decls += ['%saux_%s = op++->%s' % (ctype, name, type_['ATOM'])]
+ args += ['aux_%s' % name]
+ elif type_['ROLE'] == 'fixed':
+ args += [type_['FIXED_VALUE']]
+
+ sysmis_cond = make_sysmis_decl(op, 'op++->integer')
+ if sysmis_cond is not None:
+ decls += [sysmis_cond]
+
+ print(args)
+ result = 'eval_%s (%s)' % (op['OPNAME'], ', '.join(args))
+
+ stack = op['RETURNS']['STACK']
+
+ out_file.write("case %s:\n" % opname)
+ if decls:
+ out_file.write(" {\n")
+ for decl in decls:
+ out_file.write(" %s;\n" % decl)
+ if sysmis_cond is not None:
+ miss_ret = op['RETURNS']['MISSING_VALUE']
+ out_file.write(" *%s++ = force_sysmis ? %s : %s;\n" % (stack, miss_ret, result))
+ else:
+ out_file.write(" *%s++ = %s;\n" % (stack, result))
+ out_file.write(" }\n")
+ else:
+ out_file.write(" *%s++ = %s;\n" % (stack, result))
+ out_file.write(" break;\n\n")
+
+def generate_operations_h():
+ out_file.write("#include <stdlib.h>\n")
+ out_file.write("#include <stdbool.h>\n\n")
+
+ out_file.write("typedef enum")
+ out_file.write(" {\n")
+ atoms = []
+ for type_ in types.values():
+ if type_['ROLE'] != 'fixed':
+ atoms += ["OP_%s" % type_['NAME']]
+
+ print_operations('atom', 1, atoms)
+ print_operations('function', "OP_atom_last + 1", funcs)
+ print_operations('operator', "OP_function_last + 1", opers)
+ print_range("OP_composite", "OP_function_first", "OP_operator_last")
+ out_file.write(",\n\n")
+ print_range("OP", "OP_atom_first", "OP_composite_last")
+ out_file.write("\n }\n")
+ out_file.write("operation_type, atom_type;\n")
+
+ print_predicate('is_operation', 'OP')
+ for key in ('atom', 'composite', 'function', 'operator'):
+ print_predicate("is_%s" % key, "OP_%s" % key)
+
+def print_operations(type_, first, names):
+ out_file.write(" /* %s types. */\n" % type_.title())
+ out_file.write(" %s = %s,\n" % (names[0], first))
+ for name in names[1:]:
+ out_file.write(" %s,\n" % name)
+ print_range("OP_%s" % type_, names[0], names[len(names) - 1])
+ out_file.write(",\n\n")
+
+def print_range(prefix, first, last):
+ out_file.write(" %s_first = %s,\n" % (prefix, first))
+ out_file.write(" %s_last = %s,\n" % (prefix, last))
+ out_file.write(" n_%s = %s_last - %s_first + 1" % (prefix, prefix, prefix))
+
+def print_predicate(function, category):
+ assertion = ""
+
+ out_file.write("\nstatic inline bool\n")
+ out_file.write("%s (operation_type op)\n" % function)
+ out_file.write("{\n")
+ if function != 'is_operation':
+ out_file.write(" assert (is_operation (op));\n")
+ out_file.write(" return op >= %s_first && op <= %s_last;\n" % (category, category))
+ out_file.write("}\n")
+
+def generate_optimize_inc():
+ for opname in order:
+ op = ops[opname]
+
+ if not op['OPTIMIZABLE'] or op['UNIMPLEMENTED']:
+ out_file.write("case %s:\n" % opname)
+ out_file.write(" NOT_REACHED ();\n\n")
+ continue
+
+ decls = []
+ arg_idx = 0
+ for arg in op['ARGS']:
+ name = arg['NAME']
+ type_ = arg['TYPE']
+ ctype = c_type(type_)
+ if not 'IDX' in arg:
+ func = "get_%s_arg" % type_['ATOM']
+ decls += ["%sarg_%s = %s (node, %s)" % (ctype, name, func, arg_idx)]
+ else:
+ idx = arg['IDX']
+ decl = "size_t arg_%s = node->n_args" % idx
+ if arg_idx > 0:
+ decl += " - %s" % arg_idx
+ decls += [decl]
+
+ decls += ["%s*arg_%s = get_%s_args (node, %s, arg_%s, e)" % (ctype, name, type_['ATOM'], arg_idx, idx)]
+ arg_idx += 1
+
+ sysmis_cond = make_sysmis_decl (op, "node->min_valid")
+ if sysmis_cond is not None:
+ decls += [sysmis_cond]
+
+ args = []
+ for arg in op['ARGS']:
+ args += ["arg_%s" % arg['NAME']]
+ if 'IDX' in arg:
+ idx = 'arg_%s' % arg['IDX']
+ if arg['TIMES'] != 1:
+ idx += " / %s" % arg['TIMES']
+ args += [idx]
+
+ for aux in op['AUX']:
+ type_ = aux['TYPE']
+ if type_['ROLE'] == 'leaf':
+ func = "get_%s_arg" % type_['ATOM']
+ args += "%s (node, %s)" % (func, arg_idx)
+ arg_idx += 1
+ elif type_['ROLE'] == 'fixed':
+ args += [type_['FIXED_VALUE']]
+ else:
+ assert False
+
+ result = "eval_%s (%s)" % (op['OPNAME'], ', '.join(args))
+ if decls and sysmis_cond is not None:
+ miss_ret = op['RETURNS']['MISSING_VALUE']
+ decls += ['%sresult = force_sysmis ? %s : %s' % (c_type(op['RETURNS']), miss_ret, result)]
+ result = 'result'
+
+ out_file.write("case %s:\n" % opname)
+ alloc_func = "expr_allocate_%s" % op['RETURNS']['NAME']
+ if decls:
+ out_file.write(" {\n")
+ for decl in decls:
+ out_file.write(" %s;\n" % decl)
+ out_file.write(" return %s (e, %s);\n" % (alloc_func, result))
+ out_file.write(" }\n")
+ else:
+ out_file.write(" return %s (e, %s);\n" % (alloc_func, result))
+ out_file.write("\n")
+
+def generate_parse_inc():
+ members = ['""', '""', '0', '0', '0', "{}", '0', '0']
+ out_file.write("{%s},\n" % ', '.join(members))
+
+ for type_ in types.values():
+ if type_['ROLE'] != 'fixed':
+ human_name = type_.get('HUMAN_NAME', type_['NAME'])
+ members = ('"%s"' % type_['NAME'], '"%s"' % human_name, '0', "OP_%s" % type_['NAME'], '0', "{}", '0', '0')
+ out_file.write("{%s},\n" % ', '.join(members))
+
+ for opname in order:
+ op = ops[opname]
+
+ members = []
+ members += ['"%s"' % op['NAME']]
+
+ if op['CATEGORY'] == 'function':
+ args = []
+ opt_args = []
+ for arg in op['ARGS']:
+ if 'IDX' not in arg:
+ args += [arg['TYPE']['HUMAN_NAME']]
+
+ array = array_arg(op)
+ if array is not None:
+ if op['MIN_VALID'] == 0:
+ array_args = []
+ for i in range(array['TIMES']):
+ array_args += [array['TYPE']['HUMAN_NAME']]
+ args += array_args
+ opt_args = array_args
+ else:
+ for i in range(op['MIN_VALID']):
+ args += [array['TYPE']['HUMAN_NAME']]
+ opt_args += [array['TYPE']['HUMAN_NAME']]
+ human = "%s(%s" % (op['NAME'], ', '.join(args))
+ if opt_args:
+ human += '[, %s]...' % ', '.join(opt_args)
+ human += ')'
+ members += ['"%s"' % human]
+ else:
+ members += ['NULL']
+
+ flags = []
+ if op['ABSORB_MISS']:
+ flags += ['OPF_ABSORB_MISS']
+ if array_arg(op):
+ flags += ['OPF_ARRAY_OPERAND']
+ if op['MIN_VALID'] > 0:
+ flags += ['OPF_MIN_VALID']
+ if not op['OPTIMIZABLE']:
+ flags += ['OPF_NONOPTIMIZABLE']
+ if op['EXTENSION']:
+ flags += ['OPF_EXTENSION']
+ if op['UNIMPLEMENTED']:
+ flags += ['OPF_UNIMPLEMENTED']
+ if op['PERM_ONLY']:
+ flags += ['OPF_PERM_ONLY']
+ if op['NO_ABBREV']:
+ flags += ['OPF_NO_ABBREV']
+ members += [' | '.join(flags) if flags else '0']
+
+ members += ['OP_%s' % op['RETURNS']['NAME']]
+
+ members += ['%s' % len(op['ARGS'])]
+
+ arg_types = ["OP_%s" % arg['TYPE']['NAME'] for arg in op['ARGS']]
+ members += ['{%s}' % ', '.join(arg_types)]
+
+ members += ['%s' % op['MIN_VALID']]
+
+ members += ['%s' % (array_arg(op)['TIMES'] if array_arg(op) else 0)]
+
+ out_file.write('{%s},\n' % ', '.join(members))
+\f
+# Utilities.
+
+def make_sysmis_decl(op, min_valid_src):
+ """Returns a declaration for a boolean variable called `force_sysmis',
+ which will be true when operation 'op' should be system-missing.
+ Returns None if there are no such circumstances.
+
+ If 'op' has a minimum number of valid arguments, 'min_valid_src'
+ should be an an expression that evaluates to the minimum number of
+ valid arguments for 'op'.
+
+ """
+ sysmis_cond = []
+ if not op['ABSORB_MISS']:
+ for arg in op['ARGS']:
+ arg_name = 'arg_%s' % arg['NAME']
+ if 'IDX' not in arg:
+ if arg['TYPE']['NAME'] in ['number', 'boolean']:
+ sysmis_cond += ["!is_valid (%s)" % arg_name]
+ elif arg['TYPE']['NAME'] == 'number':
+ a = arg_name
+ n = 'arg_%s' % arg['IDX']
+ sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)]
+ elif op['MIN_VALID'] > 0:
+ args = op['ARGS']
+ arg = args[-1]
+ a = 'arg_%s' % arg['NAME']
+ n = 'arg_%s' % arg['IDX']
+ sysmis_cond += ["count_valid (%s, %s) < %s" % (a, n, min_valid_src)]
+ for arg in op['ARGS']:
+ if 'CONDITION' in arg:
+ sysmis_cond += ['!(%s)' % arg['CONDITION']]
+ if sysmis_cond:
+ return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond)
+ return None
+
+def array_arg(op):
+ """If 'op' has an array argument, returns it. Otherwise, returns
+ None."""
+ args = op['ARGS']
+ if not args:
+ return None
+ last_arg = args[-1]
+ if 'IDX' in last_arg:
+ return last_arg
+ return None
+
+def usage():
+ print("""\
+%(argv0)s, for generating expression parsers and evaluators from definitions
+usage: generate.pl -o OUTPUT [-i INPUT] [-h]
+ -i INPUT input file containing definitions (default: operations.def)
+ -o OUTPUT output file
+ -h display this help message
+""" % {'argv0': argv0})
+ sys.exit(0)
+
+if __name__ == "__main__":
+ try:
+ options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
+ ['input=s',
+ 'output=s',
+ 'help'])
+ except getopt.GetoptError as geo:
+ sys.stderr.write("%s: %s\n" % (argv0, geo.msg))
+ sys.exit(1)
+
+ in_file_name = 'operations.def'
+ out_file_name = None
+ for key, value in options:
+ if key in ['-h', '--help']:
+ usage()
+ elif key in ['-i', '--input']:
+ in_file_name = value
+ elif key in ['-o', '--output']:
+ out_file_name = value
+ else:
+ sys.exit(0)
+
+ if out_file_name is None:
+ sys.stderr.write("%(argv0)s: output file must be specified "
+ "(use --help for help)\n" % {'argv0': argv0})
+ sys.exit(1)
+
+ in_file = open(in_file_name, 'r')
+ out_file = open(out_file_name, 'w')
+
+ init_all_types()
+ parse_input()
+
+ print_header()
+ if out_file_name.endswith('evaluate.h'):
+ generate_evaluate_h()
+ elif out_file_name.endswith('evaluate.inc'):
+ generate_evaluate_inc()
+ elif out_file_name.endswith('operations.h'):
+ generate_operations_h()
+ elif out_file_name.endswith('optimize.inc'):
+ generate_optimize_inc()
+ elif out_file_name.endswith('parse.inc'):
+ generate_parse_inc()
+ else:
+ sys.stderr.write("%(argv0)s: unknown output type\n")
+ sys.exit(1)
+ print_trailer()