2 # PSPP - a program for statistical analysis.
3 # Copyright (C) 2017, 2021 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 Defines all our types.
28 Initializes 'types' global.
34 # Common user-visible types used throughout evaluation trees.
35 init_type(Type.new_any('number', 'double', 'number', 'n', 'number', 'ns', 'SYSMIS'))
36 init_type(Type.new_any('string', 'struct substring', 'string', 's', 'string', 'ss', 'empty_string'))
37 init_type(Type.new_any('boolean', 'double', 'number', 'n', 'boolean', 'ns', 'SYSMIS'))
40 init_type(Type.new_atom('format'))
41 init_type(Type.new_leaf('ni_format', 'const struct fmt_spec *', 'format', 'f', 'num_input_format'))
42 init_type(Type.new_leaf('no_format', 'const struct fmt_spec *', 'format', 'f', 'num_output_format'))
45 init_type(Type.new_leaf('integer', 'int', 'integer', 'n', 'integer'))
46 init_type(Type.new_leaf('pos_int', 'int', 'integer', 'n', 'positive_integer_constant'))
49 init_type(Type.new_atom('variable'))
50 init_type(Type.new_leaf('num_var', 'const struct variable *', 'variable', 'Vn', 'num_variable'))
51 init_type(Type.new_leaf('str_var', 'const struct variable *', 'variable', 'Vs', 'string_variable'))
52 init_type(Type.new_leaf('var', 'const struct variable *', 'variable', 'V', 'variable'))
55 init_type(Type.new_leaf('vector', 'const struct vector *', 'vector', 'v', 'vector'))
58 init_type(Type.new_fixed('expression', 'struct expression *', 'e'))
59 init_type(Type.new_fixed('case', 'const struct ccase *', 'c'))
60 init_type(Type.new_fixed('case_idx', 'size_t', 'case_idx'))
61 init_type(Type.new_fixed('dataset', 'struct dataset *', 'ds'))
63 # One of these is emitted at the end of each expression as a sentinel
64 # that tells expr_evaluate() to return the value on the stack.
65 init_type(Type.new_atom('return_number'))
66 init_type(Type.new_atom('return_string'))
68 # Used only for debugging purposes.
69 init_type(Type.new_atom('operation'))
72 init_type has 2 required arguments:
76 'name' is the type's name in operations.def.
78 `OP_$name' is the terminal's type in operations.h.
80 `expr_allocate_$name()' allocates a node of the given type.
82 ROLE: How the type may be used:
84 "any": Usable as operands and function arguments, and
85 function and operator results.
87 "leaf": Usable as operands and function arguments, but not
88 results. (Thus, they appear only in leaf nodes in the parse
91 "fixed": Not allowed either as an operand or argument
92 type or a result type. Used only as auxiliary data.
94 "atom": Not allowed anywhere; just adds the name to
97 All types except those with "atom" as their role also require:
99 C_TYPE: The C type that represents this abstract type.
101 Types with "any" or "leaf" role require:
105 `$atom' is the `struct operation_data' member name.
107 get_$atom_name() obtains the corresponding data from a
110 MANGLE: Short string for name mangling. Use identical strings
111 if two types should not be overloaded.
113 HUMAN_NAME: Name for a type when we describe it to the user.
115 Types with role "any" require:
117 STACK: Name of the local variable in expr_evaluate(), used for
118 maintaining the stack for this type.
120 MISSING_VALUE: Expression used for the missing value of this
123 Types with role "fixed" require:
125 FIXED_VALUE: Expression used for the value of this type.
128 def __init__(self, name, role, human_name):
131 self.human_name = human_name
134 return Type(name, 'atom', name)
136 def new_any(name, c_type, atom, mangle, human_name, stack, missing_value):
137 new = Type(name, 'any', human_name)
142 new.missing_value = missing_value
145 def new_leaf(name, c_type, atom, mangle, human_name):
146 new = Type(name, 'leaf', human_name)
152 def new_fixed(name, c_type, fixed_value):
153 new = Type(name, 'fixed', name)
155 new.fixed_value = fixed_value
158 def init_type(type_):
160 types[type_.name] = type_
165 """Returns the C type of the given type as a string designed to be
166 prepended to a variable name to produce a declaration. (That
167 won't work in general but it works well enough for our types.)
169 c_type = type_.c_type
170 if not c_type.endswith('*'):
182 optimizable, unimplemented, extension, perm_only, absorb_miss, no_abbrev,
185 self.category = category
186 self.returns = returns
189 self.expression = expression
191 self.min_valid = min_valid
192 self.optimizable = optimizable
193 self.unimplemented = unimplemented
194 self.extension = extension
195 self.perm_only = perm_only
196 self.absorb_miss = absorb_miss
197 self.no_abbrev = no_abbrev
199 if mangle is not None:
203 """Parses the entire input.
205 Initializes ops, funcs, opers."""
224 while toktype != 'eof':
226 unimplemented = False
232 if match('extension'):
234 elif match('no_opt'):
236 elif match('absorb_miss'):
238 elif match('perm_only'):
240 elif match('no_abbrev'):
245 return_type = parse_type()
246 if return_type is None:
247 return_type = types['number']
248 if return_type.name not in ['number', 'string', 'boolean']:
249 sys.stderr.write('%s is not a valid return type\n' % return_type.name)
253 if category not in ['operator', 'function']:
254 sys.stderr.write("'operator' or 'function' expected at '%s'" % token)
259 if category == 'function' and '_' in name:
260 sys.stderr.write("function name '%s' may not contain underscore\n" % name)
262 elif category == 'operator' and '.' in name:
263 sys.stderr.write("operator name '%s' may not contain period\n" % name)
266 m = re.match(r'(.*)\.(\d+)$', name)
268 prefix, suffix = m.groups()
270 min_valid = int(suffix)
277 while not match(')'):
280 if arg.idx is not None:
283 sys.stderr.write('array must be last argument\n')
290 if arg.condition is not None:
291 any_arg = '|'.join([a.name for a in args])
292 arg.condition = re.sub(r'\b(%s)\b' % any_arg, r'arg_\1', arg.condition)
294 opname = 'OP_' + name
295 opname = opname.replace('.', '_')
296 if category == 'function':
297 mangle = ''.join([a.type_.mangle for a in args])
298 opname += '_' + mangle
303 while toktype == 'id':
306 sys.stderr.write('parse error\n')
308 if type_.role not in ['leaf', 'fixed']:
309 sys.stderr.write("'%s' is not allowed as auxiliary data\n" % type_.name)
311 aux_name = force('id')
312 aux += [{'TYPE': type_, 'NAME': aux_name}]
316 if name.startswith('RV.'):
317 sys.stderr.write("random variate functions must be marked 'no_opt'\n")
319 for key in ['CASE', 'CASE_IDX']:
321 sys.stderr.write("operators with %s aux data must be marked 'no_opt'\n" % key)
324 if return_type.name == 'string' and not absorb_miss:
326 if arg.type_.name in ['number', 'boolean']:
327 sys.stderr.write("'%s' returns string and has double or bool "
328 "argument, but is not marked ABSORB_MISS\n"
331 if arg.condition is not None:
332 sys.stderr.write("'%s' returns string but has argument with condition\n")
335 if toktype == 'block':
336 block = force('block')
338 elif toktype == 'expression':
339 if token == 'unimplemented':
346 sys.stderr.write("block or expression expected\n")
350 sys.stderr.write("duplicate operation name %s\n" % opname)
353 op = Op(name, category,
354 return_type, args, aux,
357 optimizable, unimplemented, extension, perm_only, absorb_miss,
364 sys.stderr.write("can't have minimum valid count without array arg\n")
366 if aa.type_.name != 'number':
367 sys.stderr.write('minimum valid count allowed only with double array\n')
370 sys.stderr.write("can't have minimu valid count if array has multiplication factor\n")
374 if category == 'function':
381 funcs = sorted(funcs, key=lambda name: (ops[name].name, ops[name].opname))
382 opers = sorted(opers, key=lambda name: ops[name].name)
383 order = funcs + opers
386 """Reads the next token into 'token' and 'toktype'."""
396 m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
398 token, line = m.groups()
402 m = re.match(r'([0-9]+)(.*)$', line)
404 token, line = m.groups()
409 m = re.match(r'([][(),*;.])(.*)$', line)
411 token, line = m.groups()
415 m = re.match(r'=\s*(.*)$', line)
417 toktype = 'expression'
419 token = accumulate_balanced(';')
422 m = re.match(r'{(.*)$', line)
426 token = accumulate_balanced('}')
427 token = token.rstrip('\n')
430 sys.stderr.write("bad character '%s' in input\n" % line[0])
434 """Skip whitespace."""
437 sys.stderr.write("unexpected end of file\n")
452 def accumulate_balanced(end, swallow_end=True):
453 """Accumulates input until a character in 'end' is encountered,
454 except that balanced pairs of (), [], or {} cause 'end' to be
455 ignored. Returns the input read.
461 for idx, c in enumerate(line):
462 if c in end and nest == 0:
474 sys.stderr.write('unbalanced parentheses\n')
481 """Reads the next line from INPUT into 'line'."""
484 line = in_file.readline()
489 line = line.rstrip('\r\n')
490 comment_ofs = line.find('//')
492 line = line[:comment_ofs]
495 """If the current token is an identifier that names a type, returns
496 the type and skips to the next token. Otherwise, returns
500 for type_ in types.values():
501 if type_.name == token:
507 """Makes sure that 'toktype' equals 'type', reads the next token, and
508 returns the previous 'token'.
512 sys.stderr.write("parse error at `%s' expecting %s\n" % (token, type_))
519 """If 'token' equals 'tok', reads the next token and returns true.
520 Otherwise, returns false."""
527 def force_match(tok):
528 """If 'token' equals 'tok', reads the next token. Otherwise, flags an
532 sys.stderr.write("parse error at `%s' expecting `%s'\n" % (token, tok))
536 def __init__(self, name, type_, idx, times, condition):
541 self.condition = condition
544 """Parses and returns a function argument."""
547 type_ = types['number']
550 sys.stderr.write("argument name expected at `%s'\n" % token)
563 if type_.name not in ('number', 'string'):
564 sys.stderr.write('only double and string arrays supported\n')
570 sys.stderr.write('multiplication factor must be two\n')
575 condition = name + ' ' + accumulate_balanced(',)', swallow_end=False)
578 return Arg(name, type_, idx, times, condition)
581 """Prints the output file header."""
584 Generated from %s by generate.py.
587 """ % (out_file_name, in_file_name))
590 """Prints the output file trailer."""
601 def generate_evaluate_h():
602 out_file.write("#include \"helpers.h\"\n\n")
612 args += [c_type(arg.type_) + arg.name]
614 args += [c_type(arg.type_) + arg.name + '[]']
615 args += ['size_t %s' % arg.idx]
617 args += [c_type(aux['TYPE']) + aux['NAME']]
622 statements = op.block + '\n'
624 statements = " return %s;\n" % op.expression
626 out_file.write("static inline %s\n" % c_type (op.returns))
627 out_file.write("eval_%s (%s)\n" % (opname, ', '.join(args)))
628 out_file.write("{\n")
629 out_file.write(statements)
630 out_file.write("}\n\n")
632 def generate_evaluate_inc():
636 out_file.write("case %s:\n" % opname)
637 out_file.write(" NOT_REACHED ();\n\n")
644 ctype = c_type(type_)
645 args += ['arg_%s' % arg.name]
647 decl = '%sarg_%s' % (ctype, arg.name)
648 if type_.role == 'any':
649 decls = ['%s = *--%s' % (decl, type_.stack)] + decls
650 elif type_.role == 'leaf':
651 decls += ['%s = op++->%s' % (decl, type_.atom)]
656 decls = ['%s*arg_%s = %s -= arg_%s' % (ctype, arg.name, type_.stack, idx)] + decls
657 decls = ['size_t arg_%s = op++->integer' % idx] + decls
661 idx += ' / %s' % arg.times
666 if type_.role == 'leaf':
667 ctype = c_type(type_)
668 decls += ['%saux_%s = op++->%s' % (ctype, name, type_.atom)]
669 args += ['aux_%s' % name]
670 elif type_.role == 'fixed':
671 args += [type_.fixed_value]
673 sysmis_cond = make_sysmis_decl(op, 'op++->integer')
674 if sysmis_cond is not None:
675 decls += [sysmis_cond]
677 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
679 stack = op.returns.stack
681 out_file.write("case %s:\n" % opname)
683 out_file.write(" {\n")
685 out_file.write(" %s;\n" % decl)
686 if sysmis_cond is not None:
687 miss_ret = op.returns.missing_value
688 out_file.write(" *%s++ = force_sysmis ? %s : %s;\n" % (stack, miss_ret, result))
690 out_file.write(" *%s++ = %s;\n" % (stack, result))
691 out_file.write(" }\n")
693 out_file.write(" *%s++ = %s;\n" % (stack, result))
694 out_file.write(" break;\n\n")
696 def generate_operations_h():
697 out_file.write("#include <stdlib.h>\n")
698 out_file.write("#include <stdbool.h>\n\n")
700 out_file.write("typedef enum")
701 out_file.write(" {\n")
703 for type_ in types.values():
704 if type_.role != 'fixed':
705 atoms += ["OP_%s" % type_.name]
707 print_operations('atom', 1, atoms)
708 print_operations('function', "OP_atom_last + 1", funcs)
709 print_operations('operator', "OP_function_last + 1", opers)
710 print_range("OP_composite", "OP_function_first", "OP_operator_last")
711 out_file.write(",\n\n")
712 print_range("OP", "OP_atom_first", "OP_composite_last")
713 out_file.write("\n }\n")
714 out_file.write("operation_type, atom_type;\n")
716 print_predicate('is_operation', 'OP')
717 for key in ('atom', 'composite', 'function', 'operator'):
718 print_predicate("is_%s" % key, "OP_%s" % key)
720 def print_operations(type_, first, names):
721 out_file.write(" /* %s types. */\n" % type_.title())
722 out_file.write(" %s = %s,\n" % (names[0], first))
723 for name in names[1:]:
724 out_file.write(" %s,\n" % name)
725 print_range("OP_%s" % type_, names[0], names[len(names) - 1])
726 out_file.write(",\n\n")
728 def print_range(prefix, first, last):
729 out_file.write(" %s_first = %s,\n" % (prefix, first))
730 out_file.write(" %s_last = %s,\n" % (prefix, last))
731 out_file.write(" n_%s = %s_last - %s_first + 1" % (prefix, prefix, prefix))
733 def print_predicate(function, category):
736 out_file.write("\nstatic inline bool\n")
737 out_file.write("%s (operation_type op)\n" % function)
738 out_file.write("{\n")
739 if function != 'is_operation':
740 out_file.write(" assert (is_operation (op));\n")
741 out_file.write(" return op >= %s_first && op <= %s_last;\n" % (category, category))
742 out_file.write("}\n")
744 def generate_optimize_inc():
748 if not op.optimizable or op.unimplemented:
749 out_file.write("case %s:\n" % opname)
750 out_file.write(" NOT_REACHED ();\n\n")
758 ctype = c_type(type_)
760 func = "get_%s_arg" % type_.atom
761 decls += ["%sarg_%s = %s (node, %s)" % (ctype, name, func, arg_idx)]
763 decl = "size_t arg_%s = node->n_args" % arg.idx
765 decl += " - %s" % arg_idx
768 decls += ["%s*arg_%s = get_%s_args (node, %s, arg_%s, e)" % (ctype, name, type_.atom, arg_idx, arg.idx)]
771 sysmis_cond = make_sysmis_decl (op, "node->min_valid")
772 if sysmis_cond is not None:
773 decls += [sysmis_cond]
777 args += ["arg_%s" % arg.name]
778 if arg.idx is not None:
779 idx = 'arg_%s' % arg.idx
781 idx += " / %s" % arg.times
786 if type_.role == 'leaf':
787 func = "get_%s_arg" % type_.atom
788 args += "%s (node, %s)" % (func, arg_idx)
790 elif type_.role == 'fixed':
791 args += [type_.fixed_value]
795 result = "eval_%s (%s)" % (op.opname, ', '.join(args))
796 if decls and sysmis_cond is not None:
797 miss_ret = op.returns.missing_value
798 decls += ['%sresult = force_sysmis ? %s : %s' % (c_type(op.returns), miss_ret, result)]
801 out_file.write("case %s:\n" % opname)
802 alloc_func = "expr_allocate_%s" % op.returns.name
804 out_file.write(" {\n")
806 out_file.write(" %s;\n" % decl)
807 out_file.write(" return %s (e, %s);\n" % (alloc_func, result))
808 out_file.write(" }\n")
810 out_file.write(" return %s (e, %s);\n" % (alloc_func, result))
813 def generate_parse_inc():
814 members = ['""', '""', '0', '0', '0', "{}", '0', '0']
815 out_file.write("{%s},\n" % ', '.join(members))
817 for type_ in types.values():
818 if type_.role != 'fixed':
819 members = ('"%s"' % type_.name, '"%s"' % type_.human_name, '0', "OP_%s" % type_.name, '0', "{}", '0', '0')
820 out_file.write("{%s},\n" % ', '.join(members))
826 members += ['"%s"' % op.name]
828 if op.category == 'function':
833 args += [arg.type_.human_name]
835 array = array_arg(op)
836 if array is not None:
837 if op.min_valid == 0:
839 for i in range(array.times):
840 array_args += [array.type_.human_name]
842 opt_args = array_args
844 for i in range(op.min_valid):
845 args += [array.type_.human_name]
846 opt_args += [array.type_.human_name]
847 human = "%s(%s" % (op.name, ', '.join(args))
849 human += '[, %s]...' % ', '.join(opt_args)
851 members += ['"%s"' % human]
857 flags += ['OPF_ABSORB_MISS']
859 flags += ['OPF_ARRAY_OPERAND']
861 flags += ['OPF_MIN_VALID']
862 if not op.optimizable:
863 flags += ['OPF_NONOPTIMIZABLE']
865 flags += ['OPF_EXTENSION']
867 flags += ['OPF_UNIMPLEMENTED']
869 flags += ['OPF_PERM_ONLY']
871 flags += ['OPF_NO_ABBREV']
872 members += [' | '.join(flags) if flags else '0']
874 members += ['OP_%s' % op.returns.name]
876 members += ['%s' % len(op.args)]
878 arg_types = ["OP_%s" % arg.type_.name for arg in op.args]
879 members += ['{%s}' % ', '.join(arg_types)]
881 members += ['%s' % op.min_valid]
883 members += ['%s' % (array_arg(op).times if array_arg(op) else 0)]
885 out_file.write('{%s},\n' % ', '.join(members))
889 def make_sysmis_decl(op, min_valid_src):
890 """Returns a declaration for a boolean variable called `force_sysmis',
891 which will be true when operation 'op' should be system-missing.
892 Returns None if there are no such circumstances.
894 If 'op' has a minimum number of valid arguments, 'min_valid_src'
895 should be an an expression that evaluates to the minimum number of
896 valid arguments for 'op'.
900 if not op.absorb_miss:
902 arg_name = 'arg_%s' % arg.name
904 if arg.type_.name in ['number', 'boolean']:
905 sysmis_cond += ["!is_valid (%s)" % arg_name]
906 elif arg.type_.name == 'number':
908 n = 'arg_%s' % arg.idx
909 sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)]
910 elif op.min_valid > 0:
913 a = 'arg_%s' % arg.name
914 n = 'arg_%s' % arg.idx
915 sysmis_cond += ["count_valid (%s, %s) < %s" % (a, n, min_valid_src)]
917 if arg.condition is not None:
918 sysmis_cond += ['!(%s)' % arg.condition]
920 return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond)
924 """If 'op' has an array argument, returns it. Otherwise, returns
930 if last_arg.idx is not None:
936 %(argv0)s, for generating expression parsers and evaluators from definitions
937 usage: generate.pl -o OUTPUT [-i INPUT] [-h]
938 -i INPUT input file containing definitions (default: operations.def)
939 -o OUTPUT output file
940 -h display this help message
941 """ % {'argv0': argv0})
944 if __name__ == "__main__":
946 options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
950 except getopt.GetoptError as geo:
951 sys.stderr.write("%s: %s\n" % (argv0, geo.msg))
954 in_file_name = 'operations.def'
956 for key, value in options:
957 if key in ['-h', '--help']:
959 elif key in ['-i', '--input']:
961 elif key in ['-o', '--output']:
962 out_file_name = value
966 if out_file_name is None:
967 sys.stderr.write("%(argv0)s: output file must be specified "
968 "(use --help for help)\n" % {'argv0': argv0})
971 in_file = open(in_file_name, 'r')
972 out_file = open(out_file_name, 'w')
978 if out_file_name.endswith('evaluate.h'):
979 generate_evaluate_h()
980 elif out_file_name.endswith('evaluate.inc'):
981 generate_evaluate_inc()
982 elif out_file_name.endswith('operations.h'):
983 generate_operations_h()
984 elif out_file_name.endswith('optimize.inc'):
985 generate_optimize_inc()
986 elif out_file_name.endswith('parse.inc'):
989 sys.stderr.write("%(argv0)s: unknown output type\n")