2 # PSPP - a program for statistical analysis.
3 # Copyright (C) 2017, 2021 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 Defines all our types.
28 Initializes 'types' global.
34 # Common user-visible types used throughout evaluation trees.
35 init_type('number', 'any', C_TYPE='double',
36 ATOM='number', MANGLE='n', HUMAN_NAME='number',
37 STACK='ns', MISSING_VALUE='SYSMIS')
38 init_type('string', 'any', C_TYPE='struct substring',
39 ATOM='string', MANGLE='s', HUMAN_NAME='string',
40 STACK='ss', MISSING_VALUE='empty_string')
41 init_type('boolean', 'any', C_TYPE='double',
42 ATOM='number', MANGLE='n', HUMAN_NAME='boolean',
43 STACK='ns', MISSING_VALUE='SYSMIS')
46 init_type('format', 'atom')
47 init_type('ni_format', 'leaf', C_TYPE='const struct fmt_spec *',
48 ATOM='format', MANGLE='f',
49 HUMAN_NAME='num_input_format')
50 init_type('no_format', 'leaf', C_TYPE='const struct fmt_spec *',
51 ATOM='format', MANGLE='f',
52 HUMAN_NAME='num_output_format')
55 init_type('integer', 'leaf', C_TYPE='int',
56 ATOM='integer', MANGLE='n', HUMAN_NAME='integer')
57 init_type('pos_int', 'leaf', C_TYPE='int',
58 ATOM='integer', MANGLE='n',
59 HUMAN_NAME='positive_integer_constant')
62 init_type('variable', 'atom')
63 init_type('num_var', 'leaf', C_TYPE='const struct variable *',
64 ATOM='variable', MANGLE='Vn',
65 HUMAN_NAME='num_variable')
66 init_type('str_var', 'leaf', C_TYPE='const struct variable *',
67 ATOM='variable', MANGLE='Vs',
68 HUMAN_NAME='string_variable')
69 init_type('var', 'leaf', C_TYPE='const struct variable *',
70 ATOM='variable', MANGLE='V',
71 HUMAN_NAME='variable')
74 init_type('vector', 'leaf', C_TYPE='const struct vector *',
75 ATOM='vector', MANGLE='v', HUMAN_NAME='vector')
78 init_type('expression', 'fixed', C_TYPE='struct expression *',
80 init_type('case', 'fixed', C_TYPE='const struct ccase *',
82 init_type('case_idx', 'fixed', C_TYPE='size_t',
83 FIXED_VALUE='case_idx')
84 init_type('dataset', 'fixed', C_TYPE='struct dataset *',
87 # One of these is emitted at the end of each expression as a sentinel
88 # that tells expr_evaluate() to return the value on the stack.
89 init_type('return_number', 'atom')
90 init_type('return_string', 'atom')
92 # Used only for debugging purposes.
93 init_type('operation', 'atom')
def init_type(name, role, **rest):
    """Registers one expression type in the global 'types' table.

    init_type has 2 required arguments:

    name: The type's name.

        'name' is the type's name in operations.def.

        `OP_$name' is the terminal's type in operations.h.

        `expr_allocate_$name()' allocates a node of the given type.

    role: How the type may be used:

        "any": Usable as operands and function arguments, and
        function and operator results.

        "leaf": Usable as operands and function arguments, but
        not as function or operator results.  (Thus, they appear
        only in leaf nodes in the parse tree.)

        "fixed": Not allowed either as an operand or argument
        type or a result type.  Used only as auxiliary data.

        "atom": Not allowed anywhere; only the name is registered.

    All types except those with "atom" as their role also require:

    C_TYPE: The C type that represents this abstract type.

    Types with "any" or "leaf" role require:

    ATOM: `$atom' is the `struct operation_data' member name, and
    get_$atom_name() obtains the corresponding data.

    MANGLE: Short string for name mangling.  Use identical strings
    if two types should not be overloaded.

    HUMAN_NAME: Name for a type when we describe it to the user.

    Types with role "any" require:

    STACK: Name of the local variable in expr_evaluate(), used for
    maintaining the stack for this type.

    MISSING_VALUE: Expression used for the missing value of this
    type.

    Types with role "fixed" require:

    FIXED_VALUE: Expression used for the value of this type.

    Writes a message to standard error and exits with status 1 if
    'role' is not one of the roles above, if a key required by the role
    is missing, or if a key the role does not use is supplied.
    """
    new_type = {'NAME': name, 'ROLE': role} | rest

    # Keys each role requires in addition to NAME and ROLE.
    role_keys = {
        'any': ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME', 'STACK', 'MISSING_VALUE'],
        'leaf': ['C_TYPE', 'ATOM', 'MANGLE', 'HUMAN_NAME'],
        'fixed': ['C_TYPE', 'FIXED_VALUE'],
        'atom': [],
    }
    if role not in role_keys:
        sys.stderr.write("no role '%s'\n" % role)
        sys.exit(1)
    need_keys = ['NAME', 'ROLE'] + role_keys[role]

    # Every required key must have been supplied...
    for key in need_keys:
        if key not in new_type:
            sys.stderr.write("%s lacks %s\n" % (name, key))
            sys.exit(1)
    # ...and nothing beyond the required keys may be supplied.
    for key in new_type:
        if key not in need_keys:
            sys.stderr.write("%s has superfluous key %s\n" % (name, key))
            sys.exit(1)

    types[name] = new_type
182 """Returns the C type of the given type as a string designed to be
183 prepended to a variable name to produce a declaration. (That
184 won't work in general but it works well enough for our types.)
186 c_type = type_["C_TYPE"]
187 if not c_type.endswith('*'):
194 """Parses the entire input.
196 Initializes ops, funcs, opers."""
215 while toktype != 'eof':
218 'UNIMPLEMENTED': False,
221 'ABSORB_MISS': False,
225 if match('extension'):
226 op['EXTENSION'] = True
227 elif match('no_opt'):
228 op['OPTIMIZABLE'] = False
229 elif match('absorb_miss'):
230 op['ABSORB_MISS'] = True
231 elif match('perm_only'):
232 op['PERM_ONLY'] = True
233 elif match('no_abbrev'):
234 op['NO_ABBREV'] = True
238 return_type = parse_type()
239 if return_type is None:
240 return_type = types['number']
241 if return_type['NAME'] not in ['number', 'string', 'boolean']:
242 sys.stderr.write('%s is not a valid return type\n' % return_type['NAME'])
244 op['RETURNS'] = return_type
246 op['CATEGORY'] = token
247 if op['CATEGORY'] not in ['operator', 'function']:
248 sys.stderr.write("'operator' or 'function' expected at '%s'" % token)
253 if op['CATEGORY'] == 'function' and '_' in name:
254 sys.stderr.write("function name '%s' may not contain underscore\n" % name)
256 elif op['CATEGORY'] == 'operator' and '.' in name:
257 sys.stderr.write("operator name '%s' may not contain period\n" % name)
260 m = re.match(r'(.*)\.(\d+)$', name)
262 prefix, suffix = m.groups()
264 op['MIN_VALID'] = int(suffix)
265 op['ABSORB_MISS'] = True
272 while not match(')'):
275 if arg.idx is not None:
278 sys.stderr.write('array must be last argument\n')
284 for arg in op['ARGS']:
285 if arg.condition is not None:
286 any_arg = '|'.join([a.name for a in op['ARGS']])
287 arg.condition = re.sub(r'\b(%s)\b' % any_arg, r'arg_\1', arg.condition)
289 opname = 'OP_' + op['NAME']
290 opname = opname.replace('.', '_')
291 if op['CATEGORY'] == 'function':
292 mangle = ''.join([a.type_['MANGLE'] for a in op['ARGS']])
293 op['MANGLE'] = mangle
294 opname += '_' + mangle
295 op['OPNAME'] = opname
297 if op['MIN_VALID'] > 0:
300 sys.stderr.write("can't have minimum valid count without array arg\n")
302 if aa.type_['NAME'] != 'number':
303 sys.stderr.write('minimum valid count allowed only with double array\n')
306 sys.stderr.write("can't have minimu valid count if array has multiplication factor\n")
310 while toktype == 'id':
313 sys.stderr.write('parse error\n')
315 if type_['ROLE'] not in ['leaf', 'fixed']:
316 sys.stderr.write("'%s' is not allowed as auxiliary data\n"
320 op['AUX'] += [{'TYPE': type_, 'NAME': name}]
323 if op['OPTIMIZABLE']:
324 if op['NAME'].startswith('RV.'):
325 sys.stderr.write("random variate functions must be marked 'no_opt'\n")
327 for key in ['CASE', 'CASE_IDX']:
329 sys.stderr.write("operators with %s aux data must be marked 'no_opt'\n" % key)
332 if op['RETURNS']['NAME'] == 'string' and not op['ABSORB_MISS']:
333 for arg in op['ARGS']:
334 if arg.type_['NAME'] in ['number', 'boolean']:
335 sys.stderr.write("'%s' returns string and has double or bool "
336 "argument, but is not marked ABSORB_MISS\n"
339 if arg.condition is not None:
340 sys.stderr.write("'%s' returns string but has argument with condition\n")
343 if toktype == 'block':
344 op['BLOCK'] = force('block')
345 elif toktype == 'expression':
346 if token == 'unimplemented':
347 op['UNIMPLEMENTED'] = True
349 op['EXPRESSION'] = token
352 sys.stderr.write("block or expression expected\n")
356 sys.stderr.write("duplicate operation name %s\n" % opname)
359 if op['CATEGORY'] == 'function':
365 funcs = sorted(funcs, key=lambda name: (ops[name]['NAME'], ops[name]['OPNAME']))
366 opers = sorted(opers, key=lambda name: ops[name]['NAME'])
367 order = funcs + opers
370 """Reads the next token into 'token' and 'toktype'."""
380 m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
382 token, line = m.groups()
386 m = re.match(r'([0-9]+)(.*)$', line)
388 token, line = m.groups()
393 m = re.match(r'([][(),*;.])(.*)$', line)
395 token, line = m.groups()
399 m = re.match(r'=\s*(.*)$', line)
401 toktype = 'expression'
403 token = accumulate_balanced(';')
406 m = re.match(r'{(.*)$', line)
410 token = accumulate_balanced('}')
411 token = token.rstrip('\n')
414 sys.stderr.write("bad character '%s' in input\n" % line[0])
418 """Skip whitespace."""
421 sys.stderr.write("unexpected end of file\n")
# accumulate_balanced(end, swallow_end=True)
#
# Scans the characters of the global 'line' (see the enumerate() loop below)
# looking for a character contained in 'end'.  A match only counts while the
# bracket nesting depth 'nest' is zero, so text inside balanced (), [], or {}
# is passed over.  Writes 'unbalanced parentheses' to standard error when the
# brackets do not pair up.
#
# NOTE(review): the handling of 'swallow_end' is not visible in this part of
# the function; presumably it controls whether the terminating character is
# consumed -- confirm against the full body.
436 def accumulate_balanced(end, swallow_end=True):
437 """Accumulates input until a character in 'end' is encountered,
438 except that balanced pairs of (), [], or {} cause 'end' to be
439 ignored. Returns the input read.
445 for idx, c in enumerate(line):
446 if c in end and nest == 0:
458 sys.stderr.write('unbalanced parentheses\n')
465 """Reads the next line from INPUT into 'line'."""
468 line = in_file.readline()
473 line = line.rstrip('\r\n')
474 comment_ofs = line.find('//')
476 line = line[:comment_ofs]
479 """If the current token is an identifier that names a type, returns
480 the type and skips to the next token. Otherwise, returns
484 for type_ in types.values():
485 if type_.get("NAME") == token:
491 """Makes sure that 'toktype' equals 'type', reads the next token, and
492 returns the previous 'token'.
496 sys.stderr.write("parse error at `%s' expecting %s\n" % (token, type_))
503 """If 'token' equals 'tok', reads the next token and returns true.
504 Otherwise, returns false."""
def force_match(tok):
    """Requires the current 'token' to equal 'tok' and reads past it.

    When the token does not match, reports a parse error on standard
    error and exits.
    """
    if match(tok):
        return
    sys.stderr.write("parse error at `%s' expecting `%s'\n" % (token, tok))
    sys.exit(1)
520 def __init__(self, name, type_, idx, times, condition):
525 self.condition = condition
528 """Parses and returns a function argument."""
531 type_ = types['number']
534 sys.stderr.write("argument name expected at `%s'\n" % token)
547 if type_['NAME'] not in ('number', 'string'):
548 sys.stderr.write('only double and string arrays supported\n')
554 sys.stderr.write('multiplication factor must be two\n')
559 condition = name + ' ' + accumulate_balanced(',)', swallow_end=False)
562 return Arg(name, type_, idx, times, condition)
565 """Prints the output file header."""
568 Generated from %s by generate.py.
571 """ % (out_file_name, in_file_name))
574 """Prints the output file trailer."""
def generate_evaluate_h():
    """Writes evaluate.h: a `static inline' eval_<opname> C function for
    each implemented operation in 'order'.

    Each function takes the operation's arguments (an array argument
    becomes a C array plus a size_t count) followed by its auxiliary
    data, and its body is either the operation's BLOCK or a single
    `return' of its EXPRESSION.
    """
    out_file.write("#include \"helpers.h\"\n\n")

    for opname in order:
        op = ops[opname]
        if op['UNIMPLEMENTED']:
            continue

        params = []
        for arg in op['ARGS']:
            declarator = c_type(arg.type_) + arg.name
            if arg.idx is None:
                params.append(declarator)
            else:
                # Array argument: pass the array and its element count.
                params.append(declarator + '[]')
                params.append('size_t %s' % arg.idx)
        params.extend(c_type(aux['TYPE']) + aux['NAME'] for aux in op['AUX'])
        if not params:
            params.append('void')

        if 'BLOCK' in op:
            body = op['BLOCK'] + '\n'
        else:
            body = " return %s;\n" % op['EXPRESSION']

        out_file.write("".join([
            "static inline %s\n" % c_type(op['RETURNS']),
            "eval_%s (%s)\n" % (opname, ', '.join(params)),
            "{\n",
            body,
            "}\n\n",
        ]))
# generate_evaluate_inc()
#
# Writes one C `case' to out_file per operation.  For an implemented
# operation the case declares a local for each argument and for each leaf
# auxiliary value, calls eval_<OPNAME> with them, pushes the result onto the
# stack named by the return type's STACK, and ends with `break'.  When
# make_sysmis_decl() supplies a force_sysmis declaration, the pushed value is
# the return type's MISSING_VALUE whenever force_sysmis is true.
616 def generate_evaluate_inc():
# An unimplemented operation gets a case that must never execute.
619 if op['UNIMPLEMENTED']:
620 out_file.write("case %s:\n" % opname)
621 out_file.write(" NOT_REACHED ();\n\n")
626 for arg in op['ARGS']:
628 ctype = c_type(type_)
629 args += ['arg_%s' % arg.name]
631 decl = '%sarg_%s' % (ctype, arg.name)
# Operands of role 'any' are popped from their type's stack; the
# declaration is placed in front of those already collected.
632 if type_['ROLE'] == 'any':
633 decls = ['%s = *--%s' % (decl, type_['STACK'])] + decls
# Operands of role 'leaf' are read from the operation stream (op++) and
# their declaration is appended.
634 elif type_['ROLE'] == 'leaf':
635 decls += ['%s = op++->%s' % (decl, type_['ATOM'])]
# Array operand: the element count is read from the operation stream and the
# stack pointer is moved back by that count; both declarations are prepended,
# the count first.
640 stack = type_['STACK']
641 decls = ['%s*arg_%s = %s -= arg_%s' % (ctype, arg.name, stack, idx)] + decls
642 decls = ['size_t arg_%s = op++->integer' % idx] + decls
# The count handed to eval_<OPNAME> is divided by the argument's 'times'.
646 idx += ' / %s' % arg.times
648 for aux in op['AUX']:
# Leaf auxiliary data is read from the operation stream into aux_<name>;
# fixed auxiliary data is passed as the type's FIXED_VALUE expression.
651 if type_['ROLE'] == 'leaf':
652 ctype = c_type(type_)
653 decls += ['%saux_%s = op++->%s' % (ctype, name, type_['ATOM'])]
654 args += ['aux_%s' % name]
655 elif type_['ROLE'] == 'fixed':
656 args += [type_['FIXED_VALUE']]
# The minimum valid count, when needed, is read from the operation stream.
658 sysmis_cond = make_sysmis_decl(op, 'op++->integer')
659 if sysmis_cond is not None:
660 decls += [sysmis_cond]
662 result = 'eval_%s (%s)' % (op['OPNAME'], ', '.join(args))
664 stack = op['RETURNS']['STACK']
666 out_file.write("case %s:\n" % opname)
668 out_file.write(" {\n")
670 out_file.write(" %s;\n" % decl)
671 if sysmis_cond is not None:
672 miss_ret = op['RETURNS']['MISSING_VALUE']
673 out_file.write(" *%s++ = force_sysmis ? %s : %s;\n" % (stack, miss_ret, result))
675 out_file.write(" *%s++ = %s;\n" % (stack, result))
676 out_file.write(" }\n")
678 out_file.write(" *%s++ = %s;\n" % (stack, result))
679 out_file.write(" break;\n\n")
def generate_operations_h():
    """Writes operations.h: the operation_type/atom_type enum and the
    inline predicates that classify its values.

    The enum lists every type whose role is not 'fixed' as an atom,
    then the functions, then the operators, with first/last/count
    markers for each group, for the composite range (functions through
    operators), and for the whole enumeration.
    """
    for text in ("#include <stdlib.h>\n",
                 "#include <stdbool.h>\n\n",
                 "typedef enum",
                 " {\n"):
        out_file.write(text)

    atoms = ["OP_%s" % entry['NAME']
             for entry in types.values()
             if entry['ROLE'] != 'fixed']

    print_operations('atom', 1, atoms)
    print_operations('function', "OP_atom_last + 1", funcs)
    print_operations('operator', "OP_function_last + 1", opers)
    print_range("OP_composite", "OP_function_first", "OP_operator_last")
    out_file.write(",\n\n")
    print_range("OP", "OP_atom_first", "OP_composite_last")
    out_file.write("\n }\n")
    out_file.write("operation_type, atom_type;\n")

    predicates = [('is_operation', 'OP')]
    predicates += [("is_%s" % key, "OP_%s" % key)
                   for key in ('atom', 'composite', 'function', 'operator')]
    for function, category in predicates:
        print_predicate(function, category)
def print_operations(type_, first, names):
    """Writes one group of enumerators to out_file.

    type_: Group label ('atom', 'function', ...), used in the heading
        comment and in the OP_<type_> range markers.
    first: Value assigned to the first enumerator.
    names: Enumerator names, in order.  Must not be empty.
    """
    out_file.write(" /* %s types. */\n" % type_.title())
    leader = names[0]
    out_file.write(" %s = %s,\n" % (leader, first))
    out_file.writelines(" %s,\n" % follower for follower in names[1:])
    print_range("OP_%s" % type_, leader, names[-1])
    out_file.write(",\n\n")
def print_range(prefix, first, last):
    """Writes the <prefix>_first, <prefix>_last, and n_<prefix> enumerators.

    The final enumerator is written without a trailing comma or newline;
    the caller supplies whatever follows.
    """
    out_file.writelines((
        " %s_first = %s,\n" % (prefix, first),
        " %s_last = %s,\n" % (prefix, last),
        " n_%s = %s_last - %s_first + 1" % (prefix, prefix, prefix),
    ))
def print_predicate(function, category):
    """Writes a C inline predicate named 'function' that tests whether an
    operation_type lies between <category>_first and <category>_last.

    Every predicate other than is_operation itself first asserts
    is_operation (op).
    """
    pieces = [
        "\nstatic inline bool\n",
        "%s (operation_type op)\n" % function,
        "{\n",
    ]
    if function != 'is_operation':
        pieces.append(" assert (is_operation (op));\n")
    pieces.append(" return op >= %s_first && op <= %s_last;\n" % (category, category))
    pieces.append("}\n")
    out_file.write("".join(pieces))
def generate_optimize_inc():
    """Writes optimize.inc: one C `case' per operation in 'order'.

    Operations that are not optimizable or not implemented get a case
    that must never be reached.  Every other case fetches the
    operation's arguments from 'node', calls eval_<OPNAME> on them, and
    returns a node allocated by expr_allocate_<return type> holding the
    result.  When make_sysmis_decl() supplies a force_sysmis
    declaration, the result is the return type's MISSING_VALUE whenever
    force_sysmis is true.
    """
    for opname in order:
        op = ops[opname]

        if not op['OPTIMIZABLE'] or op['UNIMPLEMENTED']:
            out_file.write("case %s:\n" % opname)
            out_file.write(" NOT_REACHED ();\n\n")
            continue

        # Declarations that pull each argument out of 'node'.  arg_idx is
        # the position within the node's arguments of the next value.
        decls = []
        arg_idx = 0
        for arg in op['ARGS']:
            name = arg.name
            type_ = arg.type_
            ctype = c_type(type_)
            if arg.idx is None:
                func = "get_%s_arg" % type_['ATOM']
                decls += ["%sarg_%s = %s (node, %s)" % (ctype, name, func, arg_idx)]
            else:
                # Array argument: it takes every node argument from
                # arg_idx onward.
                decl = "size_t arg_%s = node->n_args" % arg.idx
                if arg_idx > 0:
                    decl += " - %s" % arg_idx
                decls += [decl]

                decls += ["%s*arg_%s = get_%s_args (node, %s, arg_%s, e)" % (ctype, name, type_['ATOM'], arg_idx, arg.idx)]
            arg_idx += 1

        sysmis_cond = make_sysmis_decl (op, "node->min_valid")
        if sysmis_cond is not None:
            decls += [sysmis_cond]

        # Actual arguments for the eval_<OPNAME> call.
        args = []
        for arg in op['ARGS']:
            args += ["arg_%s" % arg.name]
            if arg.idx is not None:
                idx = 'arg_%s' % arg.idx
                if arg.times != 1:
                    idx += " / %s" % arg.times
                args += [idx]

        for aux in op['AUX']:
            type_ = aux['TYPE']
            if type_['ROLE'] == 'leaf':
                func = "get_%s_arg" % type_['ATOM']
                # Append the call as ONE argument.  `args += <str>' would
                # extend the list with the string's individual characters.
                args += ["%s (node, %s)" % (func, arg_idx)]
                arg_idx += 1
            elif type_['ROLE'] == 'fixed':
                args += [type_['FIXED_VALUE']]
            else:
                # The parser only admits 'leaf' and 'fixed' auxiliary data.
                raise AssertionError("unexpected aux role '%s'" % type_['ROLE'])

        result = "eval_%s (%s)" % (op['OPNAME'], ', '.join(args))
        if decls and sysmis_cond is not None:
            miss_ret = op['RETURNS']['MISSING_VALUE']
            decls += ['%sresult = force_sysmis ? %s : %s' % (c_type(op['RETURNS']), miss_ret, result)]
            result = 'result'

        out_file.write("case %s:\n" % opname)
        alloc_func = "expr_allocate_%s" % op['RETURNS']['NAME']
        if decls:
            out_file.write(" {\n")
            for decl in decls:
                out_file.write(" %s;\n" % decl)
            out_file.write(" return %s (e, %s);\n" % (alloc_func, result))
            out_file.write(" }\n")
        else:
            out_file.write(" return %s (e, %s);\n" % (alloc_func, result))
        out_file.write("\n")
# generate_parse_inc()
#
# Writes the rows of the operation table to out_file, each as a brace-enclosed,
# comma-separated initializer: first an all-empty row, then one row per type
# whose role is not 'fixed', then one row per operation.  An operation's row
# is accumulated in 'members': its name, a human-readable prototype string,
# its OPF_* flags joined with ` | ' (or 0 when there are none), OP_<return
# type>, the number of arguments, the OP_<type> of each argument, MIN_VALID,
# and the array argument's 'times' (0 when there is no array argument).
798 def generate_parse_inc():
# Placeholder first row with empty names and zeroed fields.
799 members = ['""', '""', '0', '0', '0', "{}", '0', '0']
800 out_file.write("{%s},\n" % ', '.join(members))
# One row per non-fixed type; HUMAN_NAME falls back to NAME when absent.
802 for type_ in types.values():
803 if type_['ROLE'] != 'fixed':
804 human_name = type_.get('HUMAN_NAME', type_['NAME'])
805 members = ('"%s"' % type_['NAME'], '"%s"' % human_name, '0', "OP_%s" % type_['NAME'], '0', "{}", '0', '0')
806 out_file.write("{%s},\n" % ', '.join(members))
812 members += ['"%s"' % op['NAME']]
# For functions, build the prototype text shown to users: NAME(arg types...),
# with optional trailing array elements rendered as `[, type]...'.
814 if op['CATEGORY'] == 'function':
817 for arg in op['ARGS']:
819 args += [arg.type_['HUMAN_NAME']]
821 array = array_arg(op)
822 if array is not None:
823 if op['MIN_VALID'] == 0:
825 for i in range(array.times):
826 array_args += [array.type_['HUMAN_NAME']]
828 opt_args = array_args
830 for i in range(op['MIN_VALID']):
831 args += [array.type_['HUMAN_NAME']]
832 opt_args += [array.type_['HUMAN_NAME']]
833 human = "%s(%s" % (op['NAME'], ', '.join(args))
835 human += '[, %s]...' % ', '.join(opt_args)
837 members += ['"%s"' % human]
# Collect the OPF_* flag names that apply to this operation.
842 if op['ABSORB_MISS']:
843 flags += ['OPF_ABSORB_MISS']
845 flags += ['OPF_ARRAY_OPERAND']
846 if op['MIN_VALID'] > 0:
847 flags += ['OPF_MIN_VALID']
848 if not op['OPTIMIZABLE']:
849 flags += ['OPF_NONOPTIMIZABLE']
851 flags += ['OPF_EXTENSION']
852 if op['UNIMPLEMENTED']:
853 flags += ['OPF_UNIMPLEMENTED']
855 flags += ['OPF_PERM_ONLY']
857 flags += ['OPF_NO_ABBREV']
858 members += [' | '.join(flags) if flags else '0']
860 members += ['OP_%s' % op['RETURNS']['NAME']]
862 members += ['%s' % len(op['ARGS'])]
864 arg_types = ["OP_%s" % arg.type_['NAME'] for arg in op['ARGS']]
865 members += ['{%s}' % ', '.join(arg_types)]
867 members += ['%s' % op['MIN_VALID']]
869 members += ['%s' % (array_arg(op).times if array_arg(op) else 0)]
871 out_file.write('{%s},\n' % ', '.join(members))
def make_sysmis_decl(op, min_valid_src):
    """Builds the C declaration of the boolean `force_sysmis' for 'op'.

    force_sysmis is true when operation 'op' should yield its missing
    value.  Returns the declaration as a string, or None if there are
    no such circumstances.

    If 'op' has a minimum number of valid arguments, 'min_valid_src'
    should be an expression that evaluates to the minimum number of
    valid arguments for 'op'.
    """
    conditions = []
    operands = op['ARGS']

    if not op['ABSORB_MISS']:
        # Any invalid number/boolean operand, or any invalid element of a
        # numeric array operand, forces a missing result.
        for operand in operands:
            c_name = 'arg_%s' % operand.name
            type_name = operand.type_['NAME']
            if operand.idx is None:
                if type_name in ['number', 'boolean']:
                    conditions.append("!is_valid (%s)" % c_name)
            elif type_name == 'number':
                count = 'arg_%s' % operand.idx
                conditions.append('count_valid (%s, %s) < %s' % (c_name, count, count))
    elif op['MIN_VALID'] > 0:
        # The array operand is always the last one.
        array = operands[-1]
        values = 'arg_%s' % array.name
        count = 'arg_%s' % array.idx
        conditions.append("count_valid (%s, %s) < %s" % (values, count, min_valid_src))

    # An operand whose declared condition fails also forces missing.
    conditions.extend('!(%s)' % operand.condition
                      for operand in operands
                      if operand.condition is not None)

    if not conditions:
        return None
    return 'bool force_sysmis = %s' % ' || '.join(conditions)
910 """If 'op' has an array argument, returns it. Otherwise, returns
916 if last_arg.idx is not None:
922 %(argv0)s, for generating expression parsers and evaluators from definitions
923 usage: generate.pl -o OUTPUT [-i INPUT] [-h]
924 -i INPUT input file containing definitions (default: operations.def)
925 -o OUTPUT output file
926 -h display this help message
927 """ % {'argv0': argv0})
# Script entry point.
#
# Parses the command line with GNU-style getopt (short options -h, -i INPUT,
# -o OUTPUT).  The input file defaults to 'operations.def'; an output file is
# mandatory.  Opens both files and picks the generator to run from the suffix
# of the output file name: evaluate.h, evaluate.inc, operations.h,
# optimize.inc, or parse.inc.
930 if __name__ == "__main__":
932 options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
936 except getopt.GetoptError as geo:
937 sys.stderr.write("%s: %s\n" % (argv0, geo.msg))
940 in_file_name = 'operations.def'
942 for key, value in options:
943 if key in ['-h', '--help']:
945 elif key in ['-i', '--input']:
947 elif key in ['-o', '--output']:
948 out_file_name = value
# Without -o/--output there is nowhere to write, so this is reported as an
# error.
952 if out_file_name is None:
953 sys.stderr.write("%(argv0)s: output file must be specified "
954 "(use --help for help)\n" % {'argv0': argv0})
# NOTE(review): both files are opened without a `with' block; no close is
# visible in this part of the script -- confirm whether they are closed later.
957 in_file = open(in_file_name, 'r')
958 out_file = open(out_file_name, 'w')
# The generator is selected by the output file name's suffix.
964 if out_file_name.endswith('evaluate.h'):
965 generate_evaluate_h()
966 elif out_file_name.endswith('evaluate.inc'):
967 generate_evaluate_inc()
968 elif out_file_name.endswith('operations.h'):
969 generate_operations_h()
970 elif out_file_name.endswith('optimize.inc'):
971 generate_optimize_inc()
972 elif out_file_name.endswith('parse.inc'):
# NOTE(review): unlike the "output file must be specified" message above, no
# `% {'argv0': argv0}' is applied here, so the literal text "%(argv0)s" is
# written instead of the program name -- looks like an oversight; confirm.
975 sys.stderr.write("%(argv0)s: unknown output type\n")