2 # PSPP - a program for statistical analysis.
3 # Copyright (C) 2017, 2021 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 sys.stderr.write("%s\n" % s)
32 """Defines all our types.
34 Initializes 'types' global.
42 # Common user-visible types used throughout evaluation trees.
43 Type.new_any('number', 'double', 'number', 'n',
44 'number', 'ns', 'SYSMIS'),
45 Type.new_any('string', 'struct substring', 'string', 's',
46 'string', 'ss', 'empty_string'),
47 Type.new_any('boolean', 'double', 'number', 'n',
48 'boolean', 'ns', 'SYSMIS'),
51 Type.new_atom('format'),
52 Type.new_leaf('ni_format', 'const struct fmt_spec *',
53 'format', 'f', 'num_input_format'),
54 Type.new_leaf('no_format', 'const struct fmt_spec *',
55 'format', 'f', 'num_output_format'),
58 Type.new_leaf('integer', 'int', 'integer', 'n',
60 Type.new_leaf('pos_int', 'int', 'integer', 'n',
61 'positive_integer_constant'),
64 Type.new_atom('variable'),
65 Type.new_leaf('num_var', 'const struct variable *',
66 'variable', 'Vn', 'num_variable'),
67 Type.new_leaf('str_var', 'const struct variable *',
68 'variable', 'Vs', 'string_variable'),
69 Type.new_leaf('var', 'const struct variable *',
70 'variable', 'V', 'variable'),
73 Type.new_leaf('vector', 'const struct vector *',
74 'vector', 'v', 'vector'),
76 # Types that appear only as auxiliary data.
77 Type.new_auxonly('expression', 'struct expression *', 'e'),
78 Type.new_auxonly('case', 'const struct ccase *', 'c'),
79 Type.new_auxonly('case_idx', 'size_t', 'case_idx'),
80 Type.new_auxonly('dataset', 'struct dataset *', 'ds'),
82 # One of these is emitted at the end of each expression as a
83 # sentinel that tells expr_evaluate() to return the value on
85 Type.new_atom('return_number'),
86 Type.new_atom('return_string'),
88 # Used only for debugging purposes.
89 Type.new_atom('operation'),
95 def __init__(self, name, role, human_name, c_type=None):
98 self.human_name = human_name
100 if c_type.endswith('*'):
103 self.c_type = c_type + ' '
106 """Creates and returns a new atom Type with the given 'name'.
108 An atom isn't directly allowed as an operand or function
109 argument type. They are all exceptional cases in some way.
112 return Type(name, 'atom', name)
114 def new_any(name, c_type, atom, mangle, human_name, stack,
116 """Creates and returns a new Type that can appear in any context, that
117 is, it can be an operation's argument type or return type.
119 'c_type' is the type used for C objects of this type.
121 'mangle' should be a short string for name mangling purposes,
122 to allow overloading functions with the same name but
123 different argument types. Use the same 'mangle' for two
124 different types if those two types should not be overloaded.
126 'atom' should be the name of the member of "union
127 operation_data" that holds a value of this type.
129 'human_name' should be a name to use when describing this type
130 to the user (see Op.prototype()).
132 'stack' is the name of the local variable in expr_evaluate()
133 used for maintaining a stack of this type.
135 'missing_value' is the expression used for a missing value of
139 new = Type(name, 'any', human_name, c_type)
143 new.missing_value = missing_value
146 def new_leaf(name, c_type, atom, mangle, human_name):
147 """Creates and returns a new leaf Type. A leaf type can appear in
148 expressions as an operation's argument type, but not as a return type.
149 (Thus, it only appears in a parse tree as a leaf node.)
151 The other arguments are as for new_any().
153 new = Type(name, 'leaf', human_name, c_type)
158 def new_auxonly(name, c_type, auxonly_value):
159 """Creates and returns a new auxiliary-only Type. An auxiliary-only
160 Type is one that gets passed into the evaluation function but
161 isn't supplied directly by the user as an operand or argument.
163 'c_type' is as in new_any().
165 'auxonly_value' is the name of the local variable in
166 expr_evaluate() that has the value of this auxiliary data.
169 new = Type(name, 'auxonly', name, c_type)
170 new.auxonly_value = auxonly_value
174 """If the current token is an identifier that names a type, returns
175 the type and skips to the next token. Otherwise, returns
179 for type_ in types.values():
180 if type_.name == token:
186 class Category(enum.Enum):
187 FUNCTION = enum.auto()
188 OPERATOR = enum.auto()
192 def __init__(self, name, category, returns, args, aux, expression,
193 block, min_valid, optimizable, unimplemented,
194 extension, perm_only, absorb_miss, no_abbrev):
196 self.category = category
197 self.returns = returns
200 self.expression = expression
202 self.min_valid = min_valid
203 self.optimizable = optimizable
204 self.unimplemented = unimplemented
205 self.extension = extension
206 self.perm_only = perm_only
207 self.absorb_miss = absorb_miss
208 self.no_abbrev = no_abbrev
210 self.opname = ('OP_%s' % name).replace('.', '_')
211 if category == Category.FUNCTION:
212 self.opname += '_%s' % (''.join([a.type_.mangle for a in args]))
215 """If this operation has an array argument, returns it. Otherwise,
218 if self.args and self.args[-1].idx is not None:
223 def sysmis_decl(self, min_valid_src):
224 """Returns a declaration for a boolean variable called `force_sysmis',
225 which will be true when this operation should be
226 system-missing. Returns None if there are no such
229 If this operation has a minimum number of valid arguments,
230 'min_valid_src' should be an an expression that evaluates to
231 the minimum number of valid arguments for this operation.
235 if not self.absorb_miss:
236 for arg in self.args:
237 arg_name = 'arg_%s' % arg.name
239 if arg.type_.name in ['number', 'boolean']:
240 sysmis_cond += ['!is_valid (%s)' % arg_name]
241 elif arg.type_.name == 'number':
243 n = 'arg_%s' % arg.idx
244 sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)]
245 elif self.min_valid > 0:
248 a = 'arg_%s' % arg.name
249 n = 'arg_%s' % arg.idx
250 sysmis_cond += ['count_valid (%s, %s) < %s'
251 % (a, n, min_valid_src)]
252 for arg in self.args:
253 if arg.condition is not None:
254 sysmis_cond += ['!(%s)' % arg.condition]
256 return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond)
260 """Composes and returns a string that describes the function in a way
261 suitable for a human to understand, something like a C
262 function prototype, e.g. "ABS(number)" or "ANY(number,
263 number[, number]...)".
265 This doesn't make sense for operators so this function just
266 returns None for them.
269 if self.category == Category.FUNCTION:
272 for arg in self.args:
274 args += [arg.type_.human_name]
276 array = self.array_arg()
277 if array is not None:
278 if self.min_valid == 0:
280 for i in range(array.times):
281 array_args += [array.type_.human_name]
283 opt_args = array_args
285 for i in range(self.min_valid):
286 args += [array.type_.human_name]
287 opt_args += [array.type_.human_name]
289 prototype = '%s(%s' % (self.name, ', '.join(args))
291 prototype += '[, %s]...' % ', '.join(opt_args)
298 """Returns the OPF_* flags that apply to 'self'."""
301 flags += ['OPF_ABSORB_MISS']
303 flags += ['OPF_ARRAY_OPERAND']
304 if self.min_valid > 0:
305 flags += ['OPF_MIN_VALID']
306 if not self.optimizable:
307 flags += ['OPF_NONOPTIMIZABLE']
309 flags += ['OPF_EXTENSION']
310 if self.unimplemented:
311 flags += ['OPF_UNIMPLEMENTED']
313 flags += ['OPF_PERM_ONLY']
315 flags += ['OPF_NO_ABBREV']
316 return ' | '.join(flags) if flags else '0'
320 """Parses the entire input.
322 Initializes ops, funcs, opers."""
340 while toktype != 'eof':
342 unimplemented = False
348 if match('extension'):
350 elif match('no_opt'):
352 elif match('absorb_miss'):
354 elif match('perm_only'):
356 elif match('no_abbrev'):
361 return_type = Type.parse()
362 if return_type is None:
363 return_type = types['number']
364 if return_type.name not in ['number', 'string', 'boolean']:
365 die('%s is not a valid return type' % return_type.name)
367 if token == 'operator':
368 category = Category.OPERATOR
369 elif token == 'function':
370 category = Category.FUNCTION
372 die("'operator' or 'function' expected at '%s'" % token)
376 if category == Category.FUNCTION and '_' in name:
377 die("function name '%s' may not contain underscore" % name)
378 elif category == Category.OPERATOR and '.' in name:
379 die("operator name '%s' may not contain period" % name)
381 m = re.match(r'(.*)\.(\d+)$', name)
383 prefix, suffix = m.groups()
385 min_valid = int(suffix)
392 while not match(')'):
395 if arg.idx is not None:
398 die('array must be last argument')
404 if arg.condition is not None:
405 any_arg = '|'.join([a.name for a in args])
406 arg.condition = re.sub(r'\b(%s)\b' % any_arg,
407 r'arg_\1', arg.condition)
410 while toktype == 'id':
414 if type_.role not in ['leaf', 'auxonly']:
415 die("'%s' is not allowed as auxiliary data" % type_.name)
416 aux_name = force('id')
417 aux += [{'TYPE': type_, 'NAME': aux_name}]
421 if name.startswith('RV.'):
422 die("random variate functions must be marked 'no_opt'")
423 for key in ['CASE', 'CASE_IDX']:
425 die("operators with %s aux data must be marked 'no_opt'"
428 if return_type.name == 'string' and not absorb_miss:
430 if arg.type_.name in ['number', 'boolean']:
431 die("'%s' returns string and has double or bool "
432 "argument, but is not marked ABSORB_MISS" % name)
433 if arg.condition is not None:
434 die("'%s' returns string but has "
435 "argument with condition")
437 if toktype == 'block':
438 block = force('block')
440 elif toktype == 'expression':
441 if token == 'unimplemented':
448 die('block or expression expected')
450 op = Op(name, category,
451 return_type, args, aux,
454 optimizable, unimplemented, extension, perm_only, absorb_miss,
460 die("can't have minimum valid count without array arg")
461 if aa.type_.name != 'number':
462 die('minimum valid count allowed only with double array')
464 die("can't have minimum valid count if "
465 "array has multiplication factor")
468 die("duplicate operation name '%s'" % op.opname)
470 if category == Category.FUNCTION:
477 funcs = sorted(funcs, key=lambda f: (f.name, f.opname))
478 opers = sorted(opers, key=lambda o: o.name)
479 order = funcs + opers
483 """Reads the next token into 'token' and 'toktype'."""
493 m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
495 token, line = m.groups()
499 m = re.match(r'([0-9]+)(.*)$', line)
501 token, line = m.groups()
506 m = re.match(r'([][(),*;.])(.*)$', line)
508 token, line = m.groups()
512 m = re.match(r'=\s*(.*)$', line)
514 toktype = 'expression'
516 token = accumulate_balanced(';')
519 m = re.match(r'{(.*)$', line)
523 token = accumulate_balanced('}')
524 token = token.rstrip('\n')
527 die("bad character '%s' in input" % line[0])
531 """Skip whitespace."""
534 die('unexpected end of file')
549 def accumulate_balanced(end, swallow_end=True):
550 """Accumulates input until a character in 'end' is encountered,
551 except that balanced pairs of (), [], or {} cause 'end' to be
552 ignored. Returns the input read.
558 for idx, c in enumerate(line):
559 if c in end and nest == 0:
571 die('unbalanced parentheses')
578 """Reads the next line from INPUT into 'line'."""
581 line = in_file.readline()
586 line = line.rstrip('\r\n')
587 comment_ofs = line.find('//')
589 line = line[:comment_ofs]
593 """Makes sure that 'toktype' equals 'type', reads the next token, and
594 returns the previous 'token'.
598 die("parse error at `%s' expecting %s" % (token, type_))
605 """If 'token' equals 'tok', reads the next token and returns true.
606 Otherwise, returns false."""
614 def force_match(tok):
615 """If 'token' equals 'tok', reads the next token. Otherwise, flags an
619 die("parse error at `%s' expecting `%s'" % (token, tok))
623 def __init__(self, name, type_, idx, times, condition):
628 self.condition = condition
631 """Parses and returns a function argument."""
634 type_ = types['number']
637 die("argument name expected at `%s'" % token)
649 if type_.name not in ('number', 'string'):
650 die('only double and string arrays supported')
655 die('multiplication factor must be two')
659 condition = name + ' '
660 condition += accumulate_balanced(',)', swallow_end=False)
663 return Arg(name, type_, idx, times, condition)
667 """Prints the output file header."""
670 Generated from %s by generate.py.
673 """ % (out_file_name, in_file_name))
677 """Prints the output file trailer."""
689 def generate_evaluate_h():
690 out_file.write('#include "helpers.h"\n\n')
699 args += [arg.type_.c_type + arg.name]
701 args += [arg.type_.c_type + arg.name + '[]']
702 args += ['size_t %s' % arg.idx]
704 args += [aux['TYPE'].c_type + aux['NAME']]
709 statements = op.block + '\n'
711 statements = ' return %s;\n' % op.expression
713 out_file.write('static inline %s\n' % op.returns.c_type)
714 out_file.write('eval_%s (%s)\n' % (op.opname, ', '.join(args)))
715 out_file.write('{\n')
716 out_file.write(statements)
717 out_file.write('}\n\n')
720 def generate_evaluate_inc():
723 out_file.write('case %s:\n' % op.opname)
724 out_file.write(' NOT_REACHED ();\n\n')
731 c_type = type_.c_type
732 args += ['arg_%s' % arg.name]
734 decl = '%sarg_%s' % (c_type, arg.name)
735 if type_.role == 'any':
736 decls = ['%s = *--%s' % (decl, type_.stack)] + decls
737 elif type_.role == 'leaf':
738 decls += ['%s = op++->%s' % (decl, type_.atom)]
743 decls = ['%s*arg_%s = %s -= arg_%s'
744 % (c_type, arg.name, type_.stack, idx)] + decls
745 decls = ['size_t arg_%s = op++->integer' % idx] + decls
749 idx += ' / %s' % arg.times
754 if type_.role == 'leaf':
755 decls += ['%saux_%s = op++->%s'
756 % (type_.c_type, name, type_.atom)]
757 args += ['aux_%s' % name]
758 elif type_.role == 'auxonly':
759 args += [type_.auxonly_value]
761 sysmis_cond = op.sysmis_decl('op++->integer')
762 if sysmis_cond is not None:
763 decls += [sysmis_cond]
765 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
767 stack = op.returns.stack
769 out_file.write('case %s:\n' % op.opname)
771 out_file.write(' {\n')
773 out_file.write(' %s;\n' % decl)
774 if sysmis_cond is not None:
775 miss_ret = op.returns.missing_value
776 out_file.write(' *%s++ = force_sysmis ? %s : %s;\n'
777 % (stack, miss_ret, result))
779 out_file.write(' *%s++ = %s;\n' % (stack, result))
780 out_file.write(' }\n')
782 out_file.write(' *%s++ = %s;\n' % (stack, result))
783 out_file.write(' break;\n\n')
786 def generate_operations_h():
787 out_file.write('#include <stdlib.h>\n')
788 out_file.write('#include <stdbool.h>\n\n')
790 out_file.write('typedef enum')
791 out_file.write(' {\n')
793 for type_ in types.values():
794 if type_.role != 'auxonly':
795 atoms += ['OP_%s' % type_.name]
797 print_operations('atom', 1, atoms)
798 print_operations('function', 'OP_atom_last + 1',
799 [f.opname for f in funcs])
800 print_operations('operator', 'OP_function_last + 1',
801 [o.opname for o in opers])
802 print_range('OP_composite', 'OP_function_first', 'OP_operator_last')
803 out_file.write(',\n\n')
804 print_range('OP', 'OP_atom_first', 'OP_composite_last')
805 out_file.write('\n }\n')
806 out_file.write('operation_type, atom_type;\n')
808 print_predicate('is_operation', 'OP')
809 for key in ('atom', 'composite', 'function', 'operator'):
810 print_predicate('is_%s' % key, 'OP_%s' % key)
813 def print_operations(type_, first, names):
814 out_file.write(' /* %s types. */\n' % type_.title())
815 out_file.write(' %s = %s,\n' % (names[0], first))
816 for name in names[1:]:
817 out_file.write(' %s,\n' % name)
818 print_range('OP_%s' % type_, names[0], names[-1])
819 out_file.write(',\n\n')
822 def print_range(prefix, first, last):
823 out_file.write(' %s_first = %s,\n' % (prefix, first))
824 out_file.write(' %s_last = %s,\n' % (prefix, last))
825 out_file.write(' n_%s = %s_last - %s_first + 1'
826 % (prefix, prefix, prefix))
829 def print_predicate(function, category):
830 out_file.write('\nstatic inline bool\n')
831 out_file.write('%s (operation_type op)\n' % function)
832 out_file.write('{\n')
833 if function != 'is_operation':
834 out_file.write(' assert (is_operation (op));\n')
835 out_file.write(' return op >= %s_first && op <= %s_last;\n'
836 % (category, category))
837 out_file.write('}\n')
840 def generate_optimize_inc():
842 if not op.optimizable or op.unimplemented:
843 out_file.write('case %s:\n' % op.opname)
844 out_file.write(' NOT_REACHED ();\n\n')
852 c_type = type_.c_type
854 func = 'get_%s_arg' % type_.atom
855 decls += ['%sarg_%s = %s (node, %s)'
856 % (c_type, name, func, arg_idx)]
858 decl = 'size_t arg_%s = node->n_args' % arg.idx
860 decl += ' - %s' % arg_idx
863 decls += ['%s*arg_%s = get_%s_args '
864 '(node, %s, arg_%s, e)'
865 % (c_type, name, type_.atom, arg_idx, arg.idx)]
868 sysmis_cond = op.sysmis_decl('node->min_valid')
869 if sysmis_cond is not None:
870 decls += [sysmis_cond]
874 args += ['arg_%s' % arg.name]
875 if arg.idx is not None:
876 idx = 'arg_%s' % arg.idx
878 idx += ' / %s' % arg.times
883 if type_.role == 'leaf':
884 func = 'get_%s_arg' % type_.atom
885 args += '%s (node, %s)' % (func, arg_idx)
887 elif type_.role == 'auxonly':
888 args += [type_.auxonly_value]
892 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
893 if decls and sysmis_cond is not None:
894 miss_ret = op.returns.missing_value
895 decls += ['%sresult = force_sysmis ? %s : %s'
896 % (op.returns.c_type, miss_ret, result)]
899 out_file.write('case %s:\n' % op.opname)
900 alloc_func = 'expr_allocate_%s' % op.returns.name
902 out_file.write(' {\n')
904 out_file.write(' %s;\n' % decl)
905 out_file.write(' return %s (e, %s);\n' % (alloc_func, result))
906 out_file.write(' }\n')
908 out_file.write(' return %s (e, %s);\n' % (alloc_func, result))
912 def generate_parse_inc():
913 members = ['""', '""', '0', '0', '0', '{}', '0', '0']
914 out_file.write('{%s},\n' % ', '.join(members))
916 for type_ in types.values():
917 if type_.role != 'auxonly':
918 members = ('"%s"' % type_.name, '"%s"' % type_.human_name,
919 '0', 'OP_%s' % type_.name, '0', '{}', '0', '0')
920 out_file.write('{%s},\n' % ', '.join(members))
924 members += ['"%s"' % op.name]
926 prototype = op.prototype()
927 members += ['"%s"' % prototype if prototype else 'NULL']
929 members += [op.flags()]
931 members += ['OP_%s' % op.returns.name]
933 members += ['%s' % len(op.args)]
935 arg_types = ['OP_%s' % arg.type_.name for arg in op.args]
936 members += ['{%s}' % ', '.join(arg_types)]
938 members += ['%s' % op.min_valid]
940 members += ['%s' % (op.array_arg().times if op.array_arg() else 0)]
942 out_file.write('{%s},\n' % ', '.join(members))
947 %s, for generating expression parsers and evaluators from definitions
948 usage: generate.py -o OUTPUT [-i INPUT] [-h]
949 -i INPUT input file containing definitions (default: operations.def)
950 -o OUTPUT output file
951 -h display this help message
956 if __name__ == '__main__':
958 options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
962 except getopt.GetoptError as geo:
963 die('%s: %s' % (argv0, geo.msg))
965 in_file_name = 'operations.def'
967 for key, value in options:
968 if key in ['-h', '--help']:
970 elif key in ['-i', '--input']:
972 elif key in ['-o', '--output']:
973 out_file_name = value
977 if out_file_name is None:
978 die('%s: output file must be specified '
979 '(use --help for help)' % argv0)
981 in_file = open(in_file_name, 'r')
982 out_file = open(out_file_name, 'w')
988 if out_file_name.endswith('evaluate.h'):
989 generate_evaluate_h()
990 elif out_file_name.endswith('evaluate.inc'):
991 generate_evaluate_inc()
992 elif out_file_name.endswith('operations.h'):
993 generate_operations_h()
994 elif out_file_name.endswith('optimize.inc'):
995 generate_optimize_inc()
996 elif out_file_name.endswith('parse.inc'):
999 die('%s: unknown output type' % argv0)