2 # PSPP - a program for statistical analysis.
3 # Copyright (C) 2017, 2021 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 sys.stderr.write("%s\n" % s)
32 """Defines all our types.
34 Initializes 'types' global.
42 # Common user-visible types used throughout evaluation trees.
43 Type.new_any('number', 'double', 'number', 'n',
44 'number', 'ns', 'SYSMIS'),
45 Type.new_any('string', 'struct substring', 'string', 's',
46 'string', 'ss', 'empty_string'),
47 Type.new_any('boolean', 'double', 'number', 'n',
48 'boolean', 'ns', 'SYSMIS'),
49 Type.new_any('integer', 'int', 'number', 'n',
50 'integer', 'ns', 'SYSMIS'),
53 Type.new_atom('format'),
54 Type.new_leaf('ni_format', 'const struct fmt_spec *',
55 'format', 'f', 'num_input_format'),
56 Type.new_leaf('no_format', 'const struct fmt_spec *',
57 'format', 'f', 'num_output_format'),
60 Type.new_leaf('pos_int', 'int', 'integer', 'n',
61 'positive_integer_constant'),
64 Type.new_atom('variable'),
65 Type.new_leaf('num_var', 'const struct variable *',
66 'variable', 'Vn', 'num_variable'),
67 Type.new_leaf('str_var', 'const struct variable *',
68 'variable', 'Vs', 'string_variable'),
69 Type.new_leaf('var', 'const struct variable *',
70 'variable', 'V', 'variable'),
73 Type.new_leaf('vector', 'const struct vector *',
74 'vector', 'v', 'vector'),
76 # Types as leaves or auxiliary data.
77 Type.new_leaf('expr_node', 'const struct expr_node *',
78 'expr_node', 'e', 'expr_node'),
80 # Types that appear only as auxiliary data.
81 Type.new_auxonly('expression', 'struct expression *', 'e'),
82 Type.new_auxonly('case', 'const struct ccase *', 'c'),
83 Type.new_auxonly('case_idx', 'size_t', 'case_idx'),
84 Type.new_auxonly('dataset', 'struct dataset *', 'ds'),
86 # One of these is emitted at the end of each expression as a
87 # sentinel that tells expr_evaluate() to return the value on
89 Type.new_atom('return_number'),
90 Type.new_atom('return_string'),
92 # Used only for debugging purposes.
93 Type.new_atom('operation'),
99 def __init__(self, name, role, human_name, c_type=None):
102 self.human_name = human_name
104 if c_type.endswith('*'):
107 self.c_type = c_type + ' '
110 """Creates and returns a new atom Type with the given 'name'.
112 An atom isn't directly allowed as an operand or function
113 argument type. They are all exceptional cases in some way.
116 return Type(name, 'atom', name)
118 def new_any(name, c_type, atom, mangle, human_name, stack,
120 """Creates and returns a new Type that can appear in any context, that
121 is, it can be an operation's argument type or return type.
123 'c_type' is the type used for C objects of this type.
125 'atom' should be the name of the member of "union
126 operation_data" that holds a value of this type.
128 'mangle' should be a short string for name mangling purposes,
129 to allow overloading functions with the same name but
130 different argument types. Use the same 'mangle' for two
131 different types if those two types should not be overloaded.
133 'human_name' should be a name to use when describing this type
134 to the user (see Op.prototype()).
136 'stack' is the name of the local variable in expr_evaluate()
137 used for maintaining a stack of this type.
139 'missing_value' is the expression used for a missing value of
143 new = Type(name, 'any', human_name, c_type)
147 new.missing_value = missing_value
150 def new_leaf(name, c_type, atom, mangle, human_name):
151 """Creates and returns a new leaf Type. A leaf type can appear in
152 expressions as an operation's argument type, but not as a return type.
153 (Thus, it only appears in a parse tree as a leaf node.)
155 The other arguments are as for new_any().
157 new = Type(name, 'leaf', human_name, c_type)
162 def new_auxonly(name, c_type, auxonly_value):
163 """Creates and returns a new auxiliary-only Type. An auxiliary-only
164 Type is one that gets passed into the evaluation function but
165 isn't supplied directly by the user as an operand or argument.
167 'c_type' is as in new_any().
169 'auxonly_value' is the name of the local variable in
170 expr_evaluate() that has the value of this auxiliary data.
173 new = Type(name, 'auxonly', name, c_type)
174 new.auxonly_value = auxonly_value
178 """If the current token is an identifier that names a type, returns
179 the type and skips to the next token. Otherwise, returns
183 for type_ in types.values():
184 if type_.name == token:
190 class Category(enum.Enum):
191 FUNCTION = enum.auto()
192 OPERATOR = enum.auto()
196 def __init__(self, name, category, returns, args, aux, expression,
197 block, min_valid, optimizable, unimplemented,
198 extension, perm_only, absorb_miss, no_abbrev):
200 self.category = category
201 self.returns = returns
204 self.expression = expression
206 self.min_valid = min_valid
207 self.optimizable = optimizable
208 self.unimplemented = unimplemented
209 self.extension = extension
210 self.perm_only = perm_only
211 self.absorb_miss = absorb_miss
212 self.no_abbrev = no_abbrev
214 self.opname = ('OP_%s' % name).replace('.', '_')
215 if category == Category.FUNCTION:
216 self.opname += '_%s' % (''.join([a.type_.mangle for a in args]))
219 """If this operation has an array argument, returns it. Otherwise,
222 if self.args and self.args[-1].idx is not None:
227 def sysmis_decl(self, min_valid_src):
228 """Returns a declaration for a boolean variable called `force_sysmis',
229 which will be true when this operation should be
230 system-missing. Returns None if there are no such
233 If this operation has a minimum number of valid arguments,
234 'min_valid_src' should be an an expression that evaluates to
235 the minimum number of valid arguments for this operation.
239 if not self.absorb_miss:
240 for arg in self.args:
241 arg_name = 'arg_%s' % arg.name
243 if arg.type_.name in ['number', 'boolean']:
244 sysmis_cond += ['!is_valid (%s)' % arg_name]
245 elif arg.type_.name == 'number':
247 n = 'arg_%s' % arg.idx
248 sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)]
249 elif self.min_valid > 0:
252 a = 'arg_%s' % arg.name
253 n = 'arg_%s' % arg.idx
254 sysmis_cond += ['count_valid (%s, %s) < %s'
255 % (a, n, min_valid_src)]
256 for arg in self.args:
257 if arg.condition is not None:
258 sysmis_cond += ['!(%s)' % arg.condition]
260 return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond)
264 """Composes and returns a string that describes the function in a way
265 suitable for a human to understand, something like a C
266 function prototype, e.g. "ABS(number)" or "ANY(number,
267 number[, number]...)".
269 This doesn't make sense for operators so this function just
270 returns None for them.
273 if self.category == Category.FUNCTION:
276 for arg in self.args:
278 args += [arg.type_.human_name]
280 array = self.array_arg()
281 if array is not None:
282 if self.min_valid == 0:
284 for i in range(array.times):
285 array_args += [array.type_.human_name]
287 opt_args = array_args
289 for i in range(self.min_valid):
290 args += [array.type_.human_name]
291 opt_args += [array.type_.human_name]
293 prototype = '%s(%s' % (self.name, ', '.join(args))
295 prototype += '[, %s]...' % ', '.join(opt_args)
302 """Returns the OPF_* flags that apply to 'self'."""
305 flags += ['OPF_ABSORB_MISS']
307 flags += ['OPF_ARRAY_OPERAND']
308 if self.min_valid > 0:
309 flags += ['OPF_MIN_VALID']
310 if not self.optimizable:
311 flags += ['OPF_NONOPTIMIZABLE']
313 flags += ['OPF_EXTENSION']
314 if self.unimplemented:
315 flags += ['OPF_UNIMPLEMENTED']
317 flags += ['OPF_PERM_ONLY']
319 flags += ['OPF_NO_ABBREV']
321 if aux['TYPE'].name == 'expr_node':
322 flags += ['OPF_EXPR_NODE']
324 return ' | '.join(flags) if flags else '0'
328 """Parses the entire input.
330 Initializes ops, funcs, opers."""
348 while toktype != 'eof':
350 unimplemented = False
356 if match('extension'):
358 elif match('no_opt'):
360 elif match('absorb_miss'):
362 elif match('perm_only'):
364 elif match('no_abbrev'):
369 return_type = Type.parse()
370 if return_type is None:
371 return_type = types['number']
372 if return_type.name not in ['number', 'string', 'boolean']:
373 die('%s is not a valid return type' % return_type.name)
375 if token == 'operator':
376 category = Category.OPERATOR
377 elif token == 'function':
378 category = Category.FUNCTION
380 die("'operator' or 'function' expected at '%s'" % token)
384 if category == Category.FUNCTION and '_' in name:
385 die("function name '%s' may not contain underscore" % name)
386 elif category == Category.OPERATOR and '.' in name:
387 die("operator name '%s' may not contain period" % name)
389 m = re.match(r'(.*)\.(\d+)$', name)
391 prefix, suffix = m.groups()
393 min_valid = int(suffix)
400 while not match(')'):
403 if arg.idx is not None:
406 die('array must be last argument')
412 if arg.condition is not None:
413 any_arg = '|'.join([a.name for a in args])
414 arg.condition = re.sub(r'\b(%s)\b' % any_arg,
415 r'arg_\1', arg.condition)
418 while toktype == 'id':
422 if type_.role not in ['leaf', 'auxonly']:
423 die("'%s' is not allowed as auxiliary data" % type_.name)
424 aux_name = force('id')
425 aux += [{'TYPE': type_, 'NAME': aux_name}]
429 if name.startswith('RV.'):
430 die("random variate functions must be marked 'no_opt'")
431 for key in ['CASE', 'CASE_IDX']:
433 die("operators with %s aux data must be marked 'no_opt'"
436 if return_type.name == 'string' and not absorb_miss:
438 if arg.type_.name in ['number', 'boolean']:
439 die("'%s' returns string and has double or bool "
440 "argument, but is not marked ABSORB_MISS" % name)
441 if arg.condition is not None:
442 die("'%s' returns string but has "
443 "argument with condition")
445 if toktype == 'block':
446 block = force('block')
448 elif toktype == 'expression':
449 if token == 'unimplemented':
456 die('block or expression expected')
458 op = Op(name, category,
459 return_type, args, aux,
462 optimizable, unimplemented, extension, perm_only, absorb_miss,
468 die("can't have minimum valid count without array arg")
469 if aa.type_.name != 'number':
470 die('minimum valid count allowed only with double array')
472 die("can't have minimum valid count if "
473 "array has multiplication factor")
476 die("duplicate operation name '%s'" % op.opname)
478 if category == Category.FUNCTION:
485 funcs = sorted(funcs, key=lambda f: (f.name, f.opname))
486 opers = sorted(opers, key=lambda o: o.name)
487 order = funcs + opers
491 """Reads the next token into 'token' and 'toktype'."""
501 m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
503 token, line = m.groups()
507 m = re.match(r'([0-9]+)(.*)$', line)
509 token, line = m.groups()
514 m = re.match(r'([][(),*;.])(.*)$', line)
516 token, line = m.groups()
520 m = re.match(r'=\s*(.*)$', line)
522 toktype = 'expression'
524 token = accumulate_balanced(';')
527 m = re.match(r'{(.*)$', line)
531 token = accumulate_balanced('}')
532 token = token.rstrip('\n')
535 die("bad character '%s' in input" % line[0])
539 """Skip whitespace."""
542 die('unexpected end of file')
557 def accumulate_balanced(end, swallow_end=True):
558 """Accumulates input until a character in 'end' is encountered,
559 except that balanced pairs of (), [], or {} cause 'end' to be
560 ignored. Returns the input read.
566 for idx, c in enumerate(line):
567 if c in end and nest == 0:
579 die('unbalanced parentheses')
586 """Reads the next line from INPUT into 'line'."""
589 line = in_file.readline()
594 line = line.rstrip('\r\n')
595 comment_ofs = line.find('//')
597 line = line[:comment_ofs]
601 """Makes sure that 'toktype' equals 'type', reads the next token, and
602 returns the previous 'token'.
606 die("parse error at `%s' expecting %s" % (token, type_))
613 """If 'token' equals 'tok', reads the next token and returns true.
614 Otherwise, returns false."""
622 def force_match(tok):
623 """If 'token' equals 'tok', reads the next token. Otherwise, flags an
627 die("parse error at `%s' expecting `%s'" % (token, tok))
631 def __init__(self, name, type_, idx, times, condition):
636 self.condition = condition
639 """Parses and returns a function argument."""
642 type_ = types['number']
645 die("argument name expected at `%s'" % token)
657 if type_.name not in ('number', 'string'):
658 die('only double and string arrays supported')
663 die('multiplication factor must be two')
667 condition = name + ' '
668 condition += accumulate_balanced(',)', swallow_end=False)
671 return Arg(name, type_, idx, times, condition)
675 """Prints the output file header."""
678 Generated from %s by generate.py.
681 """ % (out_file_name, in_file_name))
685 """Prints the output file trailer."""
697 def generate_evaluate_h():
698 out_file.write('#include "helpers.h"\n\n')
707 args += [arg.type_.c_type + arg.name]
709 args += [arg.type_.c_type + arg.name + '[]']
710 args += ['size_t %s' % arg.idx]
712 args += [aux['TYPE'].c_type + aux['NAME']]
717 statements = op.block + '\n'
719 statements = ' return %s;\n' % op.expression
721 out_file.write('static inline %s\n' % op.returns.c_type)
722 out_file.write('eval_%s (%s)\n' % (op.opname, ', '.join(args)))
723 out_file.write('{\n')
724 out_file.write(statements)
725 out_file.write('}\n\n')
728 def generate_evaluate_inc():
731 out_file.write('case %s:\n' % op.opname)
732 out_file.write(' NOT_REACHED ();\n\n')
739 c_type = type_.c_type
740 args += ['arg_%s' % arg.name]
742 decl = '%sarg_%s' % (c_type, arg.name)
743 if type_.role == 'any':
744 decls = ['%s = *--%s' % (decl, type_.stack)] + decls
745 elif type_.role == 'leaf':
746 decls += ['%s = op++->%s' % (decl, type_.atom)]
751 decls = ['%s*arg_%s = %s -= arg_%s'
752 % (c_type, arg.name, type_.stack, idx)] + decls
753 decls = ['size_t arg_%s = op++->integer' % idx] + decls
757 idx += ' / %s' % arg.times
762 if type_.role == 'leaf':
763 decls += ['%saux_%s = op++->%s'
764 % (type_.c_type, name, type_.atom)]
765 args += ['aux_%s' % name]
766 elif type_.name == 'expr_node':
767 decls += ['%saux_%s = op++->node'
768 % (type_.c_type, name)]
769 args += ['aux_%s' % name]
770 elif type_.role == 'auxonly':
771 args += [type_.auxonly_value]
773 sysmis_cond = op.sysmis_decl('op++->integer')
774 if sysmis_cond is not None:
775 decls += [sysmis_cond]
777 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
779 stack = op.returns.stack
781 out_file.write('case %s:\n' % op.opname)
783 out_file.write(' {\n')
785 out_file.write(' %s;\n' % decl)
786 if sysmis_cond is not None:
787 miss_ret = op.returns.missing_value
788 out_file.write(' *%s++ = force_sysmis ? %s : %s;\n'
789 % (stack, miss_ret, result))
791 out_file.write(' *%s++ = %s;\n' % (stack, result))
792 out_file.write(' }\n')
794 out_file.write(' *%s++ = %s;\n' % (stack, result))
795 out_file.write(' break;\n\n')
798 def generate_operations_h():
799 out_file.write('#include <stdlib.h>\n')
800 out_file.write('#include <stdbool.h>\n\n')
802 out_file.write('typedef enum')
803 out_file.write(' {\n')
805 for type_ in types.values():
806 if type_.role != 'auxonly':
807 atoms += ['OP_%s' % type_.name]
809 print_operations('atom', 1, atoms)
810 print_operations('function', 'OP_atom_last + 1',
811 [f.opname for f in funcs])
812 print_operations('operator', 'OP_function_last + 1',
813 [o.opname for o in opers])
814 print_range('OP_composite', 'OP_function_first', 'OP_operator_last')
815 out_file.write(',\n\n')
816 print_range('OP', 'OP_atom_first', 'OP_composite_last')
817 out_file.write('\n }\n')
818 out_file.write('operation_type, atom_type;\n')
820 print_predicate('is_operation', 'OP')
821 for key in ('atom', 'composite', 'function', 'operator'):
822 print_predicate('is_%s' % key, 'OP_%s' % key)
825 def print_operations(type_, first, names):
826 out_file.write(' /* %s types. */\n' % type_.title())
827 out_file.write(' %s = %s,\n' % (names[0], first))
828 for name in names[1:]:
829 out_file.write(' %s,\n' % name)
830 print_range('OP_%s' % type_, names[0], names[-1])
831 out_file.write(',\n\n')
834 def print_range(prefix, first, last):
835 out_file.write(' %s_first = %s,\n' % (prefix, first))
836 out_file.write(' %s_last = %s,\n' % (prefix, last))
837 out_file.write(' n_%s = %s_last - %s_first + 1'
838 % (prefix, prefix, prefix))
841 def print_predicate(function, category):
842 out_file.write('\nstatic inline bool\n')
843 out_file.write('%s (operation_type op)\n' % function)
844 out_file.write('{\n')
845 if function != 'is_operation':
846 out_file.write(' assert (is_operation (op));\n')
847 out_file.write(' return op >= %s_first && op <= %s_last;\n'
848 % (category, category))
849 out_file.write('}\n')
852 def generate_optimize_inc():
854 if not op.optimizable or op.unimplemented:
855 out_file.write('case %s:\n' % op.opname)
856 out_file.write(' NOT_REACHED ();\n\n')
864 c_type = type_.c_type
866 func = 'get_%s_arg' % type_.atom
867 decls += ['%sarg_%s = %s (node, %s)'
868 % (c_type, name, func, arg_idx)]
870 decl = 'size_t arg_%s = node->n_args' % arg.idx
872 decl += ' - %s' % arg_idx
875 decls += ['%s*arg_%s = get_%s_args '
876 '(node, %s, arg_%s, e)'
877 % (c_type, name, type_.atom, arg_idx, arg.idx)]
880 sysmis_cond = op.sysmis_decl('node->min_valid')
881 if sysmis_cond is not None:
882 decls += [sysmis_cond]
886 args += ['arg_%s' % arg.name]
887 if arg.idx is not None:
888 idx = 'arg_%s' % arg.idx
890 idx += ' / %s' % arg.times
895 if type_.role == 'leaf':
896 assert type_.name == 'expr_node'
898 elif type_.role == 'auxonly':
899 args += [type_.auxonly_value]
903 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
904 if decls and sysmis_cond is not None:
905 miss_ret = op.returns.missing_value
906 decls += ['%sresult = force_sysmis ? %s : %s'
907 % (op.returns.c_type, miss_ret, result)]
910 out_file.write('case %s:\n' % op.opname)
911 alloc_func = 'expr_allocate_%s' % op.returns.name
913 out_file.write(' {\n')
915 out_file.write(' %s;\n' % decl)
916 out_file.write(' return %s (e, %s);\n' % (alloc_func, result))
917 out_file.write(' }\n')
919 out_file.write(' return %s (e, %s);\n' % (alloc_func, result))
923 def generate_parse_inc():
924 members = ['""', '""', '0', '0', '0', '{}', '0', '0']
925 out_file.write('{%s},\n' % ', '.join(members))
927 for type_ in types.values():
928 if type_.role != 'auxonly':
929 members = ('"%s"' % type_.name, '"%s"' % type_.human_name,
930 '0', 'OP_%s' % type_.name, '0', '{}', '0', '0')
931 out_file.write('{%s},\n' % ', '.join(members))
935 members += ['"%s"' % op.name]
937 prototype = op.prototype()
938 members += ['"%s"' % prototype if prototype else 'NULL']
940 members += [op.flags()]
942 members += ['OP_%s' % op.returns.name]
944 members += ['%s' % len(op.args)]
946 arg_types = ['OP_%s' % arg.type_.name for arg in op.args]
947 members += ['{%s}' % ', '.join(arg_types)]
949 members += ['%s' % op.min_valid]
951 members += ['%s' % (op.array_arg().times if op.array_arg() else 0)]
953 out_file.write('{%s},\n' % ', '.join(members))
958 %s, for generating expression parsers and evaluators from definitions
959 usage: generate.py -o OUTPUT [-i INPUT] [-h]
960 -i INPUT input file containing definitions (default: operations.def)
961 -o OUTPUT output file
962 -h display this help message
967 if __name__ == '__main__':
969 options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
973 except getopt.GetoptError as geo:
974 die('%s: %s' % (argv0, geo.msg))
976 in_file_name = 'operations.def'
978 for key, value in options:
979 if key in ['-h', '--help']:
981 elif key in ['-i', '--input']:
983 elif key in ['-o', '--output']:
984 out_file_name = value
988 if out_file_name is None:
989 die('%s: output file must be specified '
990 '(use --help for help)' % argv0)
992 in_file = open(in_file_name, 'r')
993 out_file = open(out_file_name, 'w')
999 if out_file_name.endswith('evaluate.h'):
1000 generate_evaluate_h()
1001 elif out_file_name.endswith('evaluate.inc'):
1002 generate_evaluate_inc()
1003 elif out_file_name.endswith('operations.h'):
1004 generate_operations_h()
1005 elif out_file_name.endswith('optimize.inc'):
1006 generate_optimize_inc()
1007 elif out_file_name.endswith('parse.inc'):
1008 generate_parse_inc()
1010 die('%s: unknown output type' % argv0)