2 # PSPP - a program for statistical analysis.
3 # Copyright (C) 2017, 2021 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 sys.stderr.write("%s\n" % s)
32 """Defines all our types.
34 Initializes 'types' global.
42 # Common user-visible types used throughout evaluation trees.
43 Type.new_any('number', 'double', 'number', 'n',
44 'number', 'ns', 'SYSMIS'),
45 Type.new_any('string', 'struct substring', 'string', 's',
46 'string', 'ss', 'empty_string'),
47 Type.new_any('boolean', 'double', 'number', 'n',
48 'boolean', 'ns', 'SYSMIS'),
49 Type.new_any('integer', 'int', 'number', 'n',
50 'integer', 'ns', 'SYSMIS'),
53 Type.new_atom('format'),
54 Type.new_leaf('ni_format', 'const struct fmt_spec *',
55 'format', 'f', 'num_input_format'),
56 Type.new_leaf('no_format', 'const struct fmt_spec *',
57 'format', 'f', 'num_output_format'),
60 Type.new_leaf('pos_int', 'int', 'integer', 'n',
61 'positive_integer_constant'),
64 Type.new_atom('variable'),
65 Type.new_leaf('num_var', 'const struct variable *',
66 'variable', 'Vn', 'num_variable'),
67 Type.new_leaf('str_var', 'const struct variable *',
68 'variable', 'Vs', 'string_variable'),
69 Type.new_leaf('var', 'const struct variable *',
70 'variable', 'V', 'variable'),
73 Type.new_leaf('vector', 'const struct vector *',
74 'vector', 'v', 'vector'),
76 # Types that appear only as auxiliary data.
77 Type.new_auxonly('expression', 'struct expression *', 'e'),
78 Type.new_auxonly('expr_node', 'const struct expr_node *', 'n'),
79 Type.new_auxonly('case', 'const struct ccase *', 'c'),
80 Type.new_auxonly('case_idx', 'size_t', 'case_idx'),
81 Type.new_auxonly('dataset', 'struct dataset *', 'ds'),
83 # One of these is emitted at the end of each expression as a
84 # sentinel that tells expr_evaluate() to return the value on
86 Type.new_atom('return_number'),
87 Type.new_atom('return_string'),
89 # Used only for debugging purposes.
90 Type.new_atom('operation'),
91 Type.new_atom('exprnode'),
97 def __init__(self, name, role, human_name, c_type=None):
100 self.human_name = human_name
102 if c_type.endswith('*'):
105 self.c_type = c_type + ' '
108 """Creates and returns a new atom Type with the given 'name'.
110 An atom isn't directly allowed as an operand or function
111 argument type. They are all exceptional cases in some way.
114 return Type(name, 'atom', name)
116 def new_any(name, c_type, atom, mangle, human_name, stack,
118 """Creates and returns a new Type that can appear in any context, that
119 is, it can be an operation's argument type or return type.
121 'c_type' is the type used for C objects of this type.
123 'mangle' should be a short string for name mangling purposes,
124 to allow overloading functions with the same name but
125 different argument types. Use the same 'mangle' for two
126 different types if those two types should not be overloaded.
128 'atom' should be the name of the member of "union
129 operation_data" that holds a value of this type.
131 'human_name' should be a name to use when describing this type
132 to the user (see Op.prototype()).
134 'stack' is the name of the local variable in expr_evaluate()
135 used for maintaining a stack of this type.
137 'missing_value' is the expression used for a missing value of
141 new = Type(name, 'any', human_name, c_type)
145 new.missing_value = missing_value
148 def new_leaf(name, c_type, atom, mangle, human_name):
149 """Creates and returns a new leaf Type. A leaf type can appear in
150 expressions as an operation's argument type, but not as a return type.
151 (Thus, it only appears in a parse tree as a leaf node.)
153 The other arguments are as for new_any().
155 new = Type(name, 'leaf', human_name, c_type)
160 def new_auxonly(name, c_type, auxonly_value):
161 """Creates and returns a new auxiliary-only Type. An auxiliary-only
162 Type is one that gets passed into the evaluation function but
163 isn't supplied directly by the user as an operand or argument.
165 'c_type' is as in new_any().
167 'auxonly_value' is the name of the local variable in
168 expr_evaluate() that has the value of this auxiliary data.
171 new = Type(name, 'auxonly', name, c_type)
172 new.auxonly_value = auxonly_value
176 """If the current token is an identifier that names a type, returns
177 the type and skips to the next token. Otherwise, returns
181 for type_ in types.values():
182 if type_.name == token:
188 class Category(enum.Enum):
189 FUNCTION = enum.auto()
190 OPERATOR = enum.auto()
194 def __init__(self, name, category, returns, args, aux, expression,
195 block, min_valid, optimizable, unimplemented,
196 extension, perm_only, absorb_miss, no_abbrev):
198 self.category = category
199 self.returns = returns
202 self.expression = expression
204 self.min_valid = min_valid
205 self.optimizable = optimizable
206 self.unimplemented = unimplemented
207 self.extension = extension
208 self.perm_only = perm_only
209 self.absorb_miss = absorb_miss
210 self.no_abbrev = no_abbrev
212 self.opname = ('OP_%s' % name).replace('.', '_')
213 if category == Category.FUNCTION:
214 self.opname += '_%s' % (''.join([a.type_.mangle for a in args]))
217 """If this operation has an array argument, returns it. Otherwise,
220 if self.args and self.args[-1].idx is not None:
225 def sysmis_decl(self, min_valid_src):
226 """Returns a declaration for a boolean variable called `force_sysmis',
227 which will be true when this operation should be
228 system-missing. Returns None if there are no such
231 If this operation has a minimum number of valid arguments,
232 'min_valid_src' should be an an expression that evaluates to
233 the minimum number of valid arguments for this operation.
237 if not self.absorb_miss:
238 for arg in self.args:
239 arg_name = 'arg_%s' % arg.name
241 if arg.type_.name in ['number', 'boolean']:
242 sysmis_cond += ['!is_valid (%s)' % arg_name]
243 elif arg.type_.name == 'number':
245 n = 'arg_%s' % arg.idx
246 sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)]
247 elif self.min_valid > 0:
250 a = 'arg_%s' % arg.name
251 n = 'arg_%s' % arg.idx
252 sysmis_cond += ['count_valid (%s, %s) < %s'
253 % (a, n, min_valid_src)]
254 for arg in self.args:
255 if arg.condition is not None:
256 sysmis_cond += ['!(%s)' % arg.condition]
258 return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond)
262 """Composes and returns a string that describes the function in a way
263 suitable for a human to understand, something like a C
264 function prototype, e.g. "ABS(number)" or "ANY(number,
265 number[, number]...)".
267 This doesn't make sense for operators so this function just
268 returns None for them.
271 if self.category == Category.FUNCTION:
274 for arg in self.args:
276 args += [arg.type_.human_name]
278 array = self.array_arg()
279 if array is not None:
280 if self.min_valid == 0:
282 for i in range(array.times):
283 array_args += [array.type_.human_name]
285 opt_args = array_args
287 for i in range(self.min_valid):
288 args += [array.type_.human_name]
289 opt_args += [array.type_.human_name]
291 prototype = '%s(%s' % (self.name, ', '.join(args))
293 prototype += '[, %s]...' % ', '.join(opt_args)
300 """Returns the OPF_* flags that apply to 'self'."""
303 flags += ['OPF_ABSORB_MISS']
305 flags += ['OPF_ARRAY_OPERAND']
306 if self.min_valid > 0:
307 flags += ['OPF_MIN_VALID']
308 if not self.optimizable:
309 flags += ['OPF_NONOPTIMIZABLE']
311 flags += ['OPF_EXTENSION']
312 if self.unimplemented:
313 flags += ['OPF_UNIMPLEMENTED']
315 flags += ['OPF_PERM_ONLY']
317 flags += ['OPF_NO_ABBREV']
319 if aux['TYPE'].name == 'expr_node':
320 flags += ['OPF_EXPR_NODE']
322 return ' | '.join(flags) if flags else '0'
326 """Parses the entire input.
328 Initializes ops, funcs, opers."""
346 while toktype != 'eof':
348 unimplemented = False
354 if match('extension'):
356 elif match('no_opt'):
358 elif match('absorb_miss'):
360 elif match('perm_only'):
362 elif match('no_abbrev'):
367 return_type = Type.parse()
368 if return_type is None:
369 return_type = types['number']
370 if return_type.name not in ['number', 'string', 'boolean']:
371 die('%s is not a valid return type' % return_type.name)
373 if token == 'operator':
374 category = Category.OPERATOR
375 elif token == 'function':
376 category = Category.FUNCTION
378 die("'operator' or 'function' expected at '%s'" % token)
382 if category == Category.FUNCTION and '_' in name:
383 die("function name '%s' may not contain underscore" % name)
384 elif category == Category.OPERATOR and '.' in name:
385 die("operator name '%s' may not contain period" % name)
387 m = re.match(r'(.*)\.(\d+)$', name)
389 prefix, suffix = m.groups()
391 min_valid = int(suffix)
398 while not match(')'):
401 if arg.idx is not None:
404 die('array must be last argument')
410 if arg.condition is not None:
411 any_arg = '|'.join([a.name for a in args])
412 arg.condition = re.sub(r'\b(%s)\b' % any_arg,
413 r'arg_\1', arg.condition)
416 while toktype == 'id':
420 if type_.role not in ['leaf', 'auxonly']:
421 die("'%s' is not allowed as auxiliary data" % type_.name)
422 aux_name = force('id')
423 aux += [{'TYPE': type_, 'NAME': aux_name}]
427 if name.startswith('RV.'):
428 die("random variate functions must be marked 'no_opt'")
429 for key in ['CASE', 'CASE_IDX']:
431 die("operators with %s aux data must be marked 'no_opt'"
434 if return_type.name == 'string' and not absorb_miss:
436 if arg.type_.name in ['number', 'boolean']:
437 die("'%s' returns string and has double or bool "
438 "argument, but is not marked ABSORB_MISS" % name)
439 if arg.condition is not None:
440 die("'%s' returns string but has "
441 "argument with condition")
443 if toktype == 'block':
444 block = force('block')
446 elif toktype == 'expression':
447 if token == 'unimplemented':
454 die('block or expression expected')
456 op = Op(name, category,
457 return_type, args, aux,
460 optimizable, unimplemented, extension, perm_only, absorb_miss,
466 die("can't have minimum valid count without array arg")
467 if aa.type_.name != 'number':
468 die('minimum valid count allowed only with double array')
470 die("can't have minimum valid count if "
471 "array has multiplication factor")
474 die("duplicate operation name '%s'" % op.opname)
476 if category == Category.FUNCTION:
483 funcs = sorted(funcs, key=lambda f: (f.name, f.opname))
484 opers = sorted(opers, key=lambda o: o.name)
485 order = funcs + opers
489 """Reads the next token into 'token' and 'toktype'."""
499 m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
501 token, line = m.groups()
505 m = re.match(r'([0-9]+)(.*)$', line)
507 token, line = m.groups()
512 m = re.match(r'([][(),*;.])(.*)$', line)
514 token, line = m.groups()
518 m = re.match(r'=\s*(.*)$', line)
520 toktype = 'expression'
522 token = accumulate_balanced(';')
525 m = re.match(r'{(.*)$', line)
529 token = accumulate_balanced('}')
530 token = token.rstrip('\n')
533 die("bad character '%s' in input" % line[0])
537 """Skip whitespace."""
540 die('unexpected end of file')
555 def accumulate_balanced(end, swallow_end=True):
556 """Accumulates input until a character in 'end' is encountered,
557 except that balanced pairs of (), [], or {} cause 'end' to be
558 ignored. Returns the input read.
564 for idx, c in enumerate(line):
565 if c in end and nest == 0:
577 die('unbalanced parentheses')
584 """Reads the next line from INPUT into 'line'."""
587 line = in_file.readline()
592 line = line.rstrip('\r\n')
593 comment_ofs = line.find('//')
595 line = line[:comment_ofs]
599 """Makes sure that 'toktype' equals 'type', reads the next token, and
600 returns the previous 'token'.
604 die("parse error at `%s' expecting %s" % (token, type_))
611 """If 'token' equals 'tok', reads the next token and returns true.
612 Otherwise, returns false."""
620 def force_match(tok):
621 """If 'token' equals 'tok', reads the next token. Otherwise, flags an
625 die("parse error at `%s' expecting `%s'" % (token, tok))
629 def __init__(self, name, type_, idx, times, condition):
634 self.condition = condition
637 """Parses and returns a function argument."""
640 type_ = types['number']
643 die("argument name expected at `%s'" % token)
655 if type_.name not in ('number', 'string'):
656 die('only double and string arrays supported')
661 die('multiplication factor must be two')
665 condition = name + ' '
666 condition += accumulate_balanced(',)', swallow_end=False)
669 return Arg(name, type_, idx, times, condition)
673 """Prints the output file header."""
676 Generated from %s by generate.py.
679 """ % (out_file_name, in_file_name))
683 """Prints the output file trailer."""
695 def generate_evaluate_h():
696 out_file.write('#include "helpers.h"\n\n')
705 args += [arg.type_.c_type + arg.name]
707 args += [arg.type_.c_type + arg.name + '[]']
708 args += ['size_t %s' % arg.idx]
710 args += [aux['TYPE'].c_type + aux['NAME']]
715 statements = op.block + '\n'
717 statements = ' return %s;\n' % op.expression
719 out_file.write('static inline %s\n' % op.returns.c_type)
720 out_file.write('eval_%s (%s)\n' % (op.opname, ', '.join(args)))
721 out_file.write('{\n')
722 out_file.write(statements)
723 out_file.write('}\n\n')
726 def generate_evaluate_inc():
729 out_file.write('case %s:\n' % op.opname)
730 out_file.write(' NOT_REACHED ();\n\n')
737 c_type = type_.c_type
738 args += ['arg_%s' % arg.name]
740 decl = '%sarg_%s' % (c_type, arg.name)
741 if type_.role == 'any':
742 decls = ['%s = *--%s' % (decl, type_.stack)] + decls
743 elif type_.role == 'leaf':
744 decls += ['%s = op++->%s' % (decl, type_.atom)]
749 decls = ['%s*arg_%s = %s -= arg_%s'
750 % (c_type, arg.name, type_.stack, idx)] + decls
751 decls = ['size_t arg_%s = op++->integer' % idx] + decls
755 idx += ' / %s' % arg.times
760 if type_.role == 'leaf':
761 decls += ['%saux_%s = op++->%s'
762 % (type_.c_type, name, type_.atom)]
763 args += ['aux_%s' % name]
764 elif type_.name == 'expr_node':
765 decls += ['%saux_%s = op++->node'
766 % (type_.c_type, name)]
767 args += ['aux_%s' % name]
768 elif type_.role == 'auxonly':
769 args += [type_.auxonly_value]
771 sysmis_cond = op.sysmis_decl('op++->integer')
772 if sysmis_cond is not None:
773 decls += [sysmis_cond]
775 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
777 stack = op.returns.stack
779 out_file.write('case %s:\n' % op.opname)
781 out_file.write(' {\n')
783 out_file.write(' %s;\n' % decl)
784 if sysmis_cond is not None:
785 miss_ret = op.returns.missing_value
786 out_file.write(' *%s++ = force_sysmis ? %s : %s;\n'
787 % (stack, miss_ret, result))
789 out_file.write(' *%s++ = %s;\n' % (stack, result))
790 out_file.write(' }\n')
792 out_file.write(' *%s++ = %s;\n' % (stack, result))
793 out_file.write(' break;\n\n')
796 def generate_operations_h():
797 out_file.write('#include <stdlib.h>\n')
798 out_file.write('#include <stdbool.h>\n\n')
800 out_file.write('typedef enum')
801 out_file.write(' {\n')
803 for type_ in types.values():
804 if type_.role != 'auxonly':
805 atoms += ['OP_%s' % type_.name]
807 print_operations('atom', 1, atoms)
808 print_operations('function', 'OP_atom_last + 1',
809 [f.opname for f in funcs])
810 print_operations('operator', 'OP_function_last + 1',
811 [o.opname for o in opers])
812 print_range('OP_composite', 'OP_function_first', 'OP_operator_last')
813 out_file.write(',\n\n')
814 print_range('OP', 'OP_atom_first', 'OP_composite_last')
815 out_file.write('\n }\n')
816 out_file.write('operation_type, atom_type;\n')
818 print_predicate('is_operation', 'OP')
819 for key in ('atom', 'composite', 'function', 'operator'):
820 print_predicate('is_%s' % key, 'OP_%s' % key)
823 def print_operations(type_, first, names):
824 out_file.write(' /* %s types. */\n' % type_.title())
825 out_file.write(' %s = %s,\n' % (names[0], first))
826 for name in names[1:]:
827 out_file.write(' %s,\n' % name)
828 print_range('OP_%s' % type_, names[0], names[-1])
829 out_file.write(',\n\n')
832 def print_range(prefix, first, last):
833 out_file.write(' %s_first = %s,\n' % (prefix, first))
834 out_file.write(' %s_last = %s,\n' % (prefix, last))
835 out_file.write(' n_%s = %s_last - %s_first + 1'
836 % (prefix, prefix, prefix))
839 def print_predicate(function, category):
840 out_file.write('\nstatic inline bool\n')
841 out_file.write('%s (operation_type op)\n' % function)
842 out_file.write('{\n')
843 if function != 'is_operation':
844 out_file.write(' assert (is_operation (op));\n')
845 out_file.write(' return op >= %s_first && op <= %s_last;\n'
846 % (category, category))
847 out_file.write('}\n')
850 def generate_optimize_inc():
852 if not op.optimizable or op.unimplemented:
853 out_file.write('case %s:\n' % op.opname)
854 out_file.write(' NOT_REACHED ();\n\n')
862 c_type = type_.c_type
864 func = 'get_%s_arg' % type_.atom
865 decls += ['%sarg_%s = %s (node, %s)'
866 % (c_type, name, func, arg_idx)]
868 decl = 'size_t arg_%s = node->n_args' % arg.idx
870 decl += ' - %s' % arg_idx
873 decls += ['%s*arg_%s = get_%s_args '
874 '(node, %s, arg_%s, e)'
875 % (c_type, name, type_.atom, arg_idx, arg.idx)]
878 sysmis_cond = op.sysmis_decl('node->min_valid')
879 if sysmis_cond is not None:
880 decls += [sysmis_cond]
884 args += ['arg_%s' % arg.name]
885 if arg.idx is not None:
886 idx = 'arg_%s' % arg.idx
888 idx += ' / %s' % arg.times
893 if type_.role == 'leaf':
894 func = 'get_%s_arg' % type_.atom
895 args += '%s (node, %s)' % (func, arg_idx)
897 elif type_.name == 'expr_node':
899 elif type_.role == 'auxonly':
900 args += [type_.auxonly_value]
904 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
905 if decls and sysmis_cond is not None:
906 miss_ret = op.returns.missing_value
907 decls += ['%sresult = force_sysmis ? %s : %s'
908 % (op.returns.c_type, miss_ret, result)]
911 out_file.write('case %s:\n' % op.opname)
912 alloc_func = 'expr_allocate_%s' % op.returns.name
914 out_file.write(' {\n')
916 out_file.write(' %s;\n' % decl)
917 out_file.write(' return %s (e, %s);\n' % (alloc_func, result))
918 out_file.write(' }\n')
920 out_file.write(' return %s (e, %s);\n' % (alloc_func, result))
924 def generate_parse_inc():
925 members = ['""', '""', '0', '0', '0', '{}', '0', '0']
926 out_file.write('{%s},\n' % ', '.join(members))
928 for type_ in types.values():
929 if type_.role != 'auxonly':
930 members = ('"%s"' % type_.name, '"%s"' % type_.human_name,
931 '0', 'OP_%s' % type_.name, '0', '{}', '0', '0')
932 out_file.write('{%s},\n' % ', '.join(members))
936 members += ['"%s"' % op.name]
938 prototype = op.prototype()
939 members += ['"%s"' % prototype if prototype else 'NULL']
941 members += [op.flags()]
943 members += ['OP_%s' % op.returns.name]
945 members += ['%s' % len(op.args)]
947 arg_types = ['OP_%s' % arg.type_.name for arg in op.args]
948 members += ['{%s}' % ', '.join(arg_types)]
950 members += ['%s' % op.min_valid]
952 members += ['%s' % (op.array_arg().times if op.array_arg() else 0)]
954 out_file.write('{%s},\n' % ', '.join(members))
959 %s, for generating expression parsers and evaluators from definitions
960 usage: generate.py -o OUTPUT [-i INPUT] [-h]
961 -i INPUT input file containing definitions (default: operations.def)
962 -o OUTPUT output file
963 -h display this help message
968 if __name__ == '__main__':
970 options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
974 except getopt.GetoptError as geo:
975 die('%s: %s' % (argv0, geo.msg))
977 in_file_name = 'operations.def'
979 for key, value in options:
980 if key in ['-h', '--help']:
982 elif key in ['-i', '--input']:
984 elif key in ['-o', '--output']:
985 out_file_name = value
989 if out_file_name is None:
990 die('%s: output file must be specified '
991 '(use --help for help)' % argv0)
993 in_file = open(in_file_name, 'r')
994 out_file = open(out_file_name, 'w')
1000 if out_file_name.endswith('evaluate.h'):
1001 generate_evaluate_h()
1002 elif out_file_name.endswith('evaluate.inc'):
1003 generate_evaluate_inc()
1004 elif out_file_name.endswith('operations.h'):
1005 generate_operations_h()
1006 elif out_file_name.endswith('optimize.inc'):
1007 generate_optimize_inc()
1008 elif out_file_name.endswith('parse.inc'):
1009 generate_parse_inc()
1011 die('%s: unknown output type' % argv0)