2 # PSPP - a program for statistical analysis.
3 # Copyright (C) 2017, 2021 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 sys.stderr.write("%s\n" % s)
32 """Defines all our types.
34 Initializes 'types' global.
42 # Common user-visible types used throughout evaluation trees.
43 Type.new_any('number', 'double', 'number', 'n',
44 'number', 'ns', 'SYSMIS'),
45 Type.new_any('string', 'struct substring', 'string', 's',
46 'string', 'ss', 'empty_string'),
47 Type.new_any('boolean', 'double', 'number', 'n',
48 'boolean', 'ns', 'SYSMIS'),
49 Type.new_any('integer', 'int', 'number', 'n',
50 'integer', 'ns', 'SYSMIS'),
53 Type.new_atom('format'),
54 Type.new_leaf('ni_format', 'struct fmt_spec',
55 'format', 'f', 'num_input_format'),
56 Type.new_leaf('no_format', 'struct fmt_spec',
57 'format', 'f', 'num_output_format'),
60 Type.new_leaf('pos_int', 'int', 'integer', 'n',
61 'positive_integer_constant'),
64 Type.new_atom('variable'),
65 Type.new_leaf('num_var', 'const struct variable *',
66 'variable', 'Vn', 'num_variable'),
67 Type.new_leaf('str_var', 'const struct variable *',
68 'variable', 'Vs', 'string_variable'),
69 Type.new_leaf('var', 'const struct variable *',
70 'variable', 'V', 'variable'),
73 Type.new_leaf('vector', 'const struct vector *',
74 'vector', 'v', 'vector'),
75 Type.new_any('num_vec_elem', 'double', 'number', 'n',
76 'number', 'ns', 'SYSMIS'),
78 # Types as leaves or auxiliary data.
79 Type.new_leaf('expr_node', 'const struct expr_node *',
80 'expr_node', 'e', 'expr_node'),
82 # Types that appear only as auxiliary data.
83 Type.new_auxonly('expression', 'struct expression *', 'e'),
84 Type.new_auxonly('case', 'const struct ccase *', 'c'),
85 Type.new_auxonly('case_idx', 'size_t', 'case_idx'),
86 Type.new_auxonly('dataset', 'struct dataset *', 'ds'),
88 # One of these is emitted at the end of each expression as a
89 # sentinel that tells expr_evaluate() to return the value on
91 Type.new_atom('return_number'),
92 Type.new_atom('return_string'),
94 # Used only for debugging purposes.
95 Type.new_atom('operation'),
101 def __init__(self, name, role, human_name, c_type=None):
104 self.human_name = human_name
106 if c_type.endswith('*'):
109 self.c_type = c_type + ' '
112 """Creates and returns a new atom Type with the given 'name'.
114 An atom isn't directly allowed as an operand or function
115 argument type. They are all exceptional cases in some way.
118 return Type(name, 'atom', name)
120 def new_any(name, c_type, atom, mangle, human_name, stack,
122 """Creates and returns a new Type that can appear in any context, that
123 is, it can be an operation's argument type or return type.
125 'c_type' is the type used for C objects of this type.
127 'atom' should be the name of the member of "union
128 operation_data" that holds a value of this type.
130 'mangle' should be a short string for name mangling purposes,
131 to allow overloading functions with the same name but
132 different argument types. Use the same 'mangle' for two
133 different types if those two types should not be overloaded.
135 'human_name' should be a name to use when describing this type
136 to the user (see Op.prototype()).
138 'stack' is the name of the local variable in expr_evaluate()
139 used for maintaining a stack of this type.
141 'missing_value' is the expression used for a missing value of
145 new = Type(name, 'any', human_name, c_type)
149 new.missing_value = missing_value
152 def new_leaf(name, c_type, atom, mangle, human_name):
153 """Creates and returns a new leaf Type. A leaf type can appear in
154 expressions as an operation's argument type, but not as a return type.
155 (Thus, it only appears in a parse tree as a leaf node.)
157 The other arguments are as for new_any().
159 new = Type(name, 'leaf', human_name, c_type)
164 def new_auxonly(name, c_type, auxonly_value):
165 """Creates and returns a new auxiliary-only Type. An auxiliary-only
166 Type is one that gets passed into the evaluation function but
167 isn't supplied directly by the user as an operand or argument.
169 'c_type' is as in new_any().
171 'auxonly_value' is the name of the local variable in
172 expr_evaluate() that has the value of this auxiliary data.
175 new = Type(name, 'auxonly', name, c_type)
176 new.auxonly_value = auxonly_value
180 """If the current token is an identifier that names a type, returns
181 the type and skips to the next token. Otherwise, returns
185 for type_ in types.values():
186 if type_.name == token:
192 class Category(enum.Enum):
193 FUNCTION = enum.auto()
194 OPERATOR = enum.auto()
198 def __init__(self, name, category, returns, args, aux, expression,
199 block, min_valid, optimizable, unimplemented,
200 extension, perm_only, absorb_miss, no_abbrev):
202 self.category = category
203 self.returns = returns
206 self.expression = expression
208 self.min_valid = min_valid
209 self.optimizable = optimizable
210 self.unimplemented = unimplemented
211 self.extension = extension
212 self.perm_only = perm_only
213 self.absorb_miss = absorb_miss
214 self.no_abbrev = no_abbrev
216 self.opname = ('OP_%s' % name).replace('.', '_')
217 if category == Category.FUNCTION:
218 self.opname += '_%s' % (''.join([a.type_.mangle for a in args]))
221 """If this operation has an array argument, returns it. Otherwise,
224 if self.args and self.args[-1].idx is not None:
229 def sysmis_decl(self, min_valid_src):
230 """Returns a declaration for a boolean variable called `force_sysmis',
231 which will be true when this operation should be
232 system-missing. Returns None if there are no such
235 If this operation has a minimum number of valid arguments,
236 'min_valid_src' should be an expression that evaluates to
237 the minimum number of valid arguments for this operation.
241 if not self.absorb_miss:
242 for arg in self.args:
243 arg_name = 'arg_%s' % arg.name
245 if arg.type_.name in ['number', 'boolean', 'integer']:
246 sysmis_cond += ['!is_valid (%s)' % arg_name]
247 elif arg.type_.name == 'number':
249 n = 'arg_%s' % arg.idx
250 sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)]
251 elif self.min_valid > 0:
254 a = 'arg_%s' % arg.name
255 n = 'arg_%s' % arg.idx
256 sysmis_cond += ['count_valid (%s, %s) < %s'
257 % (a, n, min_valid_src)]
258 for arg in self.args:
259 if arg.condition is not None:
260 sysmis_cond += ['!(%s)' % arg.condition]
262 return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond)
266 """Composes and returns a string that describes the function in a way
267 suitable for a human to understand, something like a C
268 function prototype, e.g. "ABS(number)" or "ANY(number,
269 number[, number]...)".
271 This doesn't make sense for operators so this function just
272 returns None for them.
275 if self.category == Category.FUNCTION:
278 for arg in self.args:
280 args += [arg.type_.human_name]
282 array = self.array_arg()
283 if array is not None:
284 if self.min_valid == 0:
286 for i in range(array.times):
287 array_args += [array.type_.human_name]
289 opt_args = array_args
291 for i in range(self.min_valid):
292 args += [array.type_.human_name]
293 opt_args += [array.type_.human_name]
295 prototype = '%s(%s' % (self.name, ', '.join(args))
297 prototype += '[, %s]...' % ', '.join(opt_args)
304 """Returns the OPF_* flags that apply to 'self'."""
307 flags += ['OPF_ABSORB_MISS']
309 flags += ['OPF_ARRAY_OPERAND']
310 if self.min_valid > 0:
311 flags += ['OPF_MIN_VALID']
312 if not self.optimizable:
313 flags += ['OPF_NONOPTIMIZABLE']
315 flags += ['OPF_EXTENSION']
316 if self.unimplemented:
317 flags += ['OPF_UNIMPLEMENTED']
319 flags += ['OPF_PERM_ONLY']
321 flags += ['OPF_NO_ABBREV']
323 if aux['TYPE'].name == 'expr_node':
324 flags += ['OPF_EXPR_NODE']
326 return ' | '.join(flags) if flags else '0'
330 """Parses the entire input.
332 Initializes ops, funcs, opers."""
350 while toktype != 'eof':
352 unimplemented = False
358 if match('extension'):
360 elif match('no_opt'):
362 elif match('absorb_miss'):
364 elif match('perm_only'):
366 elif match('no_abbrev'):
371 return_type = Type.parse()
372 if return_type is None:
373 return_type = types['number']
374 if return_type.name not in ['number', 'string', 'boolean', 'num_vec_elem']:
375 die('%s is not a valid return type' % return_type.name)
377 if token == 'operator':
378 category = Category.OPERATOR
379 elif token == 'function':
380 category = Category.FUNCTION
382 die("'operator' or 'function' expected at '%s'" % token)
386 if category == Category.FUNCTION and '_' in name:
387 die("function name '%s' may not contain underscore" % name)
388 elif category == Category.OPERATOR and '.' in name:
389 die("operator name '%s' may not contain period" % name)
391 m = re.match(r'(.*)\.(\d+)$', name)
393 prefix, suffix = m.groups()
395 min_valid = int(suffix)
402 while not match(')'):
405 if arg.idx is not None:
408 die('array must be last argument')
414 if arg.condition is not None:
415 any_arg = '|'.join([a.name for a in args])
416 arg.condition = re.sub(r'\b(%s)\b' % any_arg,
417 r'arg_\1', arg.condition)
420 while toktype == 'id':
424 if type_.role not in ['leaf', 'auxonly']:
425 die("'%s' is not allowed as auxiliary data" % type_.name)
426 aux_name = force('id')
427 aux += [{'TYPE': type_, 'NAME': aux_name}]
431 if name.startswith('RV.'):
432 die("random variate functions must be marked 'no_opt'")
433 for key in ['CASE', 'CASE_IDX']:
435 die("operators with %s aux data must be marked 'no_opt'"
438 if return_type.name == 'string' and not absorb_miss:
440 if arg.type_.name in ['number', 'boolean']:
441 die("'%s' returns string and has double or bool "
442 "argument, but is not marked ABSORB_MISS" % name)
443 if arg.condition is not None:
444 die("'%s' returns string but has "
445 "argument with condition")
447 if toktype == 'block':
448 block = force('block')
450 elif toktype == 'expression':
451 if token == 'unimplemented':
458 die('block or expression expected')
460 op = Op(name, category,
461 return_type, args, aux,
464 optimizable, unimplemented, extension, perm_only, absorb_miss,
470 die("can't have minimum valid count without array arg")
471 if aa.type_.name != 'number':
472 die('minimum valid count allowed only with double array')
474 die("can't have minimum valid count if "
475 "array has multiplication factor")
478 die("duplicate operation name '%s'" % op.opname)
480 if category == Category.FUNCTION:
487 funcs = sorted(funcs, key=lambda f: (f.name, f.opname))
488 opers = sorted(opers, key=lambda o: o.name)
489 order = funcs + opers
493 """Reads the next token into 'token' and 'toktype'."""
503 m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
505 token, line = m.groups()
509 m = re.match(r'([0-9]+)(.*)$', line)
511 token, line = m.groups()
516 m = re.match(r'([][(),*;.])(.*)$', line)
518 token, line = m.groups()
522 m = re.match(r'=\s*(.*)$', line)
524 toktype = 'expression'
526 token = accumulate_balanced(';')
529 m = re.match(r'{(.*)$', line)
533 token = accumulate_balanced('}')
534 token = token.rstrip('\n')
537 die("bad character '%s' in input" % line[0])
541 """Skip whitespace."""
544 die('unexpected end of file')
559 def accumulate_balanced(end, swallow_end=True):
560 """Accumulates input until a character in 'end' is encountered,
561 except that balanced pairs of (), [], or {} cause 'end' to be
562 ignored. Returns the input read.
568 for idx, c in enumerate(line):
569 if c in end and nest == 0:
581 die('unbalanced parentheses')
588 """Reads the next line from INPUT into 'line'."""
591 line = in_file.readline()
596 line = line.rstrip('\r\n')
597 comment_ofs = line.find('//')
599 line = line[:comment_ofs]
603 """Makes sure that 'toktype' equals 'type', reads the next token, and
604 returns the previous 'token'.
608 die("parse error at `%s' expecting %s" % (token, type_))
615 """If 'token' equals 'tok', reads the next token and returns true.
616 Otherwise, returns false."""
624 def force_match(tok):
625 """If 'token' equals 'tok', reads the next token. Otherwise, flags an
629 die("parse error at `%s' expecting `%s'" % (token, tok))
633 def __init__(self, name, type_, idx, times, condition):
638 self.condition = condition
641 """Parses and returns a function argument."""
644 type_ = types['number']
647 die("argument name expected at `%s'" % token)
659 if type_.name not in ('number', 'string'):
660 die('only double and string arrays supported')
665 die('multiplication factor must be two')
669 condition = name + ' '
670 condition += accumulate_balanced(',)', swallow_end=False)
673 return Arg(name, type_, idx, times, condition)
677 """Prints the output file header."""
678 sys.stdout.write("""\
679 /* Generated by generate.py. Do not modify! */
684 """Prints the output file trailer."""
685 sys.stdout.write("""\
696 def generate_evaluate_h():
697 sys.stdout.write('#include "helpers.h"\n\n')
706 args += [arg.type_.c_type + arg.name]
708 args += [arg.type_.c_type + arg.name + '[]']
709 args += ['size_t %s' % arg.idx]
711 args += [aux['TYPE'].c_type + aux['NAME']]
716 statements = op.block + '\n'
718 statements = ' return %s;\n' % op.expression
720 sys.stdout.write('static inline %s\n' % op.returns.c_type)
721 sys.stdout.write('eval_%s (%s)\n' % (op.opname, ', '.join(args)))
722 sys.stdout.write('{\n')
723 sys.stdout.write(statements)
724 sys.stdout.write('}\n\n')
727 def generate_evaluate_inc():
730 sys.stdout.write('case %s:\n' % op.opname)
731 sys.stdout.write(' NOT_REACHED ();\n\n')
738 if type_.c_type == 'int ':
741 args += ['arg_%s == SYSMIS ? INT_MIN : arg_%s'
742 % (arg.name, arg.name)]
744 args += ['arg_%s' % arg.name]
746 c_type = type_.c_type
747 args += ['arg_%s' % arg.name]
749 decl = '%sarg_%s' % (c_type, arg.name)
750 if type_.role == 'any':
751 decls = ['%s = *--%s' % (decl, type_.stack)] + decls
752 elif type_.role == 'leaf':
753 decls += ['%s = op++->%s' % (decl, type_.atom)]
758 decls = ['%s*arg_%s = %s -= arg_%s'
759 % (c_type, arg.name, type_.stack, idx)] + decls
760 decls = ['size_t arg_%s = op++->integer' % idx] + decls
764 idx += ' / %s' % arg.times
769 if type_.role == 'leaf':
770 decls += ['%saux_%s = op++->%s'
771 % (type_.c_type, name, type_.atom)]
772 args += ['aux_%s' % name]
773 elif type_.name == 'expr_node':
774 decls += ['%saux_%s = op++->node'
775 % (type_.c_type, name)]
776 args += ['aux_%s' % name]
777 elif type_.role == 'auxonly':
778 args += [type_.auxonly_value]
780 sysmis_cond = op.sysmis_decl('op++->integer')
781 if sysmis_cond is not None:
782 decls += [sysmis_cond]
784 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
786 stack = op.returns.stack
788 sys.stdout.write('case %s:\n' % op.opname)
790 sys.stdout.write(' {\n')
792 sys.stdout.write(' %s;\n' % decl)
793 if sysmis_cond is not None:
794 miss_ret = op.returns.missing_value
795 sys.stdout.write(' *%s++ = force_sysmis ? %s : %s;\n'
796 % (stack, miss_ret, result))
798 sys.stdout.write(' *%s++ = %s;\n' % (stack, result))
799 sys.stdout.write(' }\n')
801 sys.stdout.write(' *%s++ = %s;\n' % (stack, result))
802 sys.stdout.write(' break;\n\n')
805 def generate_operations_h():
806 sys.stdout.write('#include <stdlib.h>\n')
807 sys.stdout.write('#include <stdbool.h>\n\n')
809 sys.stdout.write('typedef enum')
810 sys.stdout.write(' {\n')
812 for type_ in types.values():
813 if type_.role != 'auxonly':
814 atoms += ['OP_%s' % type_.name]
816 print_operations('atom', 1, atoms)
817 print_operations('function', 'OP_atom_last + 1',
818 [f.opname for f in funcs])
819 print_operations('operator', 'OP_function_last + 1',
820 [o.opname for o in opers])
821 print_range('OP_composite', 'OP_function_first', 'OP_operator_last')
822 sys.stdout.write(',\n\n')
823 print_range('OP', 'OP_atom_first', 'OP_composite_last')
824 sys.stdout.write('\n }\n')
825 sys.stdout.write('operation_type, atom_type;\n')
827 print_predicate('is_operation', 'OP')
828 for key in ('atom', 'composite', 'function', 'operator'):
829 print_predicate('is_%s' % key, 'OP_%s' % key)
832 def print_operations(type_, first, names):
833 sys.stdout.write(' /* %s types. */\n' % type_.title())
834 sys.stdout.write(' %s = %s,\n' % (names[0], first))
835 for name in names[1:]:
836 sys.stdout.write(' %s,\n' % name)
837 print_range('OP_%s' % type_, names[0], names[-1])
838 sys.stdout.write(',\n\n')
841 def print_range(prefix, first, last):
842 sys.stdout.write(' %s_first = %s,\n' % (prefix, first))
843 sys.stdout.write(' %s_last = %s,\n' % (prefix, last))
844 sys.stdout.write(' n_%s = %s_last - %s_first + 1'
845 % (prefix, prefix, prefix))
848 def print_predicate(function, category):
849 sys.stdout.write('\nstatic inline bool\n')
850 sys.stdout.write('%s (operation_type op)\n' % function)
851 sys.stdout.write('{\n')
852 if function != 'is_operation':
853 sys.stdout.write(' assert (is_operation (op));\n')
854 sys.stdout.write(' return op >= %s_first && op <= %s_last;\n'
855 % (category, category))
856 sys.stdout.write('}\n')
859 def generate_optimize_inc():
861 if not op.optimizable or op.unimplemented:
862 sys.stdout.write('case %s:\n' % op.opname)
863 sys.stdout.write(' NOT_REACHED ();\n\n')
871 c_type = type_.c_type
873 func = ('get_integer_arg' if type_.name == 'integer'
874 else 'get_%s_arg' % type_.atom)
875 decls += ['%sarg_%s = %s (node, %s)'
876 % (c_type, name, func, arg_idx)]
878 decl = 'size_t arg_%s = node->n_args' % arg.idx
880 decl += ' - %s' % arg_idx
883 decls += ['%s*arg_%s = get_%s_args '
884 '(node, %s, arg_%s, e)'
885 % (c_type, name, type_.atom, arg_idx, arg.idx)]
888 sysmis_cond = op.sysmis_decl('node->min_valid')
889 if sysmis_cond is not None:
890 decls += [sysmis_cond]
894 args += ['arg_%s' % arg.name]
895 if arg.idx is not None:
896 idx = 'arg_%s' % arg.idx
898 idx += ' / %s' % arg.times
903 if type_.role == 'leaf':
904 assert type_.name == 'expr_node'
906 elif type_.role == 'auxonly':
907 args += [type_.auxonly_value]
911 result = 'eval_%s (%s)' % (op.opname, ', '.join(args))
912 if decls and sysmis_cond is not None:
913 miss_ret = op.returns.missing_value
914 decls += ['%sresult = force_sysmis ? %s : %s'
915 % (op.returns.c_type, miss_ret, result)]
918 sys.stdout.write('case %s:\n' % op.opname)
919 alloc_func = 'expr_allocate_%s' % op.returns.name
921 sys.stdout.write(' {\n')
923 sys.stdout.write(' %s;\n' % decl)
924 sys.stdout.write(' return %s (e, %s);\n' % (alloc_func, result))
925 sys.stdout.write(' }\n')
927 sys.stdout.write(' return %s (e, %s);\n' % (alloc_func, result))
928 sys.stdout.write('\n')
931 def generate_parse_inc():
932 members = ['""', '""', '0', '0', '0', '{}', '0', '0']
933 sys.stdout.write('{%s},\n' % ', '.join(members))
935 for type_ in types.values():
936 if type_.role != 'auxonly':
937 members = ('"%s"' % type_.name, '"%s"' % type_.human_name,
938 '0', 'OP_%s' % type_.name, '0', '{}', '0', '0')
939 sys.stdout.write('{%s},\n' % ', '.join(members))
943 members += ['"%s"' % op.name]
945 prototype = op.prototype()
946 members += ['"%s"' % prototype if prototype else 'NULL']
948 members += [op.flags()]
950 members += ['OP_%s' % op.returns.name]
952 members += ['%s' % len(op.args)]
954 arg_types = ['OP_%s' % arg.type_.name for arg in op.args]
955 members += ['{%s}' % ', '.join(arg_types)]
957 members += ['%s' % op.min_valid]
959 members += ['%s' % (op.array_arg().times if op.array_arg() else 0)]
961 sys.stdout.write('{%s},\n' % ', '.join(members))
966 %s, for generating expression parsers and evaluators from definitions
967 usage: generate.py -o OUTPUT_TYPE [-i INPUT] [-h] > OUTPUT
968 -i INPUT input file containing definitions (default: operations.def)
969 -o OUTPUT output file type, one of: evaluate.h, evaluate.inc,
970 operations.h, optimize.inc, parse.inc
971 -h display this help message
976 if __name__ == '__main__':
978 options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
982 except getopt.GetoptError as geo:
983 die('%s: %s' % (argv0, geo.msg))
985 in_file_name = 'operations.def'
987 for key, value in options:
988 if key in ['-h', '--help']:
990 elif key in ['-i', '--input']:
992 elif key in ['-o', '--output']:
993 out_file_name = value
997 if out_file_name is None:
998 die('%s: output file must be specified '
999 '(use --help for help)' % argv0)
1001 in_file = open(in_file_name, 'r')
1007 if out_file_name == 'evaluate.h':
1008 generate_evaluate_h()
1009 elif out_file_name == 'evaluate.inc':
1010 generate_evaluate_inc()
1011 elif out_file_name == 'operations.h':
1012 generate_operations_h()
1013 elif out_file_name == 'optimize.inc':
1014 generate_optimize_inc()
1015 elif out_file_name == 'parse.inc':
1016 generate_parse_inc()
1018 die('%s: unknown output type' % argv0)