From 6640b245f84dfcf03f8109dc99b3c7b73f8831d1 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 13 Dec 2021 21:06:23 -0800 Subject: [PATCH] expressions: Rewrite code generator in Python. This doesn't change the code that's generated at all. It's preparation for doing that later. --- src/language/expressions/automake.mk | 4 +- src/language/expressions/generate.pl | 987 ------------------------- src/language/expressions/generate.py | 1000 ++++++++++++++++++++++++++ 3 files changed, 1002 insertions(+), 989 deletions(-) delete mode 100644 src/language/expressions/generate.pl create mode 100644 src/language/expressions/generate.py diff --git a/src/language/expressions/automake.mk b/src/language/expressions/automake.mk index 4d2b4b70cc..0ff62645c6 100644 --- a/src/language/expressions/automake.mk +++ b/src/language/expressions/automake.mk @@ -35,13 +35,13 @@ expressions_built_sources = \ BUILT_SOURCES += $(expressions_built_sources) CLEANFILES += $(expressions_built_sources) -helpers = src/language/expressions/generate.pl \ +helpers = src/language/expressions/generate.py \ src/language/expressions/operations.def EXTRA_DIST += $(helpers) $(expressions_built_sources): $(helpers) $(AV_V_GEN)$(MKDIR_P) `dirname $@` && \ - $(PERL) $< -o $@ -i $(top_srcdir)/src/language/expressions/operations.def + $(PYTHON3) $< -o $@ -i $(top_srcdir)/src/language/expressions/operations.def AM_CPPFLAGS += -I"$(abs_top_builddir)/src/language/expressions" \ -I"$(top_srcdir)/src/language/expressions" diff --git a/src/language/expressions/generate.pl b/src/language/expressions/generate.pl deleted file mode 100644 index 188d4eb11c..0000000000 --- a/src/language/expressions/generate.pl +++ /dev/null @@ -1,987 +0,0 @@ -# PSPP - a program for statistical analysis. -# Copyright (C) 2017 Free Software Foundation, Inc. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -use strict; -use warnings 'all'; - -use Getopt::Long; - -# Parse command line. -our ($input_file); -our ($output_file); -parse_cmd_line (); - -# Initialize type system. -our (%type, @types); -init_all_types (); - -# Parse input file. -our (%ops); -our (@funcs, @opers, @order); -parse_input (); - -# Produce output. -print_header (); -if ($output_file =~ /evaluate\.h$/) { - generate_evaluate_h (); -} elsif ($output_file =~ /evaluate\.inc$/) { - generate_evaluate_inc (); -} elsif ($output_file =~ /operations\.h$/) { - generate_operations_h (); -} elsif ($output_file =~ /optimize\.inc$/) { - generate_optimize_inc (); -} elsif ($output_file =~ /parse\.inc$/) { - generate_parse_inc (); -} else { - die "$output_file: unknown output type\n"; -} -print_trailer (); - -# Command line. - -# Parses the command line. -# -# Initializes $input_file, $output_file. -sub parse_cmd_line { - GetOptions ("i|input=s" => \$input_file, - "o|output=s" => \$output_file, - "h|help" => sub { usage (); }) - or exit 1; - - $input_file = "operations.def" if !defined $input_file; - die "$0: output file must be specified\n" if !defined $output_file; - - open (INPUT, "<$input_file") or die "$input_file: open: $!\n"; - open (OUTPUT, ">$output_file") or die "$output_file: create: $!\n"; - - select (OUTPUT); -} - -sub usage { - print < 'double', - ATOM => 'number', MANGLE => 'n', HUMAN_NAME => 'number', - STACK => 'ns', MISSING_VALUE => 'SYSMIS'); - init_type ('string', 'any', C_TYPE => 'struct substring', - ATOM => 'string', MANGLE => 's', HUMAN_NAME => 'string', - STACK => 'ss', MISSING_VALUE => 'empty_string'); - init_type ('boolean', 'any', C_TYPE => 'double', - ATOM => 'number', MANGLE => 'n', HUMAN_NAME => 'boolean', - STACK => 'ns', MISSING_VALUE => 'SYSMIS'); - - # Format types. - init_type ('format', 'atom'); - init_type ('ni_format', 'leaf', C_TYPE => 'const struct fmt_spec *', - ATOM => 'format', MANGLE => 'f', - HUMAN_NAME => 'num_input_format'); - init_type ('no_format', 'leaf', C_TYPE => 'const struct fmt_spec *', - ATOM => 'format', MANGLE => 'f', - HUMAN_NAME => 'num_output_format'); - - # Integer types. - init_type ('integer', 'leaf', C_TYPE => 'int', - ATOM => 'integer', MANGLE => 'n', HUMAN_NAME => 'integer'); - init_type ('pos_int', 'leaf', C_TYPE => 'int', - ATOM => 'integer', MANGLE => 'n', - HUMAN_NAME => 'positive_integer_constant'); - - # Variable names. - init_type ('variable', 'atom'); - init_type ('num_var', 'leaf', C_TYPE => 'const struct variable *', - ATOM => 'variable', MANGLE => 'Vn', - HUMAN_NAME => 'num_variable'); - init_type ('str_var', 'leaf', C_TYPE => 'const struct variable *', - ATOM => 'variable', MANGLE => 'Vs', - HUMAN_NAME => 'string_variable'); - init_type ('var', 'leaf', C_TYPE => 'const struct variable *', - ATOM => 'variable', MANGLE => 'V', - HUMAN_NAME => 'variable'); - - # Vectors. - init_type ('vector', 'leaf', C_TYPE => 'const struct vector *', - ATOM => 'vector', MANGLE => 'v', HUMAN_NAME => 'vector'); - - # Fixed types. - init_type ('expression', 'fixed', C_TYPE => 'struct expression *', - FIXED_VALUE => 'e'); - init_type ('case', 'fixed', C_TYPE => 'const struct ccase *', - FIXED_VALUE => 'c'); - init_type ('case_idx', 'fixed', C_TYPE => 'size_t', - FIXED_VALUE => 'case_idx'); - init_type ('dataset', 'fixed', C_TYPE => 'struct dataset *', - FIXED_VALUE => 'ds'); - - # One of these is emitted at the end of each expression as a sentinel - # that tells expr_evaluate() to return the value on the stack. - init_type ('return_number', 'atom'); - init_type ('return_string', 'atom'); - - # Used only for debugging purposes. - init_type ('operation', 'atom'); -} - -# init_type has 2 required arguments: -# -# NAME: Type name. -# -# `$name' is the type's name in operations.def. -# -# `OP_$name' is the terminal's type in operations.h. -# -# `expr_allocate_$name()' allocates a node of the given type. -# -# ROLE: How the type may be used: -# -# "any": Usable as operands and function arguments, and -# function and operator results. -# -# "leaf": Usable as operands and function arguments, but -# not function arguments or results. (Thus, they appear -# only in leaf nodes in the parse type.) -# -# "fixed": Not allowed either as an operand or argument -# type or a result type. Used only as auxiliary data. -# -# "atom": Not allowed anywhere; just adds the name to -# the list of atoms. -# -# All types except those with "atom" as their role also require: -# -# C_TYPE: The C type that represents this abstract type. -# -# Types with "any" or "leaf" role require: -# -# ATOM: -# -# `$atom' is the `struct operation_data' member name. -# -# get_$atom_name() obtains the corresponding data from a -# node. -# -# MANGLE: Short string for name mangling. Use identical strings -# if two types should not be overloaded. -# -# HUMAN_NAME: Name for a type when we describe it to the user. -# -# Types with role "any" require: -# -# STACK: Name of the local variable in expr_evaluate(), used for -# maintaining the stack for this type. -# -# MISSING_VALUE: Expression used for the missing value of this -# type. -# -# Types with role "fixed" require: -# -# FIXED_VALUE: Expression used for the value of this type. -sub init_type { - my ($name, $role, %rest) = @_; - my ($type) = $type{"\U$name"} = {NAME => $name, ROLE => $role, %rest}; - - my (@need_keys) = qw (NAME ROLE); - if ($role eq 'any') { - push (@need_keys, qw (C_TYPE ATOM MANGLE HUMAN_NAME STACK MISSING_VALUE)); - } elsif ($role eq 'leaf') { - push (@need_keys, qw (C_TYPE ATOM MANGLE HUMAN_NAME)); - } elsif ($role eq 'fixed') { - push (@need_keys, qw (C_TYPE FIXED_VALUE)); - } elsif ($role eq 'atom') { - } else { - die "no role `$role'"; - } - - my (%have_keys); - $have_keys{$_} = 1 foreach keys %$type; - for my $key (@need_keys) { - defined $type->{$key} or die "$name lacks $key"; - delete $have_keys{$key}; - } - scalar (keys (%have_keys)) == 0 - or die "$name has superfluous key(s) " . join (', ', keys (%have_keys)); - - push (@types, $type); -} - -# c_type(type). -# -# Returns the C type of the given type as a string designed to be -# prepended to a variable name to produce a declaration. (That won't -# work in general but it works well enough for our types.) -sub c_type { - my ($type) = @_; - my ($c_type) = $type->{C_TYPE}; - defined $c_type or die; - - # Append a space unless (typically) $c_type ends in `*'. - $c_type .= ' ' if $c_type =~ /\w$/; - - return $c_type; -} - -# Input parsing. - -# Parses the entire input. -# -# Initializes %ops, @funcs, @opers. -sub parse_input { - get_line (); - get_token (); - while ($toktype ne 'eof') { - my (%op); - - $op{OPTIMIZABLE} = 1; - $op{UNIMPLEMENTED} = 0; - $op{EXTENSION} = 0; - $op{PERM_ONLY} = 0; - for (;;) { - if (match ('extension')) { - $op{EXTENSION} = 1; - } elsif (match ('no_opt')) { - $op{OPTIMIZABLE} = 0; - } elsif (match ('absorb_miss')) { - $op{ABSORB_MISS} = 1; - } elsif (match ('perm_only')) { - $op{PERM_ONLY} = 1; - } elsif (match ('no_abbrev')) { - $op{NO_ABBREV} = 1; - } else { - last; - } - } - - $op{RETURNS} = parse_type () || $type{NUMBER}; - die "$op{RETURNS} is not a valid return type" - if !any ($op{RETURNS}, @type{qw (NUMBER STRING BOOLEAN)}); - - $op{CATEGORY} = $token; - if (!any ($op{CATEGORY}, qw (operator function))) { - die "`operator' or `function' expected at `$token'"; - } - get_token (); - - my ($name) = force ("id"); - - die "function name may not contain underscore" - if $op{CATEGORY} eq 'function' && $name =~ /_/; - die "operator name may not contain period" - if $op{CATEGORY} eq 'operator' && $name =~ /\./; - - if (my ($prefix, $suffix) = $name =~ /^(.*)\.(\d+)$/) { - $name = $prefix; - $op{MIN_VALID} = $suffix; - $op{ABSORB_MISS} = 1; - } - $op{NAME} = $name; - - force_match ('('); - @{$op{ARGS}} = (); - while (!match (')')) { - my ($arg) = parse_arg (); - push (@{$op{ARGS}}, $arg); - if (defined ($arg->{IDX})) { - last if match (')'); - die "array must be last argument"; - } - if (!match (',')) { - force_match (')'); - last; - } - } - - for my $arg (@{$op{ARGS}}) { - next if !defined $arg->{CONDITION}; - my ($any_arg) = join ('|', map ($_->{NAME}, @{$op{ARGS}})); - $arg->{CONDITION} =~ s/\b($any_arg)\b/arg_$1/g; - } - - my ($opname) = "OP_$op{NAME}"; - $opname =~ tr/./_/; - if ($op{CATEGORY} eq 'function') { - my ($mangle) = join ('', map ($_->{TYPE}{MANGLE}, @{$op{ARGS}})); - $op{MANGLE} = $mangle; - $opname .= "_$mangle"; - } - $op{OPNAME} = $opname; - - if ($op{MIN_VALID}) { - my ($array_arg) = array_arg (\%op); - die "can't have minimum valid count without array arg" - if !defined $array_arg; - die "minimum valid count allowed only with double array" - if $array_arg->{TYPE} ne $type{NUMBER}; - die "can't have minimum valid count if array has multiplication factor" - if $array_arg->{TIMES} != 1; - } - - while ($toktype eq 'id') { - my ($type) = parse_type () or die "parse error"; - die "`$type->{NAME}' is not allowed as auxiliary data" - unless $type->{ROLE} eq 'leaf' || $type->{ROLE} eq 'fixed'; - my ($name) = force ("id"); - push (@{$op{AUX}}, {TYPE => $type, NAME => $name}); - force_match (';'); - } - - if ($op{OPTIMIZABLE}) { - die "random variate functions must be marked `no_opt'" - if $op{NAME} =~ /^RV\./; - for my $aux (@{$op{AUX}}) { - if (any ($aux->{TYPE}, @type{qw (CASE CASE_IDX)})) { - die "operators with $aux->{TYPE} aux data must be " - . "marked `no_opt'"; - } - } - } - - if ($op{RETURNS} eq $type{STRING} && !defined ($op{ABSORB_MISS})) { - my (@args); - for my $arg (@{$op{ARGS}}) { - if (any ($arg->{TYPE}, @type{qw (NUMBER BOOLEAN)})) { - die "$op{NAME} returns string and has double or bool " - . "argument, but is not marked ABSORB_MISS"; - } - if (defined $arg->{CONDITION}) { - die "$op{NAME} returns string but has argument with condition"; - } - } - } - - if ($toktype eq 'block') { - $op{BLOCK} = force ('block'); - } elsif ($toktype eq 'expression') { - if ($token eq 'unimplemented') { - $op{UNIMPLEMENTED} = 1; - } else { - $op{EXPRESSION} = $token; - } - get_token (); - } else { - die "block or expression expected"; - } - - die "duplicate operation name $opname" if defined $ops{$opname}; - $ops{$opname} = \%op; - if ($op{CATEGORY} eq 'function') { - push (@funcs, $opname); - } else { - push (@opers, $opname); - } - } - close(INPUT); - - @funcs = sort {$ops{$a}->{NAME} cmp $ops{$b}->{NAME} - || - $ops{$a}->{OPNAME} cmp $ops{$b}->{OPNAME}} - @funcs; - @opers = sort {$ops{$a}->{NAME} cmp $ops{$b}->{NAME}} @opers; - @order = (@funcs, @opers); -} - -# Reads the next token into $token, $toktype. -sub get_token { - our ($line); - lookahead (); - return if defined ($toktype) && $toktype eq 'eof'; - $toktype = 'id', $token = $1, return - if $line =~ /\G([a-zA-Z_][a-zA-Z_.0-9]*)/gc; - $toktype = 'int', $token = $1, return if $line =~ /\G([0-9]+)/gc; - $toktype = 'punct', $token = $1, return if $line =~ /\G([][(),*;.])/gc; - if ($line =~ /\G=/gc) { - $toktype = "expression"; - $line =~ /\G\s+/gc; - $token = accumulate_balanced (';'); - } elsif ($line =~ /\G\{/gc) { - $toktype = "block"; - $token = accumulate_balanced ('}'); - $token =~ s/^\n+//; - } else { - die "bad character `" . substr ($line, pos $line, 1) . "' in input"; - } -} - -# Skip whitespace, then return the remainder of the line. -sub lookahead { - our ($line); - die "unexpected end of file" if !defined ($line); - for (;;) { - $line =~ /\G\s+/gc; - last if pos ($line) < length ($line); - get_line (); - $token = $toktype = 'eof', return if !defined ($line); - } - return substr ($line, pos ($line)); -} - -# accumulate_balanced($chars) -# -# Accumulates input until a character in $chars is encountered, except -# that balanced pairs of (), [], or {} cause $chars to be ignored. -# -# Returns the input read. -sub accumulate_balanced { - my ($end) = @_; - my ($s) = ""; - my ($nest) = 0; - our ($line); - for (;;) { - my ($start) = pos ($line); - if ($line =~ /\G([^][(){};,]*)([][(){};,])/gc) { - $s .= substr ($line, $start, pos ($line) - $start - 1) - if pos ($line) > $start; - my ($last) = substr ($line, pos ($line) - 1, 1); - if ($last =~ /[[({]/) { - $nest++; - $s .= $last; - } elsif ($last =~ /[])}]/) { - if ($nest > 0) { - $nest--; - $s .= $last; - } elsif (index ($end, $last) >= 0) { - return $s; - } else { - die "unbalanced parentheses"; - } - } elsif (index ($end, $last) >= 0) { - return $s if !$nest; - $s .= $last; - } else { - $s .= $last; - } - } else { - $s .= substr ($line, pos ($line)) . "\n"; - get_line (); - } - } -} - -# Reads the next line from INPUT into $line. -sub get_line { - our ($line); - $line = ; - if (defined ($line)) { - chomp $line; - $line =~ s%//.*%%; - pos ($line) = 0; - } -} - -# If the current token is an identifier that names a type, -# returns the type and skips to the next token. -# Otherwise, returns undef. -sub parse_type { - if ($toktype eq 'id') { - foreach my $type (values (%type)) { - get_token (), return $type - if defined ($type->{NAME}) && $type->{NAME} eq $token; - } - } - return; -} - -# force($type). -# -# Makes sure that $toktype equals $type, reads the next token, and -# returns the previous $token. -sub force { - my ($type) = @_; - die "parse error at `$token' expecting $type" - if $type ne $toktype; - my ($tok) = $token; - get_token (); - return $tok; -} - -# force($tok). -# -# If $token equals $tok, reads the next token and returns true. -# Otherwise, returns false. -sub match { - my ($tok) = @_; - if ($token eq $tok) { - get_token (); - return 1; - } else { - return 0; - } -} - -# force_match($tok). -# -# If $token equals $tok, reads the next token. -# Otherwise, flags an error in the input. -sub force_match { - my ($tok) = @_; - die "parse error at `$token' expecting `$tok'" if !match ($tok); -} - -# Parses and returns a function argument. -sub parse_arg { - my (%arg); - $arg{TYPE} = parse_type () || $type{NUMBER}; - die "argument name expected at `$token'" if $toktype ne 'id'; - $arg{NAME} = $token; - - if (lookahead () =~ /^[[,)]/) { - get_token (); - if (match ('[')) { - die "only double and string arrays supported" - if !any ($arg{TYPE}, @type{qw (NUMBER STRING)}); - $arg{IDX} = force ('id'); - if (match ('*')) { - $arg{TIMES} = force ('int'); - die "multiplication factor must be two" - if $arg{TIMES} != 2; - } else { - $arg{TIMES} = 1; - } - force_match (']'); - } - } else { - $arg{CONDITION} = $arg{NAME} . ' ' . accumulate_balanced (',)'); - our ($line); - pos ($line) -= 1; - get_token (); - } - return \%arg; -} - -# Output. - -# Prints the output file header. -sub print_header { - print <{UNIMPLEMENTED}; - - my (@args); - for my $arg (@{$op->{ARGS}}) { - if (!defined $arg->{IDX}) { - push (@args, c_type ($arg->{TYPE}) . $arg->{NAME}); - } else { - push (@args, c_type ($arg->{TYPE}) . "$arg->{NAME}" . "[]"); - push (@args, "size_t $arg->{IDX}"); - } - } - for my $aux (@{$op->{AUX}}) { - push (@args, c_type ($aux->{TYPE}) . $aux->{NAME}); - } - push (@args, "void") if !@args; - - my ($statements) = $op->{BLOCK} || " return $op->{EXPRESSION};\n"; - - print "static inline ", c_type ($op->{RETURNS}), "\n"; - print "eval_$opname (", join (', ', @args), ")\n"; - print "{\n"; - print "$statements"; - print "}\n\n"; - } -} - -sub generate_evaluate_inc { - for my $opname (@order) { - my ($op) = $ops{$opname}; - - if ($op->{UNIMPLEMENTED}) { - print "case $opname:\n"; - print " NOT_REACHED ();\n\n"; - next; - } - - my (@decls); - my (@args); - for my $arg (@{$op->{ARGS}}) { - my ($name) = $arg->{NAME}; - my ($type) = $arg->{TYPE}; - my ($c_type) = c_type ($type); - my ($idx) = $arg->{IDX}; - push (@args, "arg_$arg->{NAME}"); - if (!defined ($idx)) { - my ($decl) = "${c_type}arg_$name"; - if ($type->{ROLE} eq 'any') { - unshift (@decls, "$decl = *--$type->{STACK}"); - } elsif ($type->{ROLE} eq 'leaf') { - push (@decls, "$decl = op++->$type->{ATOM}"); - } else { - die; - } - } else { - my ($stack) = $type->{STACK}; - defined $stack or die; - unshift (@decls, - "$c_type*arg_$arg->{NAME} = $stack -= arg_$idx"); - unshift (@decls, "size_t arg_$arg->{IDX} = op++->integer"); - - my ($idx) = "arg_$idx"; - if ($arg->{TIMES} != 1) { - $idx .= " / $arg->{TIMES}"; - } - push (@args, $idx); - } - } - for my $aux (@{$op->{AUX}}) { - my ($type) = $aux->{TYPE}; - my ($name) = $aux->{NAME}; - if ($type->{ROLE} eq 'leaf') { - my ($c_type) = c_type ($type); - push (@decls, "${c_type}aux_$name = op++->$type->{ATOM}"); - push (@args, "aux_$name"); - } elsif ($type->{ROLE} eq 'fixed') { - push (@args, $type->{FIXED_VALUE}); - } - } - - my ($sysmis_cond) = make_sysmis_decl ($op, "op++->integer"); - push (@decls, $sysmis_cond) if defined $sysmis_cond; - - my ($result) = "eval_$op->{OPNAME} (" . join (', ', @args) . ")"; - - my ($stack) = $op->{RETURNS}{STACK}; - - print "case $opname:\n"; - if (@decls) { - print " {\n"; - print " $_;\n" foreach @decls; - if (defined $sysmis_cond) { - my ($miss_ret) = $op->{RETURNS}{MISSING_VALUE}; - print " *$stack++ = force_sysmis ? $miss_ret : $result;\n"; - } else { - print " *$stack++ = $result;\n"; - } - print " }\n"; - } else { - print " *$stack++ = $result;\n"; - } - print " break;\n\n"; - } -} - -sub generate_operations_h { - print "#include \n"; - print "#include \n\n"; - - print "typedef enum"; - print " {\n"; - my (@atoms); - foreach my $type (@types) { - next if $type->{ROLE} eq 'fixed'; - push (@atoms, "OP_$type->{NAME}"); - } - print_operations ('atom', 1, \@atoms); - print_operations ('function', "OP_atom_last + 1", \@funcs); - print_operations ('operator', "OP_function_last + 1", \@opers); - print_range ("OP_composite", "OP_function_first", "OP_operator_last"); - print ",\n\n"; - print_range ("OP", "OP_atom_first", "OP_composite_last"); - print "\n }\n"; - print "operation_type, atom_type;\n"; - - print_predicate ('is_operation', 'OP'); - print_predicate ("is_$_", "OP_$_") - foreach qw (atom composite function operator); -} - -sub print_operations { - my ($type, $first, $names) = @_; - print " /* \u$type types. */\n"; - print " $names->[0] = $first,\n"; - print " $_,\n" foreach @$names[1...$#{$names}]; - print_range ("OP_$type", $names->[0], $names->[$#{$names}]); - print ",\n\n"; -} - -sub print_range { - my ($prefix, $first, $last) = @_; - print " ${prefix}_first = $first,\n"; - print " ${prefix}_last = $last,\n"; - print " n_${prefix} = ${prefix}_last - ${prefix}_first + 1"; -} - -sub print_predicate { - my ($function, $category) = @_; - my ($assertion) = ""; - - print "\nstatic inline bool\n"; - print "$function (operation_type op)\n"; - print "{\n"; - print " assert (is_operation (op));\n" if $function ne 'is_operation'; - print " return op >= ${category}_first && op <= ${category}_last;\n"; - print "}\n"; -} - -sub generate_optimize_inc { - for my $opname (@order) { - my ($op) = $ops{$opname}; - - if (!$op->{OPTIMIZABLE} || $op->{UNIMPLEMENTED}) { - print "case $opname:\n"; - print " NOT_REACHED ();\n\n"; - next; - } - - my (@decls); - my ($arg_idx) = 0; - for my $arg (@{$op->{ARGS}}) { - my ($decl); - my ($name) = $arg->{NAME}; - my ($type) = $arg->{TYPE}; - my ($ctype) = c_type ($type); - my ($idx) = $arg->{IDX}; - if (!defined ($idx)) { - my ($func) = "get_$type->{ATOM}_arg"; - push (@decls, "${ctype}arg_$name = $func (node, $arg_idx)"); - } else { - my ($decl) = "size_t arg_$idx = node->n_args"; - $decl .= " - $arg_idx" if $arg_idx; - push (@decls, $decl); - - push (@decls, "${ctype}*arg_$name = " - . "get_$type->{ATOM}_args " - . " (node, $arg_idx, arg_$idx, e)"); - } - $arg_idx++; - } - - my ($sysmis_cond) = make_sysmis_decl ($op, "node->min_valid"); - push (@decls, $sysmis_cond) if defined $sysmis_cond; - - my (@args); - for my $arg (@{$op->{ARGS}}) { - push (@args, "arg_$arg->{NAME}"); - if (defined $arg->{IDX}) { - my ($idx) = "arg_$arg->{IDX}"; - $idx .= " / $arg->{TIMES}" if $arg->{TIMES} != 1; - push (@args, $idx); - } - } - for my $aux (@{$op->{AUX}}) { - my ($type) = $aux->{TYPE}; - if ($type->{ROLE} eq 'leaf') { - my ($func) = "get_$type->{ATOM}_arg"; - push (@args, "$func (node, $arg_idx)"); - $arg_idx++; - } elsif ($type->{ROLE} eq 'fixed') { - push (@args, $type->{FIXED_VALUE}); - } else { - die; - } - } - - my ($result) = "eval_$op->{OPNAME} (" . join (', ', @args) . ")"; - if (@decls && defined ($sysmis_cond)) { - my ($miss_ret) = $op->{RETURNS}{MISSING_VALUE}; - push (@decls, c_type ($op->{RETURNS}) . "result = " - . "force_sysmis ? $miss_ret : $result"); - $result = "result"; - } - - print "case $opname:\n"; - my ($alloc_func) = "expr_allocate_$op->{RETURNS}{NAME}"; - if (@decls) { - print " {\n"; - print " $_;\n" foreach @decls; - print " return $alloc_func (e, $result);\n"; - print " }\n"; - } else { - print " return $alloc_func (e, $result);\n"; - } - print "\n"; - } -} - -sub generate_parse_inc { - my (@members) = ("\"\"", "\"\"", 0, 0, 0, "{}", 0, 0); - print "{", join (', ', @members), "},\n"; - - for my $type (@types) { - next if $type->{ROLE} eq 'fixed'; - - my ($human_name) = $type->{HUMAN_NAME}; - $human_name = $type->{NAME} if !defined $human_name; - - my (@members) = ("\"$type->{NAME}\"", "\"$human_name\"", - 0, "OP_$type->{NAME}", 0, "{}", 0, 0); - print "{", join (', ', @members), "},\n"; - } - - for my $opname (@order) { - my ($op) = $ops{$opname}; - - my (@members); - - push (@members, "\"$op->{NAME}\""); - - if ($op->{CATEGORY} eq 'function') { - my (@args, @opt_args); - for my $arg (@{$op->{ARGS}}) { - push (@args, $arg->{TYPE}{HUMAN_NAME}) if !defined $arg->{IDX}; - } - - if (my ($array) = array_arg ($op)) { - if (!defined $op->{MIN_VALID}) { - my (@array_args); - for (my $i = 0; $i < $array->{TIMES}; $i++) { - push (@array_args, $array->{TYPE}{HUMAN_NAME}); - } - push (@args, @array_args); - @opt_args = @array_args; - } else { - for (my $i = 0; $i < $op->{MIN_VALID}; $i++) { - push (@args, $array->{TYPE}{HUMAN_NAME}); - } - push (@opt_args, $array->{TYPE}{HUMAN_NAME}); - } - } - my ($human) = "$op->{NAME}(" . join (', ', @args); - $human .= '[, ' . join (', ', @opt_args) . ']...' if @opt_args; - $human .= ')'; - push (@members, "\"$human\""); - } else { - push (@members, "NULL"); - } - - my (@flags); - push (@flags, "OPF_ABSORB_MISS") if defined $op->{ABSORB_MISS}; - push (@flags, "OPF_ARRAY_OPERAND") if array_arg ($op); - push (@flags, "OPF_MIN_VALID") if defined $op->{MIN_VALID}; - push (@flags, "OPF_NONOPTIMIZABLE") if !$op->{OPTIMIZABLE}; - push (@flags, "OPF_EXTENSION") if $op->{EXTENSION}; - push (@flags, "OPF_UNIMPLEMENTED") if $op->{UNIMPLEMENTED}; - push (@flags, "OPF_PERM_ONLY") if $op->{PERM_ONLY}; - push (@flags, "OPF_NO_ABBREV") if $op->{NO_ABBREV}; - push (@members, @flags ? join (' | ', @flags) : 0); - - push (@members, "OP_$op->{RETURNS}{NAME}"); - - push (@members, scalar (@{$op->{ARGS}})); - - my (@arg_types) = map ("OP_$_->{TYPE}{NAME}", @{$op->{ARGS}}); - push (@members, "{" . join (', ', @arg_types) . "}"); - - push (@members, $op->{MIN_VALID} || 0); - - push (@members, array_arg ($op) ? ${array_arg ($op)}{TIMES} : 0); - - print "{", join (', ', @members), "},\n"; - } -} - -# Utilities. - -# any($target, @list) -# -# Returns true if $target appears in @list, -# false otherwise. -sub any { - $_ eq $_[0] and return 1 foreach @_[1...$#_]; - return 0; -} - -# make_sysmis_decl($op, $min_valid_src) -# -# Returns a declaration for a boolean variable called `force_sysmis', -# which will be true when operation $op should be system-missing. -# Returns undef if there are no such circumstances. -# -# If $op has a minimum number of valid arguments, $min_valid_src -# should be an an expression that evaluates to the minimum number of -# valid arguments for $op. -sub make_sysmis_decl { - my ($op, $min_valid_src) = @_; - my (@sysmis_cond); - if (!$op->{ABSORB_MISS}) { - for my $arg (@{$op->{ARGS}}) { - my ($arg_name) = "arg_$arg->{NAME}"; - if (!defined $arg->{IDX}) { - if (any ($arg->{TYPE}, @type{qw (NUMBER BOOLEAN)})) { - push (@sysmis_cond, "!is_valid ($arg_name)"); - } - } elsif ($arg->{TYPE} eq $type{NUMBER}) { - my ($a) = "$arg_name"; - my ($n) = "arg_$arg->{IDX}"; - push (@sysmis_cond, "count_valid ($a, $n) < $n"); - } - } - } elsif (defined $op->{MIN_VALID}) { - my ($args) = $op->{ARGS}; - my ($arg) = ${$args}[$#{$args}]; - my ($a) = "arg_$arg->{NAME}"; - my ($n) = "arg_$arg->{IDX}"; - push (@sysmis_cond, "count_valid ($a, $n) < $min_valid_src"); - } - for my $arg (@{$op->{ARGS}}) { - push (@sysmis_cond, "!($arg->{CONDITION})") - if defined $arg->{CONDITION}; - } - return "bool force_sysmis = " . join (' || ', @sysmis_cond) - if @sysmis_cond; - return; -} - -# array_arg($op) -# -# If $op has an array argument, return it. -# Otherwise, returns undef. -sub array_arg { - my ($op) = @_; - my ($args) = $op->{ARGS}; - return if !@$args; - my ($last_arg) = $args->[@$args - 1]; - return $last_arg if defined $last_arg->{IDX}; - return; -} diff --git a/src/language/expressions/generate.py b/src/language/expressions/generate.py new file mode 100644 index 0000000000..e530cd8208 --- /dev/null +++ b/src/language/expressions/generate.py @@ -0,0 +1,1000 @@ +#! /usr/bin/python3 +# PSPP - a program for statistical analysis. +# Copyright (C) 2017, 2021 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +import enum +import getopt +import re +import sys + +argv0 = sys.argv[0] + + +def die(s): + sys.stderr.write("%s\n" % s) + sys.exit(1) + + +def init_all_types(): + """Defines all our types. + + Initializes 'types' global. + + """ + + global types + types = {} + + for t in [ + # Common user-visible types used throughout evaluation trees. + Type.new_any('number', 'double', 'number', 'n', + 'number', 'ns', 'SYSMIS'), + Type.new_any('string', 'struct substring', 'string', 's', + 'string', 'ss', 'empty_string'), + Type.new_any('boolean', 'double', 'number', 'n', + 'boolean', 'ns', 'SYSMIS'), + + # Format types. + Type.new_atom('format'), + Type.new_leaf('ni_format', 'const struct fmt_spec *', + 'format', 'f', 'num_input_format'), + Type.new_leaf('no_format', 'const struct fmt_spec *', + 'format', 'f', 'num_output_format'), + + # Integer types. + Type.new_leaf('integer', 'int', 'integer', 'n', + 'integer'), + Type.new_leaf('pos_int', 'int', 'integer', 'n', + 'positive_integer_constant'), + + # Variable names. + Type.new_atom('variable'), + Type.new_leaf('num_var', 'const struct variable *', + 'variable', 'Vn', 'num_variable'), + Type.new_leaf('str_var', 'const struct variable *', + 'variable', 'Vs', 'string_variable'), + Type.new_leaf('var', 'const struct variable *', + 'variable', 'V', 'variable'), + + # Vectors. + Type.new_leaf('vector', 'const struct vector *', + 'vector', 'v', 'vector'), + + # Types that appear only as auxiliary data. + Type.new_auxonly('expression', 'struct expression *', 'e'), + Type.new_auxonly('case', 'const struct ccase *', 'c'), + Type.new_auxonly('case_idx', 'size_t', 'case_idx'), + Type.new_auxonly('dataset', 'struct dataset *', 'ds'), + + # One of these is emitted at the end of each expression as a + # sentinel that tells expr_evaluate() to return the value on + # the stack. + Type.new_atom('return_number'), + Type.new_atom('return_string'), + + # Used only for debugging purposes. + Type.new_atom('operation'), + ]: + types[t.name] = t + + +class Type: + def __init__(self, name, role, human_name, c_type=None): + self.name = name + self.role = role + self.human_name = human_name + if c_type: + if c_type.endswith('*'): + self.c_type = c_type + else: + self.c_type = c_type + ' ' + + def new_atom(name): + """Creates and returns a new atom Type with the given 'name'. + + An atom isn't directly allowed as an operand or function + argument type. They are all exceptional cases in some way. + + """ + return Type(name, 'atom', name) + + def new_any(name, c_type, atom, mangle, human_name, stack, + missing_value): + """Creates and returns a new Type that can appear in any context, that + is, it can be an operation's argument type or return type. + + 'c_type' is the type used for C objects of this type. + + 'mangle' should be a short string for name mangling purposes, + to allow overloading functions with the same name but + different argument types. Use the same 'mangle' for two + different types if those two types should not be overloaded. + + 'atom' should be the name of the member of "union + operation_data" that holds a value of this type. + + 'human_name' should be a name to use when describing this type + to the user (see Op.prototype()). + + 'stack' is the name of the local variable in expr_evaluate() + used for maintaining a stack of this type. + + 'missing_value' is the expression used for a missing value of + this type. + + """ + new = Type(name, 'any', human_name, c_type) + new.atom = atom + new.mangle = mangle + new.stack = stack + new.missing_value = missing_value + return new + + def new_leaf(name, c_type, atom, mangle, human_name): + """Creates and returns a new leaf Type. A leaf type can appear in + expressions as an operation's argument type, but not as a return type. + (Thus, it only appears in a parse tree as a leaf node.) + + The other arguments are as for new_any(). + """ + new = Type(name, 'leaf', human_name, c_type) + new.atom = atom + new.mangle = mangle + return new + + def new_auxonly(name, c_type, auxonly_value): + """Creates and returns a new auxiliary-only Type. An auxiliary-only + Type is one that gets passed into the evaluation function but + isn't supplied directly by the user as an operand or argument. + + 'c_type' is as in new_any(). + + 'auxonly_value' is the name of the local variable in + expr_evaluate() that has the value of this auxiliary data. + + """ + new = Type(name, 'auxonly', name, c_type) + new.auxonly_value = auxonly_value + return new + + def parse(): + """If the current token is an identifier that names a type, returns + the type and skips to the next token. Otherwise, returns + None. + """ + if toktype == 'id': + for type_ in types.values(): + if type_.name == token: + get_token() + return type_ + return None + + +class Category(enum.Enum): + FUNCTION = enum.auto() + OPERATOR = enum.auto() + + +class Op: + def __init__(self, name, category, returns, args, aux, expression, + block, min_valid, optimizable, unimplemented, + extension, perm_only, absorb_miss, no_abbrev): + self.name = name + self.category = category + self.returns = returns + self.args = args + self.aux = aux + self.expression = expression + self.block = block + self.min_valid = min_valid + self.optimizable = optimizable + self.unimplemented = unimplemented + self.extension = extension + self.perm_only = perm_only + self.absorb_miss = absorb_miss + self.no_abbrev = no_abbrev + + self.opname = ('OP_%s' % name).replace('.', '_') + if category == Category.FUNCTION: + self.opname += '_%s' % (''.join([a.type_.mangle for a in args])) + + def array_arg(self): + """If this operation has an array argument, returns it. Otherwise, + returns None. + """ + if self.args and self.args[-1].idx is not None: + return self.args[-1] + else: + return None + + def sysmis_decl(self, min_valid_src): + """Returns a declaration for a boolean variable called `force_sysmis', + which will be true when this operation should be + system-missing. Returns None if there are no such + circumstances. + + If this operation has a minimum number of valid arguments, + 'min_valid_src' should be an an expression that evaluates to + the minimum number of valid arguments for this operation. + + """ + sysmis_cond = [] + if not self.absorb_miss: + for arg in self.args: + arg_name = 'arg_%s' % arg.name + if arg.idx is None: + if arg.type_.name in ['number', 'boolean']: + sysmis_cond += ['!is_valid (%s)' % arg_name] + elif arg.type_.name == 'number': + a = arg_name + n = 'arg_%s' % arg.idx + sysmis_cond += ['count_valid (%s, %s) < %s' % (a, n, n)] + elif self.min_valid > 0: + args = self.args + arg = args[-1] + a = 'arg_%s' % arg.name + n = 'arg_%s' % arg.idx + sysmis_cond += ['count_valid (%s, %s) < %s' + % (a, n, min_valid_src)] + for arg in self.args: + if arg.condition is not None: + sysmis_cond += ['!(%s)' % arg.condition] + if sysmis_cond: + return 'bool force_sysmis = %s' % ' || '.join(sysmis_cond) + return None + + def prototype(self): + """Composes and returns a string that describes the function in a way + suitable for a human to understand, something like a C + function prototype, e.g. "ABS(number)" or "ANY(number, + number[, number]...)". + + This doesn't make sense for operators so this function just + returns None for them. + + """ + if self.category == Category.FUNCTION: + args = [] + opt_args = [] + for arg in self.args: + if arg.idx is None: + args += [arg.type_.human_name] + + array = self.array_arg() + if array is not None: + if self.min_valid == 0: + array_args = [] + for i in range(array.times): + array_args += [array.type_.human_name] + args += array_args + opt_args = array_args + else: + for i in range(self.min_valid): + args += [array.type_.human_name] + opt_args += [array.type_.human_name] + + prototype = '%s(%s' % (self.name, ', '.join(args)) + if opt_args: + prototype += '[, %s]...' % ', '.join(opt_args) + prototype += ')' + return prototype + else: + return None + + def flags(self): + """Returns the OPF_* flags that apply to 'self'.""" + flags = [] + if self.absorb_miss: + flags += ['OPF_ABSORB_MISS'] + if self.array_arg(): + flags += ['OPF_ARRAY_OPERAND'] + if self.min_valid > 0: + flags += ['OPF_MIN_VALID'] + if not self.optimizable: + flags += ['OPF_NONOPTIMIZABLE'] + if self.extension: + flags += ['OPF_EXTENSION'] + if self.unimplemented: + flags += ['OPF_UNIMPLEMENTED'] + if self.perm_only: + flags += ['OPF_PERM_ONLY'] + if self.no_abbrev: + flags += ['OPF_NO_ABBREV'] + return ' | '.join(flags) if flags else '0' + + +def parse_input(): + """Parses the entire input. + + Initializes ops, funcs, opers.""" + + global token + global toktype + global line_number + token = None + toktype = None + line_number = 0 + get_line() + get_token() + + global funcs + global opers + global order + ops = {} + funcs = [] + opers = [] + + while toktype != 'eof': + optimizable = True + unimplemented = False + extension = False + perm_only = False + absorb_miss = False + no_abbrev = False + while True: + if match('extension'): + extension = True + elif match('no_opt'): + optimizable = False + elif match('absorb_miss'): + absorb_miss = True + elif match('perm_only'): + perm_only = True + elif match('no_abbrev'): + no_abbrev = True + else: + break + + return_type = Type.parse() + if return_type is None: + return_type = types['number'] + if return_type.name not in ['number', 'string', 'boolean']: + die('%s is not a valid return type' % return_type.name) + + if token == 'operator': + category = Category.OPERATOR + elif token == 'function': + category = Category.FUNCTION + else: + die("'operator' or 'function' expected at '%s'" % token) + get_token() + + name = force('id') + if category == Category.FUNCTION and '_' in name: + die("function name '%s' may not contain underscore" % name) + elif category == Category.OPERATOR and '.' in name: + die("operator name '%s' may not contain period" % name) + + m = re.match(r'(.*)\.(\d+)$', name) + if m: + prefix, suffix = m.groups() + name = prefix + min_valid = int(suffix) + absorb_miss = True + else: + min_valid = 0 + + force_match('(') + args = [] + while not match(')'): + arg = Arg.parse() + args += [arg] + if arg.idx is not None: + if match(')'): + break + die('array must be last argument') + if not match(','): + force_match(')') + break + + for arg in args: + if arg.condition is not None: + any_arg = '|'.join([a.name for a in args]) + arg.condition = re.sub(r'\b(%s)\b' % any_arg, + r'arg_\1', arg.condition) + + aux = [] + while toktype == 'id': + type_ = Type.parse() + if type_ is None: + die('parse error') + if type_.role not in ['leaf', 'auxonly']: + die("'%s' is not allowed as auxiliary data" % type_.name) + aux_name = force('id') + aux += [{'TYPE': type_, 'NAME': aux_name}] + force_match(';') + + if optimizable: + if name.startswith('RV.'): + die("random variate functions must be marked 'no_opt'") + for key in ['CASE', 'CASE_IDX']: + if key in aux: + die("operators with %s aux data must be marked 'no_opt'" + % key) + + if return_type.name == 'string' and not absorb_miss: + for arg in args: + if arg.type_.name in ['number', 'boolean']: + die("'%s' returns string and has double or bool " + "argument, but is not marked ABSORB_MISS" % name) + if arg.condition is not None: + die("'%s' returns string but has " + "argument with condition") + + if toktype == 'block': + block = force('block') + expression = None + elif toktype == 'expression': + if token == 'unimplemented': + unimplemented = True + else: + expression = token + block = None + get_token() + else: + die('block or expression expected') + + op = Op(name, category, + return_type, args, aux, + expression, block, + min_valid, + optimizable, unimplemented, extension, perm_only, absorb_miss, + no_abbrev) + + if min_valid > 0: + aa = op.array_arg() + if aa is None: + die("can't have minimum valid count without array arg") + if aa.type_.name != 'number': + die('minimum valid count allowed only with double array') + if aa.times != 1: + die("can't have minimum valid count if " + "array has multiplication factor") + + if op.opname in ops: + die("duplicate operation name '%s'" % op.opname) + ops[op.opname] = op + if category == Category.FUNCTION: + funcs += [op] + else: + opers += [op] + + in_file.close() + + funcs = sorted(funcs, key=lambda f: (f.name, f.opname)) + opers = sorted(opers, key=lambda o: o.name) + order = funcs + opers + + +def get_token(): + """Reads the next token into 'token' and 'toktype'.""" + + global line + global token + global toktype + + lookahead() + if toktype == 'eof': + return + + m = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line) + if m: + token, line = m.groups() + toktype = 'id' + return + + m = re.match(r'([0-9]+)(.*)$', line) + if m: + token, line = m.groups() + token = int(token) + toktype = 'int' + return + + m = re.match(r'([][(),*;.])(.*)$', line) + if m: + token, line = m.groups() + toktype = 'punct' + return + + m = re.match(r'=\s*(.*)$', line) + if m: + toktype = 'expression' + line = m.group(1) + token = accumulate_balanced(';') + return + + m = re.match(r'{(.*)$', line) + if m: + toktype = 'block' + line = m.group(1) + token = accumulate_balanced('}') + token = token.rstrip('\n') + return + + die("bad character '%s' in input" % line[0]) + + +def lookahead(): + """Skip whitespace.""" + global line + if line is None: + die('unexpected end of file') + + while True: + line = line.lstrip() + if line != '': + break + get_line() + if line is None: + global token + global toktype + token = 'eof' + toktype = 'eof' + return + + +def accumulate_balanced(end, swallow_end=True): + """Accumulates input until a character in 'end' is encountered, + except that balanced pairs of (), [], or {} cause 'end' to be + ignored. Returns the input read. + """ + s = '' + nest = 0 + global line + while True: + for idx, c in enumerate(line): + if c in end and nest == 0: + line = line[idx:] + if swallow_end: + line = line[1:] + s = s.strip('\r\n') + return s + elif c in '[({': + nest += 1 + elif c in '])}': + if nest > 0: + nest -= 1 + else: + die('unbalanced parentheses') + s += c + s += '\n' + get_line() + + +def get_line(): + """Reads the next line from INPUT into 'line'.""" + global line + global line_number + line = in_file.readline() + line_number += 1 + if line == '': + line = None + else: + line = line.rstrip('\r\n') + comment_ofs = line.find('//') + if comment_ofs >= 0: + line = line[:comment_ofs] + + +def force(type_): + """Makes sure that 'toktype' equals 'type', reads the next token, and + returns the previous 'token'. + + """ + if type_ != toktype: + die("parse error at `%s' expecting %s" % (token, type_)) + tok = token + get_token() + return tok + + +def match(tok): + """If 'token' equals 'tok', reads the next token and returns true. + Otherwise, returns false.""" + if token == tok: + get_token() + return True + else: + return False + + +def force_match(tok): + """If 'token' equals 'tok', reads the next token. Otherwise, flags an + error in the input. + """ + if not match(tok): + die("parse error at `%s' expecting `%s'" % (token, tok)) + + +class Arg: + def __init__(self, name, type_, idx, times, condition): + self.name = name + self.type_ = type_ + self.idx = idx + self.times = times + self.condition = condition + + def parse(): + """Parses and returns a function argument.""" + type_ = Type.parse() + if type_ is None: + type_ = types['number'] + + if toktype != 'id': + die("argument name expected at `%s'" % token) + name = token + + lookahead() + global line + + idx = None + times = 1 + + if line[0] in '[,)': + get_token() + if match('['): + if type_.name not in ('number', 'string'): + die('only double and string arrays supported') + idx = force('id') + if match('*'): + times = force('int') + if times != 2: + die('multiplication factor must be two') + force_match(']') + condition = None + else: + condition = name + ' ' + condition += accumulate_balanced(',)', swallow_end=False) + get_token() + + return Arg(name, type_, idx, times, condition) + + +def print_header(): + """Prints the output file header.""" + out_file.write("""\ +/* %s + Generated from %s by generate.py. + Do not modify! */ + +""" % (out_file_name, in_file_name)) + + +def print_trailer(): + """Prints the output file trailer.""" + out_file.write("""\ + +/* + Local Variables: + mode: c + buffer-read-only: t + End: +*/ +""") + + +def generate_evaluate_h(): + out_file.write('#include "helpers.h"\n\n') + + for op in order: + if op.unimplemented: + continue + + args = [] + for arg in op.args: + if arg.idx is None: + args += [arg.type_.c_type + arg.name] + else: + args += [arg.type_.c_type + arg.name + '[]'] + args += ['size_t %s' % arg.idx] + for aux in op.aux: + args += [aux['TYPE'].c_type + aux['NAME']] + if not args: + args += ['void'] + + if op.block: + statements = op.block + '\n' + else: + statements = ' return %s;\n' % op.expression + + out_file.write('static inline %s\n' % op.returns.c_type) + out_file.write('eval_%s (%s)\n' % (op.opname, ', '.join(args))) + out_file.write('{\n') + out_file.write(statements) + out_file.write('}\n\n') + + +def generate_evaluate_inc(): + for op in order: + if op.unimplemented: + out_file.write('case %s:\n' % op.opname) + out_file.write(' NOT_REACHED ();\n\n') + continue + + decls = [] + args = [] + for arg in op.args: + type_ = arg.type_ + c_type = type_.c_type + args += ['arg_%s' % arg.name] + if arg.idx is None: + decl = '%sarg_%s' % (c_type, arg.name) + if type_.role == 'any': + decls = ['%s = *--%s' % (decl, type_.stack)] + decls + elif type_.role == 'leaf': + decls += ['%s = op++->%s' % (decl, type_.atom)] + else: + assert False + else: + idx = arg.idx + decls = ['%s*arg_%s = %s -= arg_%s' + % (c_type, arg.name, type_.stack, idx)] + decls + decls = ['size_t arg_%s = op++->integer' % idx] + decls + + idx = 'arg_%s' % idx + if arg.times != 1: + idx += ' / %s' % arg.times + args += [idx] + for aux in op.aux: + type_ = aux['TYPE'] + name = aux['NAME'] + if type_.role == 'leaf': + decls += ['%saux_%s = op++->%s' + % (type_.c_type, name, type_.atom)] + args += ['aux_%s' % name] + elif type_.role == 'auxonly': + args += [type_.auxonly_value] + + sysmis_cond = op.sysmis_decl('op++->integer') + if sysmis_cond is not None: + decls += [sysmis_cond] + + result = 'eval_%s (%s)' % (op.opname, ', '.join(args)) + + stack = op.returns.stack + + out_file.write('case %s:\n' % op.opname) + if decls: + out_file.write(' {\n') + for decl in decls: + out_file.write(' %s;\n' % decl) + if sysmis_cond is not None: + miss_ret = op.returns.missing_value + out_file.write(' *%s++ = force_sysmis ? %s : %s;\n' + % (stack, miss_ret, result)) + else: + out_file.write(' *%s++ = %s;\n' % (stack, result)) + out_file.write(' }\n') + else: + out_file.write(' *%s++ = %s;\n' % (stack, result)) + out_file.write(' break;\n\n') + + +def generate_operations_h(): + out_file.write('#include \n') + out_file.write('#include \n\n') + + out_file.write('typedef enum') + out_file.write(' {\n') + atoms = [] + for type_ in types.values(): + if type_.role != 'auxonly': + atoms += ['OP_%s' % type_.name] + + print_operations('atom', 1, atoms) + print_operations('function', 'OP_atom_last + 1', + [f.opname for f in funcs]) + print_operations('operator', 'OP_function_last + 1', + [o.opname for o in opers]) + print_range('OP_composite', 'OP_function_first', 'OP_operator_last') + out_file.write(',\n\n') + print_range('OP', 'OP_atom_first', 'OP_composite_last') + out_file.write('\n }\n') + out_file.write('operation_type, atom_type;\n') + + print_predicate('is_operation', 'OP') + for key in ('atom', 'composite', 'function', 'operator'): + print_predicate('is_%s' % key, 'OP_%s' % key) + + +def print_operations(type_, first, names): + out_file.write(' /* %s types. */\n' % type_.title()) + out_file.write(' %s = %s,\n' % (names[0], first)) + for name in names[1:]: + out_file.write(' %s,\n' % name) + print_range('OP_%s' % type_, names[0], names[-1]) + out_file.write(',\n\n') + + +def print_range(prefix, first, last): + out_file.write(' %s_first = %s,\n' % (prefix, first)) + out_file.write(' %s_last = %s,\n' % (prefix, last)) + out_file.write(' n_%s = %s_last - %s_first + 1' + % (prefix, prefix, prefix)) + + +def print_predicate(function, category): + out_file.write('\nstatic inline bool\n') + out_file.write('%s (operation_type op)\n' % function) + out_file.write('{\n') + if function != 'is_operation': + out_file.write(' assert (is_operation (op));\n') + out_file.write(' return op >= %s_first && op <= %s_last;\n' + % (category, category)) + out_file.write('}\n') + + +def generate_optimize_inc(): + for op in order: + if not op.optimizable or op.unimplemented: + out_file.write('case %s:\n' % op.opname) + out_file.write(' NOT_REACHED ();\n\n') + continue + + decls = [] + arg_idx = 0 + for arg in op.args: + name = arg.name + type_ = arg.type_ + c_type = type_.c_type + if arg.idx is None: + func = 'get_%s_arg' % type_.atom + decls += ['%sarg_%s = %s (node, %s)' + % (c_type, name, func, arg_idx)] + else: + decl = 'size_t arg_%s = node->n_args' % arg.idx + if arg_idx > 0: + decl += ' - %s' % arg_idx + decls += [decl] + + decls += ['%s*arg_%s = get_%s_args ' + '(node, %s, arg_%s, e)' + % (c_type, name, type_.atom, arg_idx, arg.idx)] + arg_idx += 1 + + sysmis_cond = op.sysmis_decl('node->min_valid') + if sysmis_cond is not None: + decls += [sysmis_cond] + + args = [] + for arg in op.args: + args += ['arg_%s' % arg.name] + if arg.idx is not None: + idx = 'arg_%s' % arg.idx + if arg.times != 1: + idx += ' / %s' % arg.times + args += [idx] + + for aux in op.aux: + type_ = aux['TYPE'] + if type_.role == 'leaf': + func = 'get_%s_arg' % type_.atom + args += '%s (node, %s)' % (func, arg_idx) + arg_idx += 1 + elif type_.role == 'auxonly': + args += [type_.auxonly_value] + else: + assert False + + result = 'eval_%s (%s)' % (op.opname, ', '.join(args)) + if decls and sysmis_cond is not None: + miss_ret = op.returns.missing_value + decls += ['%sresult = force_sysmis ? %s : %s' + % (op.returns.c_type, miss_ret, result)] + result = 'result' + + out_file.write('case %s:\n' % op.opname) + alloc_func = 'expr_allocate_%s' % op.returns.name + if decls: + out_file.write(' {\n') + for decl in decls: + out_file.write(' %s;\n' % decl) + out_file.write(' return %s (e, %s);\n' % (alloc_func, result)) + out_file.write(' }\n') + else: + out_file.write(' return %s (e, %s);\n' % (alloc_func, result)) + out_file.write('\n') + + +def generate_parse_inc(): + members = ['""', '""', '0', '0', '0', '{}', '0', '0'] + out_file.write('{%s},\n' % ', '.join(members)) + + for type_ in types.values(): + if type_.role != 'auxonly': + members = ('"%s"' % type_.name, '"%s"' % type_.human_name, + '0', 'OP_%s' % type_.name, '0', '{}', '0', '0') + out_file.write('{%s},\n' % ', '.join(members)) + + for op in order: + members = [] + members += ['"%s"' % op.name] + + prototype = op.prototype() + members += ['"%s"' % prototype if prototype else 'NULL'] + + members += [op.flags()] + + members += ['OP_%s' % op.returns.name] + + members += ['%s' % len(op.args)] + + arg_types = ['OP_%s' % arg.type_.name for arg in op.args] + members += ['{%s}' % ', '.join(arg_types)] + + members += ['%s' % op.min_valid] + + members += ['%s' % (op.array_arg().times if op.array_arg() else 0)] + + out_file.write('{%s},\n' % ', '.join(members)) + + +def usage(): + print("""\ +%s, for generating expression parsers and evaluators from definitions +usage: generate.py -o OUTPUT [-i INPUT] [-h] + -i INPUT input file containing definitions (default: operations.def) + -o OUTPUT output file + -h display this help message +""" % argv0) + sys.exit(0) + + +if __name__ == '__main__': + try: + options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:', + ['input=s', + 'output=s', + 'help']) + except getopt.GetoptError as geo: + die('%s: %s' % (argv0, geo.msg)) + + in_file_name = 'operations.def' + out_file_name = None + for key, value in options: + if key in ['-h', '--help']: + usage() + elif key in ['-i', '--input']: + in_file_name = value + elif key in ['-o', '--output']: + out_file_name = value + else: + sys.exit(0) + + if out_file_name is None: + die('%s: output file must be specified ' + '(use --help for help)' % argv0) + + in_file = open(in_file_name, 'r') + out_file = open(out_file_name, 'w') + + init_all_types() + parse_input() + + print_header() + if out_file_name.endswith('evaluate.h'): + generate_evaluate_h() + elif out_file_name.endswith('evaluate.inc'): + generate_evaluate_inc() + elif out_file_name.endswith('operations.h'): + generate_operations_h() + elif out_file_name.endswith('optimize.inc'): + generate_optimize_inc() + elif out_file_name.endswith('parse.inc'): + generate_parse_inc() + else: + die('%s: unknown output type' % argv0) + print_trailer() -- 2.30.2