7 our ($default_output_file) = $0;
8 $default_output_file =~ s/\.pl//;
13 # Initialize type system.
24 # Parses the command line.
26 # Initializes $input_file, $output_file.
28 GetOptions ("i|input=s" => \$input_file,
29 "o|output=s" => \$output_file,
30 "h|help" => sub { usage (); })
33 $input_file = "operations.def" if !defined $input_file;
34 $output_file = $default_output_file if !defined $output_file;
36 open (INPUT, "<$input_file") or die "$input_file: open: $!\n";
37 open (OUTPUT, ">$output_file") or die "$output_file: create: $!\n";
44 $0, for generating $default_output_file from definitions
45 usage: generate.pl [-i INPUT] [-o OUTPUT] [-h]
46 -i INPUT input file containing definitions (default: operations.def)
47 -o OUTPUT output file (default: $default_output_file)
48 -h display this help message
58 # Defines all our types.
60 # Initializes %type, @types.
62 # Common user-visible types used throughout evaluation trees.
63 init_type ('number', 'any', C_TYPE => 'double',
64 ATOM => 'number', MANGLE => 'n', HUMAN_NAME => 'num',
65 STACK => 'ns', MISSING_VALUE => 'SYSMIS');
66 init_type ('string', 'any', C_TYPE => 'struct fixed_string',
67 ATOM => 'string', MANGLE => 's', HUMAN_NAME => 'string',
68 STACK => 'ss', MISSING_VALUE => 'empty_string');
69 init_type ('boolean', 'any', C_TYPE => 'double',
70 ATOM => 'number', MANGLE => 'n', HUMAN_NAME => 'boolean',
71 STACK => 'ns', MISSING_VALUE => 'SYSMIS');
74 init_type ('format', 'atom');
75 init_type ('ni_format', 'leaf', C_TYPE => 'const struct fmt_spec *',
76 ATOM => 'format', MANGLE => 'f',
77 HUMAN_NAME => 'num_input_format');
78 init_type ('no_format', 'leaf', C_TYPE => 'const struct fmt_spec *',
79 ATOM => 'format', MANGLE => 'f',
80 HUMAN_NAME => 'num_output_format');
83 init_type ('integer', 'leaf', C_TYPE => 'int',
84 ATOM => 'integer', MANGLE => 'n', HUMAN_NAME => 'integer');
85 init_type ('pos_int', 'leaf', C_TYPE => 'int',
86 ATOM => 'integer', MANGLE => 'n',
87 HUMAN_NAME => 'positive_integer_constant');
90 init_type ('variable', 'atom');
91 init_type ('num_var', 'leaf', C_TYPE => 'const struct variable *',
92 ATOM => 'variable', MANGLE => 'Vn',
93 HUMAN_NAME => 'num_variable');
94 init_type ('str_var', 'leaf', C_TYPE => 'const struct variable *',
95 ATOM => 'variable', MANGLE => 'Vs',
96 HUMAN_NAME => 'string_variable');
99 init_type ('vector', 'leaf', C_TYPE => 'const struct vector *',
100 ATOM => 'vector', MANGLE => 'v', HUMAN_NAME => 'vector');
103 init_type ('expression', 'fixed', C_TYPE => 'struct expression *',
105 init_type ('case', 'fixed', C_TYPE => 'const struct ccase *',
107 init_type ('case_idx', 'fixed', C_TYPE => 'size_t',
108 FIXED_VALUE => 'case_idx');
110 # One of these is emitted at the end of each expression as a sentinel
111 # that tells expr_evaluate() to return the value on the stack.
112 init_type ('return_number', 'atom');
113 init_type ('return_string', 'atom');
115 # Used only for debugging purposes.
116 init_type ('operation', 'atom');
119 # init_type has 2 required arguments:
123 # `$name' is the type's name in operations.def.
125 # `OP_$name' is the terminal's type in operations.h.
127 # `expr_allocate_$name()' allocates a node of the given type.
129 # ROLE: How the type may be used:
131 # "any": Usable as operands and function arguments, and
132 # function and operator results.
134 # "leaf": Usable as operands and function arguments, but
135 # not function arguments or results. (Thus, they appear
136 # only in leaf nodes in the parse type.)
138 # "fixed": Not allowed either as an operand or argument
139 # type or a result type. Used only as auxiliary data.
141 # "atom": Not allowed anywhere; just adds the name to
144 # All types except those with "atom" as their role also require:
146 # C_TYPE: The C type that represents this abstract type.
148 # Types with "any" or "leaf" role require:
152 # `$atom' is the `struct operation_data' member name.
154 # get_$atom_name() obtains the corresponding data from a
157 # MANGLE: Short string for name mangling. Use identical strings
158 # if two types should not be overloaded.
160 # HUMAN_NAME: Name for a type when we describe it to the user.
162 # Types with role "any" require:
164 # STACK: Name of the local variable in expr_evaluate(), used for
165 # maintaining the stack for this type.
167 # MISSING_VALUE: Expression used for the missing value of this
170 # Types with role "fixed" require:
172 # FIXED_VALUE: Expression used for the value of this type.
174 my ($name, $role, %rest) = @_;
175 my ($type) = $type{"\U$name"} = {NAME => $name, ROLE => $role, %rest};
177 my (@need_keys) = qw (NAME ROLE);
178 if ($role eq 'any') {
179 push (@need_keys, qw (C_TYPE ATOM MANGLE HUMAN_NAME STACK MISSING_VALUE));
180 } elsif ($role eq 'leaf') {
181 push (@need_keys, qw (C_TYPE ATOM MANGLE HUMAN_NAME));
182 } elsif ($role eq 'fixed') {
183 push (@need_keys, qw (C_TYPE FIXED_VALUE));
184 } elsif ($role eq 'atom') {
186 die "no role `$role'";
190 $have_keys{$_} = 1 foreach keys %$type;
191 for my $key (@need_keys) {
192 defined $type->{$key} or die "$name lacks $key";
193 delete $have_keys{$key};
195 scalar (keys (%have_keys)) == 0
196 or die "$name has superfluous key(s) " . join (', ', keys (%have_keys));
198 push (@types, $type);
203 # Returns the C type of the given type as a string designed to be
204 # prepended to a variable name to produce a declaration. (That won't
205 # work in general but it works well enough for our types.)
208 my ($c_type) = $type->{C_TYPE};
209 defined $c_type or die;
211 # Append a space unless (typically) $c_type ends in `*'.
212 $c_type .= ' ' if $c_type =~ /\w$/;
219 # Parses the entire input.
221 # Initializes %ops, @funcs, @opers.
225 while ($toktype ne 'eof') {
228 $op{OPTIMIZABLE} = 1;
229 $op{UNIMPLEMENTED} = 0;
232 if (match ('extension')) {
234 } elsif (match ('no_opt')) {
235 $op{OPTIMIZABLE} = 0;
236 } elsif (match ('absorb_miss')) {
237 $op{ABSORB_MISS} = 1;
243 $op{RETURNS} = parse_type () || $type{NUMBER};
244 die "$op{RETURNS} is not a valid return type"
245 if !any ($op{RETURNS}, @type{qw (NUMBER STRING BOOLEAN)});
247 $op{CATEGORY} = $token;
248 if (!any ($op{CATEGORY}, qw (operator function))) {
249 die "`operator' or `function' expected at `$token'";
253 my ($name) = force ("id");
255 die "function name may not contain underscore"
256 if $op{CATEGORY} eq 'function' && $name =~ /_/;
257 die "operator name may not contain period"
258 if $op{CATEGORY} eq 'operator' && $name =~ /\./;
260 if (my ($prefix, $suffix) = $name =~ /^(.*)\.(\d+)$/) {
262 $op{MIN_VALID} = $suffix;
263 $op{ABSORB_MISS} = 1;
269 while (!match (')')) {
270 my ($arg) = parse_arg ();
271 push (@{$op{ARGS}}, $arg);
272 if (defined ($arg->{IDX})) {
274 die "array must be last argument";
282 for my $arg (@{$op{ARGS}}) {
283 next if !defined $arg->{CONDITION};
284 my ($any_arg) = join ('|', map ($_->{NAME}, @{$op{ARGS}}));
285 $arg->{CONDITION} =~ s/\b($any_arg)\b/arg_$1/g;
288 my ($opname) = "OP_$op{NAME}";
290 if ($op{CATEGORY} eq 'function') {
291 my ($mangle) = join ('', map ($_->{TYPE}{MANGLE}, @{$op{ARGS}}));
292 $op{MANGLE} = $mangle;
293 $opname .= "_$mangle";
295 $op{OPNAME} = $opname;
297 if ($op{MIN_VALID}) {
298 my ($array_arg) = array_arg (\%op);
299 die "can't have minimum valid count without array arg"
300 if !defined $array_arg;
301 die "minimum valid count allowed only with double array"
302 if $array_arg->{TYPE} ne $type{NUMBER};
303 die "can't have minimum valid count if array has multiplication factor"
304 if $array_arg->{TIMES} != 1;
307 while ($toktype eq 'id') {
308 my ($type) = parse_type () or die "parse error";
309 die "`$type->{NAME}' is not allowed as auxiliary data"
310 unless $type->{ROLE} eq 'leaf' || $type->{ROLE} eq 'fixed';
311 my ($name) = force ("id");
312 push (@{$op{AUX}}, {TYPE => $type, NAME => $name});
316 if ($op{OPTIMIZABLE}) {
317 die "random variate functions must be marked `no_opt'"
318 if $op{NAME} =~ /^RV\./;
319 for my $aux (@{$op{AUX}}) {
320 if (any ($aux->{TYPE}, @type{qw (CASE CASE_IDX)})) {
321 die "operators with $aux->{TYPE} aux data must be "
327 if ($op{RETURNS} eq $type{STRING} && !defined ($op{ABSORB_MISS})) {
329 for my $arg (@{$op{ARGS}}) {
330 if (any ($arg->{TYPE}, @type{qw (NUMBER BOOLEAN)})) {
331 die "$op{NAME} returns string and has double or bool "
332 . "argument, but is not marked ABSORB_MISS";
334 if (defined $arg->{CONDITION}) {
335 die "$op{NAME} returns string but has argument with condition";
340 if ($toktype eq 'block') {
341 $op{BLOCK} = force ('block');
342 } elsif ($toktype eq 'expression') {
343 if ($token eq 'unimplemented') {
344 $op{UNIMPLEMENTED} = 1;
346 $op{EXPRESSION} = $token;
350 die "block or expression expected";
353 die "duplicate operation name $opname" if defined $ops{$opname};
354 $ops{$opname} = \%op;
355 if ($op{CATEGORY} eq 'function') {
356 push (@funcs, $opname);
358 push (@opers, $opname);
363 @funcs = sort {$ops{$a}->{NAME} cmp $ops{$b}->{NAME}
365 $ops{$a}->{OPNAME} cmp $ops{$b}->{OPNAME}}
367 @opers = sort {$ops{$a}->{NAME} cmp $ops{$b}->{NAME}} @opers;
368 our (@order) = (@funcs, @opers);
371 # Reads the next token into $token, $toktype.
375 return if defined ($toktype) && $toktype eq 'eof';
376 $toktype = 'id', return
377 if ($token) = $line =~ /\G([a-zA-Z_][a-zA-Z_.0-9]*)/gc;
378 $toktype = 'int', return if ($token) = $line =~ /\G[0-9]+/gc;
379 $toktype = 'punct', $token = $1, return if $line =~ /\G([][(),*;.])/gc;
380 if ($line =~ /\G=/gc) {
381 $toktype = "expression";
383 $token = accumulate_balanced (';');
384 } elsif ($line =~ /\G\{/gc) {
386 $token = accumulate_balanced ('}');
389 die "bad character `" . substr ($line, pos $line, 1) . "' in input";
393 # Skip whitespace, then return the remainder of the line.
396 die "unexpected end of file" if !defined ($line);
399 last if pos ($line) < length ($line);
401 $token = $toktype = 'eof', return if !defined ($line);
403 return substr ($line, pos ($line));
406 # accumulate_balanced($chars)
408 # Accumulates input until a character in $chars is encountered, except
409 # that balanced pairs of (), [], or {} cause $chars to be ignored.
411 # Returns the input read.
412 sub accumulate_balanced {
418 my ($start) = pos ($line);
419 if ($line =~ /\G([^][(){};,]*)([][(){};,])/gc) {
420 $s .= substr ($line, $start, pos ($line) - $start - 1)
421 if pos ($line) > $start;
422 my ($last) = substr ($line, pos ($line) - 1, 1);
423 if ($last =~ /[[({]/) {
426 } elsif ($last =~ /[])}]/) {
430 } elsif (index ($end, $last) >= 0) {
433 die "unbalanced parentheses";
435 } elsif (index ($end, $last) >= 0) {
442 $s .= substr ($line, pos ($line)) . "\n";
448 # Reads the next line from INPUT into $line.
452 if (defined ($line)) {
459 # If the current token is an identifier that names a type,
460 # returns the type and skips to the next token.
461 # Otherwise, returns undef.
463 if ($toktype eq 'id') {
464 foreach my $type (values (%type)) {
465 get_token (), return $type
466 if defined ($type->{NAME}) && $type->{NAME} eq $token;
474 # Makes sure that $toktype equals $type, reads the next token, and
475 # returns the previous $token.
478 die "parse error at `$token' expecting $type"
479 if $type ne $toktype;
487 # If $token equals $tok, reads the next token and returns true.
488 # Otherwise, returns false.
491 if ($token eq $tok) {
501 # If $token equals $tok, reads the next token.
502 # Otherwise, flags an error in the input.
505 die "parse error at `$token' expecting `$tok'" if !match ($tok);
508 # Parses and returns a function argument.
511 $arg{TYPE} = parse_type () || $type{NUMBER};
512 die "argument name expected at `$token'" if $toktype ne 'id';
515 if (lookahead () =~ /^[[,)]/) {
518 die "only double and string arrays supported"
519 if !any ($arg{TYPE}, @type{qw (NUMBER STRING)});
520 $arg{IDX} = force ('id');
522 $arg{TIMES} = force ('int');
523 die "multiplication factor must be positive"
531 $arg{CONDITION} = $arg{NAME} . ' ' . accumulate_balanced (',)');
541 # Prints the output file header.
545 Generated from $input_file by generate.pl.
551 # Prints the output file trailer.
566 # any($target, @list)
568 # Returns true if $target appears in @list,
571 $_ eq $_[0] and return 1 foreach @_[1...$#_];
575 # make_sysmis_decl($op, $min_valid_src)
577 # Returns a declaration for a boolean variable called `force_sysmis',
578 # which will be true when operation $op should be system-missing.
579 # Returns undef if there are no such circumstances.
581 # If $op has a minimum number of valid arguments, $min_valid_src
582 # should be an an expression that evaluates to the minimum number of
583 # valid arguments for $op.
584 sub make_sysmis_decl {
585 my ($op, $min_valid_src) = @_;
587 if (!$op->{ABSORB_MISS}) {
588 for my $arg (@{$op->{ARGS}}) {
589 my ($arg_name) = "arg_$arg->{NAME}";
590 if (!defined $arg->{IDX}) {
591 if (any ($arg->{TYPE}, @type{qw (NUMBER BOOLEAN)})) {
592 push (@sysmis_cond, "!is_valid ($arg_name)");
594 } elsif ($arg->{TYPE} eq $type{NUMBER}) {
595 my ($a) = "$arg_name";
596 my ($n) = "arg_$arg->{IDX}";
597 push (@sysmis_cond, "count_valid ($a, $n) < $n");
600 } elsif (defined $op->{MIN_VALID}) {
601 my ($args) = $op->{ARGS};
602 my ($arg) = ${$args}[$#{$args}];
603 my ($a) = "arg_$arg->{NAME}";
604 my ($n) = "arg_$arg->{IDX}";
605 push (@sysmis_cond, "count_valid ($a, $n) < $min_valid_src");
607 for my $arg (@{$op->{ARGS}}) {
608 push (@sysmis_cond, "!($arg->{CONDITION})")
609 if defined $arg->{CONDITION};
611 return "bool force_sysmis = " . join (' || ', @sysmis_cond)
618 # If $op has an array argument, return it.
619 # Otherwise, returns undef.
622 my ($args) = $op->{ARGS};
624 my ($last_arg) = $args->[@$args - 1];
625 return $last_arg if defined $last_arg->{IDX};