7 our ($default_output_file) = $0;
8 $default_output_file =~ s/\.pl//;
13 # Initialize type system.
19 our (@funcs, @opers, @order);
29 # Parses the command line.
31 # Initializes $input_file, $output_file.
33 GetOptions ("i|input=s" => \$input_file,
34 "o|output=s" => \$output_file,
35 "h|help" => sub { usage (); })
38 $input_file = "operations.def" if !defined $input_file;
39 $output_file = $default_output_file if !defined $output_file;
41 open (INPUT, "<$input_file") or die "$input_file: open: $!\n";
42 open (OUTPUT, ">$output_file") or die "$output_file: create: $!\n";
49 $0, for generating $default_output_file from definitions
50 usage: generate.pl [-i INPUT] [-o OUTPUT] [-h]
51 -i INPUT input file containing definitions (default: operations.def)
52 -o OUTPUT output file (default: $default_output_file)
53 -h display this help message
63 # Defines all our types.
65 # Initializes %type, @types.
67 # Common user-visible types used throughout evaluation trees.
68 init_type ('number', 'any', C_TYPE => 'double',
69 ATOM => 'number', MANGLE => 'n', HUMAN_NAME => 'number',
70 STACK => 'ns', MISSING_VALUE => 'SYSMIS');
71 init_type ('string', 'any', C_TYPE => 'struct substring',
72 ATOM => 'string', MANGLE => 's', HUMAN_NAME => 'string',
73 STACK => 'ss', MISSING_VALUE => 'empty_string');
74 init_type ('boolean', 'any', C_TYPE => 'double',
75 ATOM => 'number', MANGLE => 'n', HUMAN_NAME => 'boolean',
76 STACK => 'ns', MISSING_VALUE => 'SYSMIS');
79 init_type ('format', 'atom');
80 init_type ('ni_format', 'leaf', C_TYPE => 'const struct fmt_spec *',
81 ATOM => 'format', MANGLE => 'f',
82 HUMAN_NAME => 'num_input_format');
83 init_type ('no_format', 'leaf', C_TYPE => 'const struct fmt_spec *',
84 ATOM => 'format', MANGLE => 'f',
85 HUMAN_NAME => 'num_output_format');
88 init_type ('integer', 'leaf', C_TYPE => 'int',
89 ATOM => 'integer', MANGLE => 'n', HUMAN_NAME => 'integer');
90 init_type ('pos_int', 'leaf', C_TYPE => 'int',
91 ATOM => 'integer', MANGLE => 'n',
92 HUMAN_NAME => 'positive_integer_constant');
95 init_type ('variable', 'atom');
96 init_type ('num_var', 'leaf', C_TYPE => 'const struct variable *',
97 ATOM => 'variable', MANGLE => 'Vn',
98 HUMAN_NAME => 'num_variable');
99 init_type ('str_var', 'leaf', C_TYPE => 'const struct variable *',
100 ATOM => 'variable', MANGLE => 'Vs',
101 HUMAN_NAME => 'string_variable');
102 init_type ('var', 'leaf', C_TYPE => 'const struct variable *',
103 ATOM => 'variable', MANGLE => 'V',
104 HUMAN_NAME => 'variable');
107 init_type ('vector', 'leaf', C_TYPE => 'const struct vector *',
108 ATOM => 'vector', MANGLE => 'v', HUMAN_NAME => 'vector');
111 init_type ('expression', 'fixed', C_TYPE => 'struct expression *',
113 init_type ('case', 'fixed', C_TYPE => 'const struct ccase *',
115 init_type ('case_idx', 'fixed', C_TYPE => 'size_t',
116 FIXED_VALUE => 'case_idx');
117 init_type ('dataset', 'fixed', C_TYPE => 'struct dataset *',
118 FIXED_VALUE => 'ds');
120 # One of these is emitted at the end of each expression as a sentinel
121 # that tells expr_evaluate() to return the value on the stack.
122 init_type ('return_number', 'atom');
123 init_type ('return_string', 'atom');
125 # Used only for debugging purposes.
126 init_type ('operation', 'atom');
129 # init_type has 2 required arguments:
133 # `$name' is the type's name in operations.def.
135 # `OP_$name' is the terminal's type in operations.h.
137 # `expr_allocate_$name()' allocates a node of the given type.
139 # ROLE: How the type may be used:
141 # "any": Usable as operands and function arguments, and
142 # function and operator results.
144 # "leaf": Usable as operands and function arguments, but
145 # not function arguments or results. (Thus, they appear
146 # only in leaf nodes in the parse type.)
148 # "fixed": Not allowed either as an operand or argument
149 # type or a result type. Used only as auxiliary data.
151 # "atom": Not allowed anywhere; just adds the name to
154 # All types except those with "atom" as their role also require:
156 # C_TYPE: The C type that represents this abstract type.
158 # Types with "any" or "leaf" role require:
162 # `$atom' is the `struct operation_data' member name.
164 # get_$atom_name() obtains the corresponding data from a
167 # MANGLE: Short string for name mangling. Use identical strings
168 # if two types should not be overloaded.
170 # HUMAN_NAME: Name for a type when we describe it to the user.
172 # Types with role "any" require:
174 # STACK: Name of the local variable in expr_evaluate(), used for
175 # maintaining the stack for this type.
177 # MISSING_VALUE: Expression used for the missing value of this
180 # Types with role "fixed" require:
182 # FIXED_VALUE: Expression used for the value of this type.
184 my ($name, $role, %rest) = @_;
185 my ($type) = $type{"\U$name"} = {NAME => $name, ROLE => $role, %rest};
187 my (@need_keys) = qw (NAME ROLE);
188 if ($role eq 'any') {
189 push (@need_keys, qw (C_TYPE ATOM MANGLE HUMAN_NAME STACK MISSING_VALUE));
190 } elsif ($role eq 'leaf') {
191 push (@need_keys, qw (C_TYPE ATOM MANGLE HUMAN_NAME));
192 } elsif ($role eq 'fixed') {
193 push (@need_keys, qw (C_TYPE FIXED_VALUE));
194 } elsif ($role eq 'atom') {
196 die "no role `$role'";
200 $have_keys{$_} = 1 foreach keys %$type;
201 for my $key (@need_keys) {
202 defined $type->{$key} or die "$name lacks $key";
203 delete $have_keys{$key};
205 scalar (keys (%have_keys)) == 0
206 or die "$name has superfluous key(s) " . join (', ', keys (%have_keys));
208 push (@types, $type);
213 # Returns the C type of the given type as a string designed to be
214 # prepended to a variable name to produce a declaration. (That won't
215 # work in general but it works well enough for our types.)
218 my ($c_type) = $type->{C_TYPE};
219 defined $c_type or die;
221 # Append a space unless (typically) $c_type ends in `*'.
222 $c_type .= ' ' if $c_type =~ /\w$/;
229 # Parses the entire input.
231 # Initializes %ops, @funcs, @opers.
235 while ($toktype ne 'eof') {
238 $op{OPTIMIZABLE} = 1;
239 $op{UNIMPLEMENTED} = 0;
243 if (match ('extension')) {
245 } elsif (match ('no_opt')) {
246 $op{OPTIMIZABLE} = 0;
247 } elsif (match ('absorb_miss')) {
248 $op{ABSORB_MISS} = 1;
249 } elsif (match ('perm_only')) {
251 } elsif (match ('no_abbrev')) {
258 $op{RETURNS} = parse_type () || $type{NUMBER};
259 die "$op{RETURNS} is not a valid return type"
260 if !any ($op{RETURNS}, @type{qw (NUMBER STRING BOOLEAN)});
262 $op{CATEGORY} = $token;
263 if (!any ($op{CATEGORY}, qw (operator function))) {
264 die "`operator' or `function' expected at `$token'";
268 my ($name) = force ("id");
270 die "function name may not contain underscore"
271 if $op{CATEGORY} eq 'function' && $name =~ /_/;
272 die "operator name may not contain period"
273 if $op{CATEGORY} eq 'operator' && $name =~ /\./;
275 if (my ($prefix, $suffix) = $name =~ /^(.*)\.(\d+)$/) {
277 $op{MIN_VALID} = $suffix;
278 $op{ABSORB_MISS} = 1;
284 while (!match (')')) {
285 my ($arg) = parse_arg ();
286 push (@{$op{ARGS}}, $arg);
287 if (defined ($arg->{IDX})) {
289 die "array must be last argument";
297 for my $arg (@{$op{ARGS}}) {
298 next if !defined $arg->{CONDITION};
299 my ($any_arg) = join ('|', map ($_->{NAME}, @{$op{ARGS}}));
300 $arg->{CONDITION} =~ s/\b($any_arg)\b/arg_$1/g;
303 my ($opname) = "OP_$op{NAME}";
305 if ($op{CATEGORY} eq 'function') {
306 my ($mangle) = join ('', map ($_->{TYPE}{MANGLE}, @{$op{ARGS}}));
307 $op{MANGLE} = $mangle;
308 $opname .= "_$mangle";
310 $op{OPNAME} = $opname;
312 if ($op{MIN_VALID}) {
313 my ($array_arg) = array_arg (\%op);
314 die "can't have minimum valid count without array arg"
315 if !defined $array_arg;
316 die "minimum valid count allowed only with double array"
317 if $array_arg->{TYPE} ne $type{NUMBER};
318 die "can't have minimum valid count if array has multiplication factor"
319 if $array_arg->{TIMES} != 1;
322 while ($toktype eq 'id') {
323 my ($type) = parse_type () or die "parse error";
324 die "`$type->{NAME}' is not allowed as auxiliary data"
325 unless $type->{ROLE} eq 'leaf' || $type->{ROLE} eq 'fixed';
326 my ($name) = force ("id");
327 push (@{$op{AUX}}, {TYPE => $type, NAME => $name});
331 if ($op{OPTIMIZABLE}) {
332 die "random variate functions must be marked `no_opt'"
333 if $op{NAME} =~ /^RV\./;
334 for my $aux (@{$op{AUX}}) {
335 if (any ($aux->{TYPE}, @type{qw (CASE CASE_IDX)})) {
336 die "operators with $aux->{TYPE} aux data must be "
342 if ($op{RETURNS} eq $type{STRING} && !defined ($op{ABSORB_MISS})) {
344 for my $arg (@{$op{ARGS}}) {
345 if (any ($arg->{TYPE}, @type{qw (NUMBER BOOLEAN)})) {
346 die "$op{NAME} returns string and has double or bool "
347 . "argument, but is not marked ABSORB_MISS";
349 if (defined $arg->{CONDITION}) {
350 die "$op{NAME} returns string but has argument with condition";
355 if ($toktype eq 'block') {
356 $op{BLOCK} = force ('block');
357 } elsif ($toktype eq 'expression') {
358 if ($token eq 'unimplemented') {
359 $op{UNIMPLEMENTED} = 1;
361 $op{EXPRESSION} = $token;
365 die "block or expression expected";
368 die "duplicate operation name $opname" if defined $ops{$opname};
369 $ops{$opname} = \%op;
370 if ($op{CATEGORY} eq 'function') {
371 push (@funcs, $opname);
373 push (@opers, $opname);
378 @funcs = sort {$ops{$a}->{NAME} cmp $ops{$b}->{NAME}
380 $ops{$a}->{OPNAME} cmp $ops{$b}->{OPNAME}}
382 @opers = sort {$ops{$a}->{NAME} cmp $ops{$b}->{NAME}} @opers;
383 @order = (@funcs, @opers);
386 # Reads the next token into $token, $toktype.
390 return if defined ($toktype) && $toktype eq 'eof';
391 $toktype = 'id', $token = $1, return
392 if $line =~ /\G([a-zA-Z_][a-zA-Z_.0-9]*)/gc;
393 $toktype = 'int', $token = $1, return if $line =~ /\G([0-9]+)/gc;
394 $toktype = 'punct', $token = $1, return if $line =~ /\G([][(),*;.])/gc;
395 if ($line =~ /\G=/gc) {
396 $toktype = "expression";
398 $token = accumulate_balanced (';');
399 } elsif ($line =~ /\G\{/gc) {
401 $token = accumulate_balanced ('}');
404 die "bad character `" . substr ($line, pos $line, 1) . "' in input";
408 # Skip whitespace, then return the remainder of the line.
411 die "unexpected end of file" if !defined ($line);
414 last if pos ($line) < length ($line);
416 $token = $toktype = 'eof', return if !defined ($line);
418 return substr ($line, pos ($line));
421 # accumulate_balanced($chars)
423 # Accumulates input until a character in $chars is encountered, except
424 # that balanced pairs of (), [], or {} cause $chars to be ignored.
426 # Returns the input read.
427 sub accumulate_balanced {
433 my ($start) = pos ($line);
434 if ($line =~ /\G([^][(){};,]*)([][(){};,])/gc) {
435 $s .= substr ($line, $start, pos ($line) - $start - 1)
436 if pos ($line) > $start;
437 my ($last) = substr ($line, pos ($line) - 1, 1);
438 if ($last =~ /[[({]/) {
441 } elsif ($last =~ /[])}]/) {
445 } elsif (index ($end, $last) >= 0) {
448 die "unbalanced parentheses";
450 } elsif (index ($end, $last) >= 0) {
457 $s .= substr ($line, pos ($line)) . "\n";
463 # Reads the next line from INPUT into $line.
467 if (defined ($line)) {
474 # If the current token is an identifier that names a type,
475 # returns the type and skips to the next token.
476 # Otherwise, returns undef.
478 if ($toktype eq 'id') {
479 foreach my $type (values (%type)) {
480 get_token (), return $type
481 if defined ($type->{NAME}) && $type->{NAME} eq $token;
489 # Makes sure that $toktype equals $type, reads the next token, and
490 # returns the previous $token.
493 die "parse error at `$token' expecting $type"
494 if $type ne $toktype;
502 # If $token equals $tok, reads the next token and returns true.
503 # Otherwise, returns false.
506 if ($token eq $tok) {
516 # If $token equals $tok, reads the next token.
517 # Otherwise, flags an error in the input.
520 die "parse error at `$token' expecting `$tok'" if !match ($tok);
523 # Parses and returns a function argument.
526 $arg{TYPE} = parse_type () || $type{NUMBER};
527 die "argument name expected at `$token'" if $toktype ne 'id';
530 if (lookahead () =~ /^[[,)]/) {
533 die "only double and string arrays supported"
534 if !any ($arg{TYPE}, @type{qw (NUMBER STRING)});
535 $arg{IDX} = force ('id');
537 $arg{TIMES} = force ('int');
538 die "multiplication factor must be positive"
546 $arg{CONDITION} = $arg{NAME} . ' ' . accumulate_balanced (',)');
556 # Prints the output file header.
560 Generated from $input_file by generate.pl.
566 # Prints the output file trailer.
581 # any($target, @list)
583 # Returns true if $target appears in @list,
586 $_ eq $_[0] and return 1 foreach @_[1...$#_];
590 # make_sysmis_decl($op, $min_valid_src)
592 # Returns a declaration for a boolean variable called `force_sysmis',
593 # which will be true when operation $op should be system-missing.
594 # Returns undef if there are no such circumstances.
596 # If $op has a minimum number of valid arguments, $min_valid_src
597 # should be an an expression that evaluates to the minimum number of
598 # valid arguments for $op.
599 sub make_sysmis_decl {
600 my ($op, $min_valid_src) = @_;
602 if (!$op->{ABSORB_MISS}) {
603 for my $arg (@{$op->{ARGS}}) {
604 my ($arg_name) = "arg_$arg->{NAME}";
605 if (!defined $arg->{IDX}) {
606 if (any ($arg->{TYPE}, @type{qw (NUMBER BOOLEAN)})) {
607 push (@sysmis_cond, "!is_valid ($arg_name)");
609 } elsif ($arg->{TYPE} eq $type{NUMBER}) {
610 my ($a) = "$arg_name";
611 my ($n) = "arg_$arg->{IDX}";
612 push (@sysmis_cond, "count_valid ($a, $n) < $n");
615 } elsif (defined $op->{MIN_VALID}) {
616 my ($args) = $op->{ARGS};
617 my ($arg) = ${$args}[$#{$args}];
618 my ($a) = "arg_$arg->{NAME}";
619 my ($n) = "arg_$arg->{IDX}";
620 push (@sysmis_cond, "count_valid ($a, $n) < $min_valid_src");
622 for my $arg (@{$op->{ARGS}}) {
623 push (@sysmis_cond, "!($arg->{CONDITION})")
624 if defined $arg->{CONDITION};
626 return "bool force_sysmis = " . join (' || ', @sysmis_cond)
633 # If $op has an array argument, return it.
634 # Otherwise, returns undef.
637 my ($args) = $op->{ARGS};
639 my ($last_arg) = $args->[@$args - 1];
640 return $last_arg if defined $last_arg->{IDX};