1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27 #include "algorithm.h"
40 /* Recursive descent parser in order of increasing precedence. */
41 typedef enum expr_type parse_recursively_func (union any_node **);
42 static parse_recursively_func parse_or, parse_and, parse_not;
43 static parse_recursively_func parse_rel, parse_add, parse_mul;
44 static parse_recursively_func parse_neg, parse_exp;
45 static parse_recursively_func parse_primary, parse_function;
47 /* Utility functions. */
48 static const char *expr_type_name (enum expr_type type);
49 static const char *var_type_name (int var_type);
50 static void make_bool (union any_node **n);
51 static union any_node *allocate_nonterminal (int op, union any_node *n);
52 static union any_node *allocate_binary_nonterminal (int op, union any_node *,
54 static union any_node *allocate_num_con (double value);
55 static union any_node *allocate_str_con (const char *string, size_t length);
56 static union any_node *allocate_var_node (int type, struct variable *);
57 static int type_check (union any_node **n,
58 enum expr_type actual_type,
59 enum expr_type expected_type);
61 static algo_compare_func compare_functions;
62 static void init_func_tab (void);
65 static void debug_print_tree (union any_node *, int);
68 /* Public functions. */
71 expr_free (struct expression *e)
81 pool_destroy (e->pool);
86 expr_parse (enum expr_type expected_type)
90 enum expr_type actual_type;
91 int optimize = (expected_type & EXPR_NO_OPTIMIZE) == 0;
93 expected_type &= ~EXPR_NO_OPTIMIZE;
95 /* Make sure the table of functions is initialized. */
98 /* Parse the expression. */
99 actual_type = parse_or (&n);
100 if (actual_type == EXPR_ERROR)
103 /* Enforce type rules. */
104 if (!type_check (&n, actual_type, expected_type))
110 /* Optimize the expression as best we can. */
112 optimize_expression (&n);
114 /* Dump the tree-based expression to a postfix representation for
115 best evaluation speed, and destroy the tree. */
116 e = xmalloc (sizeof *e);
117 e->type = actual_type;
118 dump_expression (n, e);
124 /* Returns the type of EXPR. */
126 expr_get_type (const struct expression *expr)
128 assert (expr != NULL);
133 type_check (union any_node **n, enum expr_type actual_type, enum expr_type expected_type)
135 switch (expected_type)
139 if (actual_type == EXPR_STRING)
141 msg (SE, _("Type mismatch: expression has string type, "
142 "but a numeric value is required here."));
145 if (actual_type == EXPR_NUMERIC && expected_type == EXPR_BOOLEAN)
146 *n = allocate_nonterminal (OP_NUM_TO_BOOL, *n);
150 if (actual_type != EXPR_STRING)
152 msg (SE, _("Type mismatch: expression has numeric type, "
153 "but a string value is required here."));
168 /* Recursive-descent expression parser. */
170 /* Coerces *NODE, of type ACTUAL_TYPE, to type REQUIRED_TYPE, and
171 returns success. If ACTUAL_TYPE cannot be coerced to the
172 desired type then we issue an error message about operator
173 OPERATOR_NAME and free *NODE. */
175 type_coercion (enum expr_type actual_type, enum expr_type required_type,
176 union any_node **node,
177 const char *operator_name)
179 assert (required_type == EXPR_NUMERIC
180 || required_type == EXPR_BOOLEAN
181 || required_type == EXPR_STRING);
183 if (actual_type == required_type)
188 else if (actual_type == EXPR_ERROR)
190 /* Error already reported. */
194 else if (actual_type == EXPR_BOOLEAN && required_type == EXPR_NUMERIC)
196 /* Boolean -> numeric: nothing to do. */
199 else if (actual_type == EXPR_NUMERIC && required_type == EXPR_BOOLEAN)
201 /* Numeric -> Boolean: insert conversion. */
207 /* We want a string and got a number/Boolean, or vice versa. */
208 assert ((actual_type == EXPR_STRING) != (required_type == EXPR_STRING));
210 if (required_type == EXPR_STRING)
211 msg (SE, _("Type mismatch: operands of %s operator must be strings."),
214 msg (SE, _("Type mismatch: operands of %s operator must be numeric."),
225 int token; /* Operator token. */
226 int type; /* Operator node type. */
227 const char *name; /* Operator name. */
230 /* Attempts to match the current token against the tokens for the
231 OP_CNT operators in OPS[]. If successful, returns nonzero
232 and, if OPERATOR is non-null, sets *OPERATOR to the operator.
233 On failure, returns zero and, if OPERATOR is non-null, sets
234 *OPERATOR to a null pointer. */
236 match_operator (const struct operator ops[], size_t op_cnt,
237 const struct operator **operator)
239 const struct operator *op;
241 for (op = ops; op < ops + op_cnt; op++)
243 if (op->token == '-')
244 lex_negative_to_dash ();
245 if (lex_match (op->token))
247 if (operator != NULL)
252 if (operator != NULL)
257 /* Parses a chain of left-associative operator/operand pairs.
258 The operators' operands uniformly must be type REQUIRED_TYPE.
259 There are OP_CNT operators, specified in OPS[]. The next
260 higher level is parsed by PARSE_NEXT_LEVEL. If CHAIN_WARNING
261 is non-null, then it will be issued as a warning if more than
262 one operator/operand pair is parsed. */
263 static enum expr_type
264 parse_binary_operators (union any_node **node,
265 enum expr_type actual_type,
266 enum expr_type required_type,
267 enum expr_type result_type,
268 const struct operator ops[], size_t op_cnt,
269 parse_recursively_func *parse_next_level,
270 const char *chain_warning)
273 const struct operator *operator;
275 if (actual_type == EXPR_ERROR)
278 for (op_count = 0; match_operator (ops, op_cnt, &operator); op_count++)
282 /* Convert the left-hand side to type REQUIRED_TYPE. */
283 if (!type_coercion (actual_type, required_type, node, operator->name))
286 /* Parse the right-hand side and coerce to type
288 if (!type_coercion (parse_next_level (&rhs), required_type,
289 &rhs, operator->name))
295 *node = allocate_binary_nonterminal (operator->type, *node, rhs);
297 /* The result is of type RESULT_TYPE. */
298 actual_type = result_type;
301 if (op_count > 1 && chain_warning != NULL)
302 msg (SW, chain_warning);
307 static enum expr_type
308 parse_inverting_unary_operator (union any_node **node,
309 enum expr_type required_type,
310 const struct operator *operator,
311 parse_recursively_func *parse_next_level)
316 while (match_operator (operator, 1, NULL))
319 return parse_next_level (node);
321 if (!type_coercion (parse_next_level (node), required_type,
322 node, operator->name))
324 if (op_count % 2 != 0)
325 *node = allocate_nonterminal (operator->type, *node);
326 return required_type;
329 /* Parses the OR level. */
330 static enum expr_type
331 parse_or (union any_node **n)
333 static const struct operator ops[] =
335 { T_OR, OP_OR, "logical disjunction (\"OR\")" },
338 return parse_binary_operators (n, parse_and (n), EXPR_BOOLEAN, EXPR_BOOLEAN,
339 ops, sizeof ops / sizeof *ops,
343 /* Parses the AND level. */
344 static enum expr_type
345 parse_and (union any_node ** n)
347 static const struct operator ops[] =
349 { T_AND, OP_AND, "logical conjunction (\"AND\")" },
352 return parse_binary_operators (n, parse_not (n), EXPR_BOOLEAN, EXPR_BOOLEAN,
353 ops, sizeof ops / sizeof *ops,
357 /* Parses the NOT level. */
358 static enum expr_type
359 parse_not (union any_node ** n)
361 static const struct operator op
362 = { T_NOT, OP_NOT, "logical negation (\"NOT-\")" };
363 return parse_inverting_unary_operator (n, EXPR_BOOLEAN, &op, parse_rel);
366 /* Parse relational operators. */
367 static enum expr_type
368 parse_rel (union any_node **n)
370 static const struct operator numeric_ops[] =
372 { '=', OP_EQ, "numeric equality (\"=\")" },
373 { T_EQ, OP_EQ, "numeric equality (\"EQ\")" },
374 { T_GE, OP_GE, "numeric greater-than-or-equal-to (\">=\")" },
375 { T_GT, OP_GT, "numeric greater than (\">\")" },
376 { T_LE, OP_LE, "numeric less-than-or-equal-to (\"<=\")" },
377 { T_LT, OP_LT, "numeric less than (\"<\")" },
378 { T_NE, OP_NE, "numeric inequality (\"<>\")" },
381 static const struct operator string_ops[] =
383 { '=', OP_EQ_STRING, "string equality (\"=\")" },
384 { T_EQ, OP_EQ_STRING, "string equality (\"EQ\")" },
385 { T_GE, OP_GE_STRING, "string greater-than-or-equal-to (\">=\")" },
386 { T_GT, OP_GT_STRING, "string greater than (\">\")" },
387 { T_LE, OP_LE_STRING, "string less-than-or-equal-to (\"<=\")" },
388 { T_LT, OP_LT_STRING, "string less than (\"<\")" },
389 { T_NE, OP_NE_STRING, "string inequality (\"<>\")" },
392 int type = parse_add (n);
394 const char *chain_warning =
395 _("Chaining relational operators (e.g. \"a < b < c\") will "
396 "not produce the mathematically expected result. "
397 "Use the AND logical operator to fix the problem "
398 "(e.g. \"a < b AND b < c\"). "
399 "If chaining is really intended, parentheses will disable "
400 "this warning (e.g. \"(a < b) < c\".)");
409 return parse_binary_operators (n,
410 type, EXPR_NUMERIC, EXPR_BOOLEAN,
412 sizeof numeric_ops / sizeof *numeric_ops,
413 parse_add, chain_warning);
416 return parse_binary_operators (n,
417 type, EXPR_STRING, EXPR_BOOLEAN,
419 sizeof string_ops / sizeof *string_ops,
420 parse_add, chain_warning);
428 /* Parses the addition and subtraction level. */
429 static enum expr_type
430 parse_add (union any_node **n)
432 static const struct operator ops[] =
434 { '+', OP_ADD, "addition (\"+\")" },
435 { '-', OP_SUB, "subtraction (\"-\")-" },
438 return parse_binary_operators (n, parse_mul (n), EXPR_NUMERIC, EXPR_NUMERIC,
439 ops, sizeof ops / sizeof *ops,
443 /* Parses the multiplication and division level. */
444 static enum expr_type
445 parse_mul (union any_node ** n)
447 static const struct operator ops[] =
449 { '*', OP_MUL, "multiplication (\"*\")" },
450 { '/', OP_DIV, "division (\"/\")" },
453 return parse_binary_operators (n, parse_neg (n), EXPR_NUMERIC, EXPR_NUMERIC,
454 ops, sizeof ops / sizeof *ops,
458 /* Parses the unary minus level. */
459 static enum expr_type
460 parse_neg (union any_node **n)
462 static const struct operator op = { '-', OP_NEG, "negation (\"-\")" };
463 return parse_inverting_unary_operator (n, EXPR_NUMERIC, &op, parse_exp);
466 static enum expr_type
467 parse_exp (union any_node **n)
469 static const struct operator ops[] =
471 { T_EXP, OP_POW, "exponentiation (\"**\")" },
474 const char *chain_warning =
475 _("The exponentiation operator (\"**\") is left-associative, "
476 "even though right-associative semantics are more useful. "
477 "That is, \"a**b**c\" equals \"(a**b)**c\", not as \"a**(b**c)\". "
478 "To disable this warning, insert parentheses.");
480 return parse_binary_operators (n,
481 parse_primary (n), EXPR_NUMERIC, EXPR_NUMERIC,
482 ops, sizeof ops / sizeof *ops,
483 parse_primary, chain_warning);
486 /* Parses system variables. */
487 static enum expr_type
488 parse_sysvar (union any_node **n)
490 if (!strcmp (tokid, "$CASENUM"))
492 *n = xmalloc (sizeof (struct casenum_node));
493 (*n)->casenum.type = OP_CASENUM;
496 else if (!strcmp (tokid, "$DATE"))
498 static const char *months[12] =
500 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
501 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
507 time = localtime (&last_vfm_invocation);
508 sprintf (temp_buf, "%02d %s %02d", abs (time->tm_mday) % 100,
509 months[abs (time->tm_mon) % 12], abs (time->tm_year) % 100);
511 *n = xmalloc (sizeof (struct str_con_node) + 8);
512 (*n)->str_con.type = OP_STR_CON;
513 (*n)->str_con.len = 9;
514 memcpy ((*n)->str_con.s, temp_buf, 9);
523 if (!strcmp (tokid, "$TRUE"))
528 else if (!strcmp (tokid, "$FALSE"))
533 else if (!strcmp (tokid, "$SYSMIS"))
535 else if (!strcmp (tokid, "$JDATE"))
537 struct tm *time = localtime (&last_vfm_invocation);
538 d = yrmoda (time->tm_year + 1900, time->tm_mon + 1, time->tm_mday);
540 else if (!strcmp (tokid, "$TIME"))
543 time = localtime (&last_vfm_invocation);
544 d = (yrmoda (time->tm_year + 1900, time->tm_mon + 1,
545 time->tm_mday) * 60. * 60. * 24.
546 + time->tm_hour * 60 * 60.
550 else if (!strcmp (tokid, "$LENGTH"))
551 d = get_viewlength ();
552 else if (!strcmp (tokid, "$WIDTH"))
553 d = get_viewwidth ();
556 msg (SE, _("Unknown system variable %s."), tokid);
560 *n = xmalloc (sizeof (struct num_con_node));
561 (*n)->num_con.type = OP_NUM_CON;
562 (*n)->num_con.value = d;
567 /* Parses numbers, varnames, etc. */
568 static enum expr_type
569 parse_primary (union any_node **n)
577 /* An identifier followed by a left parenthesis is a function
579 if (lex_look_ahead () == '(')
580 return parse_function (n);
582 /* $ at the beginning indicates a system variable. */
585 enum expr_type type = parse_sysvar (n);
590 /* Otherwise, it must be a user variable. */
591 v = dict_lookup_var (default_dict, tokid);
595 lex_error (_("expecting variable name"));
599 if (v->type == NUMERIC)
601 *n = allocate_var_node (OP_NUM_VAR, v);
606 *n = allocate_var_node (OP_STR_VAR, v);
612 *n = allocate_num_con (tokval);
618 *n = allocate_str_con (ds_value (&tokstr), ds_length (&tokstr));
628 if (!lex_match (')'))
630 lex_error (_("expecting `)'"));
638 lex_error (_("in expression"));
643 /* Individual function parsing. */
649 enum expr_type (*func) (const struct function *, int, union any_node **);
652 static struct function func_tab[];
653 static int func_count;
655 static int get_num_args (const struct function *, int, union any_node **);
657 static enum expr_type
658 unary_func (const struct function *f, int x UNUSED, union any_node ** n)
660 if (!get_num_args (f, 1, n))
665 static enum expr_type
666 binary_func (const struct function *f, int x UNUSED, union any_node ** n)
668 if (!get_num_args (f, 2, n))
673 static enum expr_type
674 ternary_func (const struct function *f, int x UNUSED, union any_node **n)
676 if (!get_num_args (f, 3, n))
681 static enum expr_type
682 MISSING_func (const struct function *f, int x UNUSED, union any_node **n)
684 if (!get_num_args (f, 1, n))
689 static enum expr_type
690 SYSMIS_func (const struct function *f, int x UNUSED, union any_node **n)
692 if (!get_num_args (f, 1, n))
694 if ((*n)->nonterm.arg[0]->type == OP_NUM_VAR)
696 struct variable *v = (*n)->nonterm.arg[0]->var.v;
698 *n = allocate_var_node (OP_NUM_SYS, v);
703 static enum expr_type
704 VALUE_func (const struct function *f UNUSED, int x UNUSED, union any_node **n)
706 struct variable *v = parse_variable ();
710 if (v->type == NUMERIC)
712 *n = allocate_var_node (OP_NUM_VAL, v);
717 *n = allocate_var_node (OP_STR_VAR, v);
722 static enum expr_type
723 LAG_func (const struct function *f UNUSED, int x UNUSED, union any_node **n)
725 struct variable *v = parse_variable ();
732 if (!lex_integer_p () || lex_integer () <= 0 || lex_integer () > 1000)
734 msg (SE, _("Argument 2 to LAG must be a small positive "
735 "integer constant."));
739 nlag = lex_integer ();
742 n_lag = max (nlag, n_lag);
743 *n = xmalloc (sizeof (struct lag_node));
744 (*n)->lag.type = (v->type == NUMERIC ? OP_NUM_LAG : OP_STR_LAG);
746 (*n)->lag.lag = nlag;
747 return (v->type == NUMERIC ? EXPR_NUMERIC : EXPR_STRING);
750 /* This screwball function parses n-ary operators:
752 1. NMISS, NVALID, SUM, MEAN: any number of numeric
755 2. SD, VARIANCE, CFVAR: at least two numeric arguments.
757 3. RANGE: An odd number of arguments, but at least three, and
758 all of the same type.
760 4. ANY: At least two arguments, all of the same type.
762 5. MIN, MAX: Any number of arguments, all of the same type.
764 static enum expr_type
765 nary_num_func (const struct function *f, int min_args, union any_node **n)
767 /* Argument number of current argument (used for error messages). */
770 /* Number of arguments. */
773 /* Number of arguments allocated. */
776 /* Type of arguments. */
777 int type = (f->t == OP_ANY || f->t == OP_RANGE
778 || f->t == OP_MIN || f->t == OP_MAX) ? -1 : NUMERIC;
780 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
781 (*n)->nonterm.type = f->t;
785 /* Special case: vara TO varb. */
787 /* FIXME: Is this condition failsafe? Can we _ever_ have two
788 juxtaposed identifiers otherwise? */
789 if (token == T_ID && dict_lookup_var (default_dict, tokid) != NULL
790 && toupper (lex_look_ahead ()) == 'T')
795 int opts = PV_SINGLE;
799 else if (type == ALPHA)
801 if (!parse_variables (default_dict, &v, &nv, opts))
803 if (nv + (*n)->nonterm.n >= m)
806 *n = xrealloc (*n, (sizeof (struct nonterm_node)
807 + (m - 1) * sizeof (union any_node *)));
812 for (j = 1; j < nv; j++)
813 if (type != v[j]->type)
815 msg (SE, _("Type mismatch in argument %d of %s, which was "
816 "expected to be of %s type. It was actually "
818 arg_idx, f->s, var_type_name (type), var_type_name (v[j]->type));
823 for (j = 0; j < nv; j++)
825 union any_node **c = &(*n)->nonterm.arg[(*n)->nonterm.n++];
826 *c = allocate_var_node ((type == NUMERIC
827 ? OP_NUM_VAR : OP_STR_VAR),
834 int t = parse_or (&c);
838 if (t == EXPR_BOOLEAN)
841 msg (SE, _("%s cannot take Boolean operands."), f->s);
846 if (t == EXPR_NUMERIC)
848 else if (t == EXPR_STRING)
851 else if ((t == EXPR_NUMERIC) ^ (type == NUMERIC))
854 msg (SE, _("Type mismatch in argument %d of %s, which was "
855 "expected to be of %s type. It was actually "
857 arg_idx, f->s, var_type_name (type), expr_type_name (t));
860 if ((*n)->nonterm.n + 1 >= m)
863 *n = xrealloc (*n, (sizeof (struct nonterm_node)
864 + (m - 1) * sizeof (union any_node *)));
866 (*n)->nonterm.arg[(*n)->nonterm.n++] = c;
871 if (!lex_match (','))
873 lex_error (_("in function call"));
879 *n = xrealloc (*n, (sizeof (struct nonterm_node)
880 + ((*n)->nonterm.n) * sizeof (union any_node *)));
882 nargs = (*n)->nonterm.n;
883 if (f->t == OP_RANGE)
885 if (nargs < 3 || (nargs & 1) == 0)
887 msg (SE, _("RANGE requires an odd number of arguments, but "
892 else if (f->t == OP_SD || f->t == OP_VARIANCE
893 || f->t == OP_CFVAR || f->t == OP_ANY)
897 msg (SE, _("%s requires at least two arguments."), f->s);
902 if (f->t == OP_CFVAR || f->t == OP_SD || f->t == OP_VARIANCE)
903 min_args = max (min_args, 2);
905 min_args = max (min_args, 1);
907 /* Yes, this is admittedly a terrible crock, but it works. */
908 (*n)->nonterm.arg[(*n)->nonterm.n] = (union any_node *) min_args;
910 if (min_args > nargs)
912 msg (SE, _("%s.%d requires at least %d arguments."),
913 f->s, min_args, min_args);
917 if (f->t == OP_MIN || f->t == OP_MAX)
922 (*n)->type = OP_MIN_STRING;
923 else if (f->t == OP_MAX)
924 (*n)->type = OP_MAX_STRING;
932 else if (f->t == OP_ANY || f->t == OP_RANGE)
937 (*n)->type = OP_ANY_STRING;
938 else if (f->t == OP_RANGE)
939 (*n)->type = OP_RANGE_STRING;
953 static enum expr_type
954 CONCAT_func (const struct function *f UNUSED, int x UNUSED, union any_node **n)
960 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
961 (*n)->nonterm.type = OP_CONCAT;
965 if ((*n)->nonterm.n >= m)
968 *n = xrealloc (*n, (sizeof (struct nonterm_node)
969 + (m - 1) * sizeof (union any_node *)));
971 type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
972 if (type == EXPR_ERROR)
975 if (type != EXPR_STRING)
977 msg (SE, _("Argument %d to CONCAT is type %s. All arguments "
978 "to CONCAT must be strings."),
979 (*n)->nonterm.n + 1, expr_type_name (type));
983 if (!lex_match (','))
986 *n = xrealloc (*n, (sizeof (struct nonterm_node)
987 + ((*n)->nonterm.n - 1) * sizeof (union any_node *)));
995 /* Parses a string function according to f->desc. f->desc[0] is the
996 return type of the function. Succeeding characters represent
997 successive args. Optional args are separated from the required
998 args by a slash (`/'). Codes are `n', numeric arg; `s', string
999 arg; and `f', format spec (this must be the last arg). If the
1000 optional args are included, the type becomes f->t+1. */
1001 static enum expr_type
1002 generic_str_func (const struct function *f, int x UNUSED, union any_node **n)
1004 struct string_function
1007 enum expr_type return_type;
1008 const char *arg_types;
1011 static const struct string_function string_func_tab[] =
1013 {OP_INDEX_2, OP_INDEX_3, EXPR_NUMERIC, "ssN"},
1014 {OP_RINDEX_2, OP_RINDEX_3, EXPR_NUMERIC, "ssN"},
1015 {OP_LENGTH, 0, EXPR_NUMERIC, "s"},
1016 {OP_LOWER, 0, EXPR_STRING, "s"},
1017 {OP_UPPER, 0, EXPR_STRING, "s"},
1018 {OP_LPAD, 0, EXPR_STRING, "snS"},
1019 {OP_RPAD, 0, EXPR_STRING, "snS"},
1020 {OP_LTRIM, 0, EXPR_STRING, "sS"},
1021 {OP_RTRIM, 0, EXPR_STRING, "sS"},
1022 {OP_NUMBER, 0, EXPR_NUMERIC, "sf"},
1023 {OP_STRING, 0, EXPR_STRING, "nf"},
1024 {OP_SUBSTR_2, OP_SUBSTR_3, EXPR_STRING, "snN"},
1027 const int string_func_cnt = sizeof string_func_tab / sizeof *string_func_tab;
1029 const struct string_function *sf;
1032 struct nonterm_node *nonterm;
1034 /* Find string_function that corresponds to f. */
1035 for (sf = string_func_tab; sf < string_func_tab + string_func_cnt; sf++)
1038 assert (sf < string_func_tab + string_func_cnt);
1040 /* Count max number of arguments. */
1042 for (cp = sf->arg_types; *cp != '\0'; cp++)
1050 /* Allocate node. */
1051 *n = xmalloc (sizeof (struct nonterm_node)
1052 + (arg_cnt - 1) * sizeof (union any_node *));
1053 nonterm = &(*n)->nonterm;
1054 nonterm->type = sf->t1;
1057 /* Parse arguments. */
1061 if (*cp == 'n' || *cp == 's' || *cp == 'N' || *cp == 'S')
1063 enum expr_type wanted_type
1064 = *cp == 'n' || *cp == 'N' ? EXPR_NUMERIC : EXPR_STRING;
1065 enum expr_type actual_type = parse_or (&nonterm->arg[nonterm->n]);
1067 if (actual_type == EXPR_ERROR)
1069 else if (actual_type == EXPR_BOOLEAN)
1070 actual_type = EXPR_NUMERIC;
1072 if (actual_type != wanted_type)
1074 msg (SE, _("Argument %d to %s was expected to be of %s type. "
1075 "It was actually of type %s."),
1076 nonterm->n + 1, f->s,
1077 expr_type_name (actual_type), expr_type_name (wanted_type));
1081 else if (*cp == 'f')
1083 /* This is always the very last argument. Also, this code
1084 is a crock. However, it works. */
1085 struct fmt_spec fmt;
1087 if (!parse_format_specifier (&fmt, 0))
1089 if (formats[fmt.type].cat & FCAT_STRING)
1091 msg (SE, _("%s is not a numeric format."), fmt_to_string (&fmt));
1094 nonterm->arg[nonterm->n + 0] = (union any_node *) fmt.type;
1095 nonterm->arg[nonterm->n + 1] = (union any_node *) fmt.w;
1096 nonterm->arg[nonterm->n + 2] = (union any_node *) fmt.d;
1102 /* We're done if no args are left. */
1107 /* Optional arguments are named with capital letters. */
1108 if (isupper ((unsigned char) *cp))
1110 if (!lex_match (','))
1115 nonterm->arg[nonterm->n++] = allocate_num_con (SYSMIS);
1116 else if (*cp == 'S')
1117 nonterm->arg[nonterm->n++] = allocate_str_con (" ", 1);
1125 nonterm->type = sf->t2;
1127 else if (!lex_match (','))
1129 msg (SE, _("Too few arguments to function %s."), f->s);
1134 return sf->return_type;
1141 /* General function parsing. */
1144 get_num_args (const struct function *f, int num_args, union any_node **n)
1149 *n = xmalloc (sizeof (struct nonterm_node)
1150 + (num_args - 1) * sizeof (union any_node *));
1151 (*n)->nonterm.type = f->t;
1152 (*n)->nonterm.n = 0;
1155 t = parse_or (&(*n)->nonterm.arg[i]);
1156 if (t == EXPR_ERROR)
1160 if (t == EXPR_STRING)
1162 msg (SE, _("Type mismatch in argument %d of %s. A string "
1163 "expression was supplied where only a numeric expression "
1168 if (++i >= num_args)
1170 if (!lex_match (','))
1172 msg (SE, _("Missing comma following argument %d of %s."), i + 1, f->s);
1182 static enum expr_type
1183 parse_function (union any_node ** n)
1185 const struct function *fp;
1186 char fname[32], *cp;
1189 const struct vector *v;
1191 /* Check for a vector with this name. */
1192 v = dict_lookup_vector (default_dict, tokid);
1196 assert (token == '(');
1199 *n = xmalloc (sizeof (struct nonterm_node)
1200 + sizeof (union any_node *[2]));
1201 (*n)->nonterm.type = (v->var[0]->type == NUMERIC
1202 ? OP_VEC_ELEM_NUM : OP_VEC_ELEM_STR);
1203 (*n)->nonterm.n = 0;
1205 t = parse_or (&(*n)->nonterm.arg[0]);
1206 if (t == EXPR_ERROR)
1208 if (t != EXPR_NUMERIC)
1210 msg (SE, _("The index value after a vector name must be numeric."));
1215 if (!lex_match (')'))
1217 msg (SE, _("`)' expected after a vector index value."));
1220 ((*n)->nonterm.arg[1]) = (union any_node *) v->idx;
1222 return v->var[0]->type == NUMERIC ? EXPR_NUMERIC : EXPR_STRING;
1225 ds_truncate (&tokstr, 31);
1226 strcpy (fname, ds_value (&tokstr));
1227 cp = strrchr (fname, '.');
1228 if (cp && isdigit ((unsigned char) cp[1]))
1230 min_args = atoi (&cp[1]);
1237 if (!lex_force_match ('('))
1244 fp = binary_search (func_tab, func_count, sizeof *func_tab, &f,
1245 compare_functions, NULL);
1250 msg (SE, _("There is no function named %s."), fname);
1253 if (min_args && fp->func != nary_num_func)
1255 msg (SE, _("Function %s may not be given a minimum number of "
1256 "arguments."), fname);
1259 t = fp->func (fp, min_args, n);
1260 if (t == EXPR_ERROR)
1262 if (!lex_match (')'))
1264 lex_error (_("expecting `)' after %s function"), fname);
1275 /* Utility functions. */
1278 expr_type_name (enum expr_type type)
1286 return _("Boolean");
1289 return _("numeric");
1301 var_type_name (int type)
1306 return _("numeric");
1316 make_bool (union any_node **n)
1320 c = xmalloc (sizeof (struct nonterm_node));
1321 c->nonterm.type = OP_NUM_TO_BOOL;
1323 c->nonterm.arg[0] = *n;
1328 free_node (union any_node *n)
1332 if (IS_NONTERMINAL (n->type))
1336 for (i = 0; i < n->nonterm.n; i++)
1337 free_node (n->nonterm.arg[i]);
1343 static union any_node *
1344 allocate_num_con (double value)
1348 c = xmalloc (sizeof (struct num_con_node));
1349 c->num_con.type = OP_NUM_CON;
1350 c->num_con.value = value;
1355 static union any_node *
1356 allocate_str_con (const char *string, size_t length)
1360 c = xmalloc (sizeof (struct str_con_node) + length - 1);
1361 c->str_con.type = OP_STR_CON;
1362 c->str_con.len = length;
1363 memcpy (c->str_con.s, string, length);
1368 static union any_node *
1369 allocate_var_node (int type, struct variable *variable)
1373 c = xmalloc (sizeof (struct var_node));
1375 c->var.v = variable;
1381 allocate_nonterminal (int op, union any_node *n)
1385 c = xmalloc (sizeof c->nonterm);
1386 c->nonterm.type = op;
1388 c->nonterm.arg[0] = n;
1393 static union any_node *
1394 allocate_binary_nonterminal (int op, union any_node *lhs, union any_node *rhs)
1396 union any_node *node;
1398 node = xmalloc (sizeof node->nonterm + sizeof *node->nonterm.arg);
1399 node->nonterm.type = op;
1400 node->nonterm.n = 2;
1401 node->nonterm.arg[0] = lhs;
1402 node->nonterm.arg[1] = rhs;
1407 static struct function func_tab[] =
1409 {"ABS", OP_ABS, unary_func},
1410 {"ACOS", OP_ARCOS, unary_func},
1411 {"ARCOS", OP_ARCOS, unary_func},
1412 {"ARSIN", OP_ARSIN, unary_func},
1413 {"ARTAN", OP_ARTAN, unary_func},
1414 {"ASIN", OP_ARSIN, unary_func},
1415 {"ATAN", OP_ARTAN, unary_func},
1416 {"COS", OP_COS, unary_func},
1417 {"EXP", OP_EXP, unary_func},
1418 {"LG10", OP_LG10, unary_func},
1419 {"LN", OP_LN, unary_func},
1420 {"MOD10", OP_MOD10, unary_func},
1421 {"NORMAL", OP_NORMAL, unary_func},
1422 {"RND", OP_RND, unary_func},
1423 {"SIN", OP_SIN, unary_func},
1424 {"SQRT", OP_SQRT, unary_func},
1425 {"TAN", OP_TAN, unary_func},
1426 {"TRUNC", OP_TRUNC, unary_func},
1427 {"UNIFORM", OP_UNIFORM, unary_func},
1429 {"TIME.DAYS", OP_TIME_DAYS, unary_func},
1430 {"TIME.HMS", OP_TIME_HMS, ternary_func},
1432 {"CTIME.DAYS", OP_CTIME_DAYS, unary_func},
1433 {"CTIME.HOURS", OP_CTIME_HOURS, unary_func},
1434 {"CTIME.MINUTES", OP_CTIME_MINUTES, unary_func},
1435 {"CTIME.SECONDS", OP_CTIME_SECONDS, unary_func},
1437 {"DATE.DMY", OP_DATE_DMY, ternary_func},
1438 {"DATE.MDY", OP_DATE_MDY, ternary_func},
1439 {"DATE.MOYR", OP_DATE_MOYR, binary_func},
1440 {"DATE.QYR", OP_DATE_QYR, binary_func},
1441 {"DATE.WKYR", OP_DATE_WKYR, binary_func},
1442 {"DATE.YRDAY", OP_DATE_YRDAY, binary_func},
1444 {"XDATE.DATE", OP_XDATE_DATE, unary_func},
1445 {"XDATE.HOUR", OP_XDATE_HOUR, unary_func},
1446 {"XDATE.JDAY", OP_XDATE_JDAY, unary_func},
1447 {"XDATE.MDAY", OP_XDATE_MDAY, unary_func},
1448 {"XDATE.MINUTE", OP_XDATE_MINUTE, unary_func},
1449 {"XDATE.MONTH", OP_XDATE_MONTH, unary_func},
1450 {"XDATE.QUARTER", OP_XDATE_QUARTER, unary_func},
1451 {"XDATE.SECOND", OP_XDATE_SECOND, unary_func},
1452 {"XDATE.TDAY", OP_XDATE_TDAY, unary_func},
1453 {"XDATE.TIME", OP_XDATE_TIME, unary_func},
1454 {"XDATE.WEEK", OP_XDATE_WEEK, unary_func},
1455 {"XDATE.WKDAY", OP_XDATE_WKDAY, unary_func},
1456 {"XDATE.YEAR", OP_XDATE_YEAR, unary_func},
1458 {"MISSING", OP_SYSMIS, MISSING_func},
1459 {"MOD", OP_MOD, binary_func},
1460 {"SYSMIS", OP_SYSMIS, SYSMIS_func},
1461 {"VALUE", OP_NUM_VAL, VALUE_func},
1462 {"LAG", OP_NUM_LAG, LAG_func},
1463 {"YRMODA", OP_YRMODA, ternary_func},
1465 {"ANY", OP_ANY, nary_num_func},
1466 {"CFVAR", OP_CFVAR, nary_num_func},
1467 {"MAX", OP_MAX, nary_num_func},
1468 {"MEAN", OP_MEAN, nary_num_func},
1469 {"MIN", OP_MIN, nary_num_func},
1470 {"NMISS", OP_NMISS, nary_num_func},
1471 {"NVALID", OP_NVALID, nary_num_func},
1472 {"RANGE", OP_RANGE, nary_num_func},
1473 {"SD", OP_SD, nary_num_func},
1474 {"SUM", OP_SUM, nary_num_func},
1475 {"VAR", OP_VARIANCE, nary_num_func},
1476 {"VARIANCE", OP_VARIANCE, nary_num_func},
1478 {"CONCAT", OP_CONCAT, CONCAT_func},
1480 {"INDEX", OP_INDEX_2, generic_str_func},
1481 {"RINDEX", OP_RINDEX_2, generic_str_func},
1482 {"LENGTH", OP_LENGTH, generic_str_func},
1483 {"LOWER", OP_LOWER, generic_str_func},
1484 {"UPCASE", OP_UPPER, generic_str_func},
1485 {"LPAD", OP_LPAD, generic_str_func},
1486 {"RPAD", OP_RPAD, generic_str_func},
1487 {"LTRIM", OP_LTRIM, generic_str_func},
1488 {"RTRIM", OP_RTRIM, generic_str_func},
1489 {"NUMBER", OP_NUMBER, generic_str_func},
1490 {"STRING", OP_STRING, generic_str_func},
1491 {"SUBSTR", OP_SUBSTR_2, generic_str_func},
1494 /* An algo_compare_func that compares functions A and B based on
1497 compare_functions (const void *a_, const void *b_, void *aux UNUSED)
1499 const struct function *a = a_;
1500 const struct function *b = b_;
1502 return strcmp (a->s, b->s);
1506 init_func_tab (void)
1516 func_count = sizeof func_tab / sizeof *func_tab;
1517 sort (func_tab, func_count, sizeof *func_tab, compare_functions, NULL);
1524 print_type (union any_node * n)
1529 s = ops[n->type].name;
1531 if (ops[n->type].flags & OP_MIN_ARGS)
1532 printf ("%s.%d\n", s, (int) n->nonterm.arg[n->nonterm.n]);
1533 else if (ops[n->type].flags & OP_FMT_SPEC)
1537 f.type = (int) n->nonterm.arg[n->nonterm.n + 0];
1538 f.w = (int) n->nonterm.arg[n->nonterm.n + 1];
1539 f.d = (int) n->nonterm.arg[n->nonterm.n + 2];
1540 printf ("%s(%s)\n", s, fmt_to_string (&f));
1547 debug_print_tree (union any_node * n, int level)
1550 for (i = 0; i < level; i++)
1552 if (n->type < OP_TERMINAL)
1555 for (i = 0; i < n->nonterm.n; i++)
1556 debug_print_tree (n->nonterm.arg[i], level + 1);
1563 printf (_("!!TERMINAL!!"));
1566 if (n->num_con.value == SYSMIS)
1569 printf ("%f", n->num_con.value);
1572 printf ("\"%.*s\"", n->str_con.len, n->str_con.s);
1576 printf ("%s", n->var.v->name);
1580 printf ("LAG(%s,%d)", n->lag.v->name, n->lag.lag);
1583 printf ("SYSMIS(%s)", n->var.v->name);
1586 printf ("VALUE(%s)", n->var.v->name);
1589 printf (_("!!SENTINEL!!"));
1592 printf (_("!!ERROR%d!!"), n->type);
1598 #endif /* DEBUGGING */
1601 expr_debug_print_postfix (const struct expression *e)
1603 const unsigned char *o;
1604 const double *num = e->num;
1605 const unsigned char *str = e->str;
1606 struct variable *const *v = e->var;
1609 printf ("postfix:");
1610 for (o = e->op; *o != OP_SENTINEL;)
1613 if (IS_NONTERMINAL (t))
1615 printf (" %s", ops[t].name);
1617 if (ops[t].flags & OP_VAR_ARGS)
1619 printf ("(%d)", *o);
1622 if (ops[t].flags & OP_MIN_ARGS)
1627 if (ops[t].flags & OP_FMT_SPEC)
1630 f.type = (int) *o++;
1633 printf ("(%s)", fmt_to_string (&f));
1636 else if (t == OP_NUM_CON)
1641 printf (" %f", *num);
1644 else if (t == OP_STR_CON)
1646 printf (" \"%.*s\"", *str, &str[1]);
1649 else if (t == OP_NUM_VAR || t == OP_STR_VAR)
1651 printf (" %s", (*v)->name);
1654 else if (t == OP_NUM_SYS)
1656 printf (" SYSMIS(#%d)", *o);
1659 else if (t == OP_NUM_VAL)
1661 printf (" VALUE(#%d)", *o);
1664 else if (t == OP_NUM_LAG || t == OP_STR_LAG)
1666 printf (" LAG(%s,%d)", (*v)->name, *o);
1672 printf ("%d unknown\n", t);
1679 #define DEFINE_OPERATOR(NAME, STACK_DELTA, FLAGS, ARGS) \
1680 {#NAME, STACK_DELTA, FLAGS, ARGS},
1681 struct op_desc ops[OP_SENTINEL] =
1686 #include "command.h"
1689 cmd_debug_evaluate (void)
1691 struct expression *expr;
1693 enum expr_type expr_flags;
1694 int dump_postfix = 0;
1696 discard_variables ();
1699 if (lex_match_id ("NOOPTIMIZE"))
1700 expr_flags |= EXPR_NO_OPTIMIZE;
1701 if (lex_match_id ("POSTFIX"))
1705 lex_force_match ('/');
1708 fprintf (stderr, "%s => ", lex_rest_of_line (NULL));
1711 expr = expr_parse (EXPR_ANY | expr_flags);
1712 if (!expr || token != '.')
1716 fprintf (stderr, "error\n");
1721 expr_debug_print_postfix (expr);
1724 expr_evaluate (expr, NULL, 0, &value);
1725 switch (expr_get_type (expr))
1728 if (value.f == SYSMIS)
1729 fprintf (stderr, "sysmis\n");
1731 fprintf (stderr, "%.2f\n", value.f);
1735 if (value.f == SYSMIS)
1736 fprintf (stderr, "sysmis\n");
1737 else if (value.f == 0.0)
1738 fprintf (stderr, "false\n");
1740 fprintf (stderr, "true\n");
1744 fputc ('"', stderr);
1745 fwrite (value.c + 1, value.c[0], 1, stderr);
1746 fputs ("\"\n", stderr);