1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27 #include "algorithm.h"
38 /* Lowest precedence. */
39 static int parse_or (union any_node **n);
40 static int parse_and (union any_node **n);
41 static int parse_not (union any_node **n);
42 static int parse_rel (union any_node **n);
43 static int parse_add (union any_node **n);
44 static int parse_mul (union any_node **n);
45 static int parse_neg (union any_node **n);
46 static int parse_exp (union any_node **n);
47 static int parse_primary (union any_node **n);
48 static int parse_function (union any_node **n);
49 /* Highest precedence. */
51 /* Utility functions. */
52 static const char *expr_type_name (int type);
53 static const char *type_name (int type);
54 static void make_bool (union any_node **n);
55 static union any_node *allocate_nonterminal (int op, union any_node *n);
56 static union any_node *append_nonterminal_arg (union any_node *,
58 static int type_check (union any_node **n, int type, int flags);
60 static algo_compare_func compare_functions;
61 static void init_func_tab (void);
64 static void debug_print_tree (union any_node *, int);
68 static void debug_print_postfix (struct expression *);
71 /* Public functions. */
74 expr_free (struct expression *e)
84 pool_destroy (e->pool);
89 expr_parse (int flags)
95 /* Make sure the table of functions is initialized. */
98 /* Parse the expression. */
100 if (type == EX_ERROR)
103 /* Enforce type rules. */
104 if (!type_check (&n, type, flags))
110 /* Optimize the expression as best we can. */
111 n = (union any_node *) optimize_expression ((struct nonterm_node *) n);
113 /* Dump the tree-based expression to a postfix representation for
114 best evaluation speed, and destroy the tree. */
115 e = xmalloc (sizeof *e);
117 dump_expression (n, e);
120 /* If we're debugging or the user requested it, print the postfix
124 if (flags & PXP_DUMP)
126 debug_print_postfix (e);
133 type_check (union any_node **n, int type, int flags)
135 /* Enforce PXP_BOOLEAN flag. */
136 if (flags & PXP_BOOLEAN)
138 if (type == EX_STRING)
140 msg (SE, _("A string expression was supplied in a place "
141 "where a Boolean expression was expected."));
144 else if (type == EX_NUMERIC)
145 *n = allocate_nonterminal (OP_NUM_TO_BOOL, *n);
148 /* Enforce PXP_NUMERIC flag. */
149 if ((flags & PXP_NUMERIC) && (type != EX_NUMERIC))
151 msg (SE, _("A numeric expression was expected in a place "
152 "where one was not supplied."));
156 /* Enforce PXP_STRING flag. */
157 if ((flags & PXP_STRING) && (type != EX_STRING))
159 msg (SE, _("A string expression was expected in a place "
160 "where one was not supplied."));
167 /* Recursive-descent expression parser. */
169 /* Parses the OR level. */
171 parse_or (union any_node **n)
173 char typ[] = N_("The OR operator cannot take string operands.");
177 type = parse_and (n);
178 if (type == EX_ERROR || token != T_OR)
180 if (type == EX_STRING)
183 msg (SE, gettext (typ));
186 else if (type == EX_NUMERIC)
189 c = allocate_nonterminal (OP_OR, *n);
193 type = parse_and (n);
194 if (type == EX_ERROR)
196 else if (type == EX_STRING)
198 msg (SE, gettext (typ));
201 else if (type == EX_NUMERIC)
203 c = append_nonterminal_arg (c, *n);
216 /* Parses the AND level. */
218 parse_and (union any_node ** n)
220 static const char typ[]
221 = N_("The AND operator cannot take string operands.");
223 int type = parse_not (n);
225 if (type == EX_ERROR)
229 if (type == EX_STRING)
232 msg (SE, gettext (typ));
235 else if (type == EX_NUMERIC)
238 c = allocate_nonterminal (OP_AND, *n);
242 type = parse_not (n);
243 if (type == EX_ERROR)
245 else if (type == EX_STRING)
247 msg (SE, gettext (typ));
250 else if (type == EX_NUMERIC)
252 c = append_nonterminal_arg (c, *n);
265 /* Parses the NOT level. */
267 parse_not (union any_node ** n)
269 static const char typ[]
270 = N_("The NOT operator cannot take a string operand.");
274 while (lex_match (T_NOT))
276 type = parse_rel (n);
277 if (!not || type == EX_ERROR)
280 if (type == EX_STRING)
283 msg (SE, gettext (typ));
286 else if (type == EX_NUMERIC)
289 *n = allocate_nonterminal (OP_NOT, *n);
294 parse_rel (union any_node ** n)
296 static const char typ[]
297 = N_("Strings cannot be compared with numeric or Boolean "
298 "values with the relational operators "
301 int type = parse_add (n);
303 if (type == EX_ERROR)
307 if (token < T_EQ || token > T_NE)
314 c = allocate_nonterminal (token - T_EQ
315 + (type == EX_NUMERIC ? OP_EQ : OP_STRING_EQ),
322 if (t == EX_BOOLEAN && type == EX_NUMERIC)
323 make_bool (&c->nonterm.arg[0]);
324 else if (t == EX_NUMERIC && type == EX_BOOLEAN)
328 msg (SE, gettext (typ));
332 c = append_nonterminal_arg (c, *n);
337 if (token < T_EQ || token > T_NE)
349 /* Parses the addition and subtraction level. */
351 parse_add (union any_node **n)
353 static const char typ[]
354 = N_("The `+' and `-' operators may only be used with "
355 "numeric operands.");
360 type = parse_mul (n);
361 lex_negative_to_dash ();
362 if (type == EX_ERROR || (token != '+' && token != '-'))
364 if (type != EX_NUMERIC)
367 msg (SE, gettext (typ));
371 c = allocate_nonterminal (OP_PLUS, *n);
377 type = parse_mul (n);
378 if (type == EX_ERROR)
380 else if (type != EX_NUMERIC)
382 msg (SE, gettext (typ));
386 *n = allocate_nonterminal (OP_NEG, *n);
387 c = append_nonterminal_arg (c, *n);
389 lex_negative_to_dash ();
390 if (token != '+' && token != '-')
401 /* Parses the multiplication and division level. */
403 parse_mul (union any_node ** n)
405 static const char typ[]
406 = N_("The `*' and `/' operators may only be used with "
407 "numeric operands.");
413 type = parse_neg (n);
414 if (type == EX_ERROR || (token != '*' && token != '/'))
416 if (type != EX_NUMERIC)
419 msg (SE, gettext (typ));
423 c = allocate_nonterminal (OP_MUL, *n);
429 type = parse_neg (n);
430 if (type == EX_ERROR)
432 else if (type != EX_NUMERIC)
434 msg (SE, gettext (typ));
438 *n = allocate_nonterminal (OP_INV, *n);
439 c = append_nonterminal_arg (c, *n);
441 if (token != '*' && token != '/')
452 /* Parses the unary minus level. */
454 parse_neg (union any_node **n)
456 static const char typ[]
457 = N_("The unary minus (-) operator can only take a numeric operand.");
464 lex_negative_to_dash ();
465 if (!lex_match ('-'))
469 type = parse_exp (n);
470 if (!neg || type == EX_ERROR)
472 if (type != EX_NUMERIC)
475 msg (SE, gettext (typ));
479 *n = allocate_nonterminal (OP_NEG, *n);
484 parse_exp (union any_node **n)
486 static const char typ[]
487 = N_("Both operands to the ** operator must be numeric.");
492 type = parse_primary (n);
493 if (type == EX_ERROR || token != T_EXP)
495 if (type != EX_NUMERIC)
498 msg (SE, gettext (typ));
504 c = allocate_nonterminal (OP_POW, *n);
507 type = parse_primary (n);
508 if (type == EX_ERROR)
510 else if (type != EX_NUMERIC)
512 msg (SE, gettext (typ));
515 *n = append_nonterminal_arg (c, *n);
527 /* Parses system variables. */
529 parse_sysvar (union any_node **n)
531 if (!strcmp (tokid, "$CASENUM"))
533 *n = xmalloc (sizeof (struct casenum_node));
534 (*n)->casenum.type = OP_CASENUM;
541 if (!strcmp (tokid, "$SYSMIS"))
543 else if (!strcmp (tokid, "$JDATE"))
545 struct tm *time = localtime (&last_vfm_invocation);
546 d = yrmoda (time->tm_year + 1900, time->tm_mon + 1, time->tm_mday);
548 else if (!strcmp (tokid, "$DATE"))
550 static const char *months[12] =
552 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
553 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
559 time = localtime (&last_vfm_invocation);
560 sprintf (temp_buf, "%02d %s %02d", abs (time->tm_mday) % 100,
561 months[abs (time->tm_mon) % 12], abs (time->tm_year) % 100);
563 *n = xmalloc (sizeof (struct str_con_node) + 8);
564 (*n)->str_con.type = OP_STR_CON;
565 (*n)->str_con.len = 9;
566 memcpy ((*n)->str_con.s, temp_buf, 9);
569 else if (!strcmp (tokid, "$TIME"))
572 time = localtime (&last_vfm_invocation);
573 d = (yrmoda (time->tm_year + 1900, time->tm_mon + 1,
574 time->tm_mday) * 60. * 60. * 24.
575 + time->tm_hour * 60 * 60.
579 else if (!strcmp (tokid, "$LENGTH"))
581 msg (SW, _("Use of $LENGTH is obsolete, returning default of 66."));
584 else if (!strcmp (tokid, "$WIDTH"))
586 msg (SW, _("Use of $WIDTH is obsolete, returning default of 131."));
591 msg (SE, _("Unknown system variable %s."), tokid);
595 *n = xmalloc (sizeof (struct num_con_node));
596 (*n)->num_con.type = OP_NUM_CON;
597 (*n)->num_con.value = d;
602 /* Parses numbers, varnames, etc. */
604 parse_primary (union any_node **n)
612 /* An identifier followed by a left parenthesis is a function
614 if (lex_look_ahead () == '(')
615 return parse_function (n);
617 /* $ at the beginning indicates a system variable. */
620 int type = parse_sysvar (n);
625 /* Otherwise, it must be a user variable. */
626 v = dict_lookup_var (default_dict, tokid);
630 lex_error (_("expecting variable name"));
634 *n = xmalloc (sizeof (struct var_node));
635 (*n)->var.type = v->type == NUMERIC ? OP_NUM_VAR : OP_STR_VAR;
637 return v->type == NUMERIC ? EX_NUMERIC : EX_STRING;
641 *n = xmalloc (sizeof (struct num_con_node));
642 (*n)->num_con.type = OP_NUM_CON;
643 (*n)->num_con.value = tokval;
649 *n = xmalloc (sizeof (struct str_con_node) + ds_length (&tokstr) - 1);
650 (*n)->str_con.type = OP_STR_CON;
651 (*n)->str_con.len = ds_length (&tokstr);
652 memcpy ((*n)->str_con.s, ds_value (&tokstr), ds_length (&tokstr));
662 if (!lex_match (')'))
664 lex_error (_("expecting `)'"));
672 lex_error (_("in expression"));
677 /* Individual function parsing. */
683 int (*func) (struct function *, int, union any_node **);
687 static struct function func_tab[];
688 static int func_count;
690 static int get_num_args (struct function *, int, union any_node **);
693 unary_func (struct function * f, int x UNUSED, union any_node ** n)
696 struct nonterm_node *c;
698 if (!get_num_args (f, 1, n))
704 divisor = 1 / 60. / 60. / 24.;
707 divisor = 1 / 60. / 60.;
709 case OP_CTIME_MINUTES:
713 divisor = 60. * 60. * 24.;
716 case OP_CTIME_SECONDS:
718 *n = (*n)->nonterm.arg[0];
725 /* Arrive here when we encounter an operation that is just a
726 glorified version of a multiplication or division. Converts the
727 operation directly into that multiplication. */
728 c = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *));
731 c->arg[0] = (*n)->nonterm.arg[0];
732 c->arg[1] = xmalloc (sizeof (struct num_con_node));
733 c->arg[1]->num_con.type = OP_NUM_CON;
734 c->arg[1]->num_con.value = divisor;
736 *n = (union any_node *) c;
741 binary_func (struct function * f, int x UNUSED, union any_node ** n)
743 if (!get_num_args (f, 2, n))
749 ternary_func (struct function * f, int x UNUSED, union any_node ** n)
751 if (!get_num_args (f, 3, n))
757 MISSING_func (struct function * f, int x UNUSED, union any_node ** n)
760 && dict_lookup_var (default_dict, tokid) != NULL
761 && lex_look_ahead () == ')')
763 struct var_node *c = xmalloc (sizeof *c);
764 c->v = parse_variable ();
765 c->type = c->v->type == ALPHA ? OP_STR_MIS : OP_NUM_SYS;
766 *n = (union any_node *) c;
769 if (!get_num_args (f, 1, n))
775 SYSMIS_func (struct function * f UNUSED, int x UNUSED, union any_node ** n)
780 && dict_lookup_var (default_dict, tokid)
781 && lex_look_ahead () == ')')
784 v = parse_variable ();
785 if (v->type == ALPHA)
787 struct num_con_node *c = xmalloc (sizeof *c);
788 c->type = OP_NUM_CON;
794 struct var_node *c = xmalloc (sizeof *c);
795 c->type = OP_NUM_SYS;
804 else if (t == EX_NUMERIC)
806 *n = allocate_nonterminal (OP_SYSMIS, *n);
809 else /* EX_STRING or EX_BOOLEAN */
811 /* Return constant `true' value. */
813 *n = xmalloc (sizeof (struct num_con_node));
814 (*n)->num_con.type = OP_NUM_CON;
815 (*n)->num_con.value = 1.0;
821 VALUE_func (struct function *f UNUSED, int x UNUSED, union any_node **n)
823 struct variable *v = parse_variable ();
827 *n = xmalloc (sizeof (struct var_node));
829 if (v->type == NUMERIC)
831 (*n)->var.type = OP_NUM_VAL;
836 (*n)->var.type = OP_STR_VAR;
842 LAG_func (struct function *f UNUSED, int x UNUSED, union any_node **n)
844 struct variable *v = parse_variable ();
851 if (!lex_integer_p () || lex_integer () <= 0 || lex_integer () > 1000)
853 msg (SE, _("Argument 2 to LAG must be a small positive "
854 "integer constant."));
858 nlag = lex_integer ();
861 n_lag = max (nlag, n_lag);
862 *n = xmalloc (sizeof (struct lag_node));
863 (*n)->lag.type = (v->type == NUMERIC ? OP_NUM_LAG : OP_STR_LAG);
865 (*n)->lag.lag = nlag;
866 return (v->type == NUMERIC ? EX_NUMERIC : EX_STRING);
869 /* This screwball function parses n-ary operators:
870 1. NMISS, NVALID, SUM, MEAN, MIN, MAX: any number of (numeric) arguments.
871 2. SD, VARIANCE, CFVAR: at least two (numeric) arguments.
872 3. RANGE: An odd number of arguments, but at least three.
873 All arguments must be the same type.
874 4. ANY: At least two arguments. All arguments must be the same type.
877 nary_num_func (struct function *f, int min_args, union any_node **n)
879 /* Argument number of current argument (used for error messages). */
882 /* Number of arguments. */
885 /* Number of arguments allocated. */
888 /* Type of arguments. */
889 int type = (f->t == OP_ANY || f->t == OP_RANGE) ? -1 : NUMERIC;
891 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
892 (*n)->nonterm.type = f->t;
896 /* Special case: vara TO varb. */
898 /* FIXME: Is this condition failsafe? Can we _ever_ have two
899 juxtaposed identifiers otherwise? */
900 if (token == T_ID && dict_lookup_var (default_dict, tokid) != NULL
901 && toupper (lex_look_ahead ()) == 'T')
906 int opts = PV_SINGLE;
910 else if (type == ALPHA)
912 if (!parse_variables (default_dict, &v, &nv, opts))
914 if (nv + (*n)->nonterm.n >= m)
917 *n = xrealloc (*n, (sizeof (struct nonterm_node)
918 + (m - 1) * sizeof (union any_node *)));
923 for (j = 1; j < nv; j++)
924 if (type != v[j]->type)
926 msg (SE, _("Type mismatch in argument %d of %s, which was "
927 "expected to be of %s type. It was actually "
929 argn, f->s, type_name (type), type_name (v[j]->type));
934 for (j = 0; j < nv; j++)
936 union any_node **c = &(*n)->nonterm.arg[(*n)->nonterm.n++];
937 *c = xmalloc (sizeof (struct var_node));
938 (*c)->var.type = (type == NUMERIC ? OP_NUM_VAR : OP_STR_VAR);
945 int t = parse_or (&c);
952 msg (SE, _("%s cannot take Boolean operands."), f->s);
959 else if (t == EX_STRING)
962 else if ((t == EX_NUMERIC) ^ (type == NUMERIC))
965 msg (SE, _("Type mismatch in argument %d of %s, which was "
966 "expected to be of %s type. It was actually "
968 argn, f->s, type_name (type), expr_type_name (t));
971 if ((*n)->nonterm.n + 1 >= m)
974 *n = xrealloc (*n, (sizeof (struct nonterm_node)
975 + (m - 1) * sizeof (union any_node *)));
977 (*n)->nonterm.arg[(*n)->nonterm.n++] = c;
982 if (!lex_match (','))
984 lex_error (_("in function call"));
990 *n = xrealloc (*n, (sizeof (struct nonterm_node)
991 + ((*n)->nonterm.n) * sizeof (union any_node *)));
993 nargs = (*n)->nonterm.n;
994 if (f->t == OP_RANGE)
996 if (nargs < 3 || (nargs & 1) == 0)
998 msg (SE, _("RANGE requires an odd number of arguments, but "
1003 else if (f->t == OP_SD || f->t == OP_VARIANCE
1004 || f->t == OP_CFVAR || f->t == OP_ANY)
1008 msg (SE, _("%s requires at least two arguments."), f->s);
1013 if (f->t == OP_CFVAR || f->t == OP_SD || f->t == OP_VARIANCE)
1014 min_args = max (min_args, 2);
1016 min_args = max (min_args, 1);
1018 /* Yes, this is admittedly a terrible crock, but it works. */
1019 (*n)->nonterm.arg[(*n)->nonterm.n] = (union any_node *) min_args;
1021 if (min_args > nargs)
1023 msg (SE, _("%s.%d requires at least %d arguments."),
1024 f->s, min_args, min_args);
1028 if (f->t == OP_ANY || f->t == OP_RANGE)
1030 if (type == T_STRING)
1043 CONCAT_func (struct function * f UNUSED, int x UNUSED, union any_node ** n)
1049 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
1050 (*n)->nonterm.type = OP_CONCAT;
1051 (*n)->nonterm.n = 0;
1054 if ((*n)->nonterm.n >= m)
1057 *n = xrealloc (*n, (sizeof (struct nonterm_node)
1058 + (m - 1) * sizeof (union any_node *)));
1060 type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
1061 if (type == EX_ERROR)
1063 if (type != EX_STRING)
1065 msg (SE, _("Argument %d to CONCAT is type %s. All arguments "
1066 "to CONCAT must be strings."),
1067 (*n)->nonterm.n + 1, expr_type_name (type));
1072 if (!lex_match (','))
1075 *n = xrealloc (*n, (sizeof (struct nonterm_node)
1076 + ((*n)->nonterm.n - 1) * sizeof (union any_node *)));
1084 /* Parses a string function according to f->desc. f->desc[0] is the
1085 return type of the function. Succeeding characters represent
1086 successive args. Optional args are separated from the required
1087 args by a slash (`/'). Codes are `n', numeric arg; `s', string
1088 arg; and `f', format spec (this must be the last arg). If the
1089 optional args are included, the type becomes f->t+1. */
1091 generic_str_func (struct function *f, int x UNUSED, union any_node ** n)
1097 /* Count max number of arguments. */
1101 if (*cp == 'n' || *cp == 's')
1103 else if (*cp == 'f')
1109 *n = xmalloc (sizeof (struct nonterm_node)
1110 + (max_args - 1) * sizeof (union any_node *));
1111 (*n)->nonterm.type = f->t;
1112 (*n)->nonterm.n = 0;
1115 if (*cp == 'n' || *cp == 's')
1117 int t = *cp == 'n' ? EX_NUMERIC : EX_STRING;
1118 type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
1120 if (type == EX_ERROR)
1124 msg (SE, _("Argument %d to %s was expected to be of %s type. "
1125 "It was actually of type %s."),
1126 (*n)->nonterm.n + 1, f->s,
1127 *cp == 'n' ? _("numeric") : _("string"),
1128 expr_type_name (type));
1133 else if (*cp == 'f')
1135 /* This is always the very last argument. Also, this code
1136 is a crock. However, it works. */
1137 struct fmt_spec fmt;
1139 if (!parse_format_specifier (&fmt, 0))
1141 if (formats[fmt.type].cat & FCAT_STRING)
1143 msg (SE, _("%s is not a numeric format."), fmt_to_string (&fmt));
1146 (*n)->nonterm.arg[(*n)->nonterm.n + 0] = (union any_node *) fmt.type;
1147 (*n)->nonterm.arg[(*n)->nonterm.n + 1] = (union any_node *) fmt.w;
1148 (*n)->nonterm.arg[(*n)->nonterm.n + 2] = (union any_node *) fmt.d;
1159 if (lex_match (','))
1161 (*n)->nonterm.type++;
1167 else if (!lex_match (','))
1169 msg (SE, _("Too few arguments to function %s."), f->s);
1174 return f->desc[0] == 'n' ? EX_NUMERIC : EX_STRING;
1181 /* General function parsing. */
1184 get_num_args (struct function *f, int num_args, union any_node **n)
1189 *n = xmalloc (sizeof (struct nonterm_node)
1190 + (num_args - 1) * sizeof (union any_node *));
1191 (*n)->nonterm.type = f->t;
1192 (*n)->nonterm.n = 0;
1195 t = parse_or (&(*n)->nonterm.arg[i]);
1199 if (t != EX_NUMERIC)
1201 msg (SE, _("Type mismatch in argument %d of %s, which was expected "
1202 "to be numeric. It was actually type %s."),
1203 i + 1, f->s, expr_type_name (t));
1206 if (++i >= num_args)
1208 if (!lex_match (','))
1210 msg (SE, _("Missing comma following argument %d of %s."), i + 1, f->s);
1221 parse_function (union any_node ** n)
1223 struct function *fp;
1224 char fname[32], *cp;
1227 const struct vector *v;
1229 /* Check for a vector with this name. */
1230 v = dict_lookup_vector (default_dict, tokid);
1234 assert (token == '(');
1237 *n = xmalloc (sizeof (struct nonterm_node)
1238 + sizeof (union any_node *[2]));
1239 (*n)->nonterm.type = (v->var[0]->type == NUMERIC
1240 ? OP_VEC_ELEM_NUM : OP_VEC_ELEM_STR);
1241 (*n)->nonterm.n = 0;
1243 t = parse_or (&(*n)->nonterm.arg[0]);
1246 if (t != EX_NUMERIC)
1248 msg (SE, _("The index value after a vector name must be numeric."));
1253 if (!lex_match (')'))
1255 msg (SE, _("`)' expected after a vector index value."));
1258 ((*n)->nonterm.arg[1]) = (union any_node *) v->idx;
1260 return v->var[0]->type == NUMERIC ? EX_NUMERIC : EX_STRING;
1263 ds_truncate (&tokstr, 31);
1264 strcpy (fname, ds_value (&tokstr));
1265 cp = strrchr (fname, '.');
1266 if (cp && isdigit ((unsigned char) cp[1]))
1268 min_args = atoi (&cp[1]);
1275 if (!lex_force_match ('('))
1282 fp = binary_search (func_tab, func_count, sizeof *func_tab, &f,
1283 compare_functions, NULL);
1288 msg (SE, _("There is no function named %s."), fname);
1291 if (min_args && fp->func != nary_num_func)
1293 msg (SE, _("Function %s may not be given a minimum number of "
1294 "arguments."), fname);
1297 t = fp->func (fp, min_args, n);
1300 if (!lex_match (')'))
1302 lex_error (_("expecting `)' after %s function"), fname);
1313 #if GLOBAL_DEBUGGING
1314 #define op(a,b,c,d) {a,b,c,d}
1316 #define op(a,b,c,d) {b,c,d}
1321 struct op_desc ops[OP_SENTINEL + 1] =
1323 op ("!?ERROR?!", 000, 0, 0),
1325 op ("plus", 001, varies, 1),
1326 op ("mul", 011, varies, 1),
1327 op ("pow", 010, -1, 0),
1328 op ("and", 010, -1, 0),
1329 op ("or", 010, -1, 0),
1330 op ("not", 000, 0, 0),
1331 op ("eq", 000, -1, 0),
1332 op ("ge", 000, -1, 0),
1333 op ("gt", 000, -1, 0),
1334 op ("le", 000, -1, 0),
1335 op ("lt", 000, -1, 0),
1336 op ("ne", 000, -1, 0),
1338 op ("string-eq", 000, -1, 0),
1339 op ("string-ge", 000, -1, 0),
1340 op ("string-gt", 000, -1, 0),
1341 op ("string-le", 000, -1, 0),
1342 op ("string-lt", 000, -1, 0),
1343 op ("string-ne", 000, -1, 0),
1345 op ("neg", 000, 0, 0),
1346 op ("abs", 000, 0, 0),
1347 op ("arcos", 000, 0, 0),
1348 op ("arsin", 000, 0, 0),
1349 op ("artan", 000, 0, 0),
1350 op ("cos", 000, 0, 0),
1351 op ("exp", 000, 0, 0),
1352 op ("lg10", 000, 0, 0),
1353 op ("ln", 000, 0, 0),
1354 op ("mod10", 000, 0, 0),
1355 op ("rnd", 000, 0, 0),
1356 op ("sin", 000, 0, 0),
1357 op ("sqrt", 000, 0, 0),
1358 op ("tan", 000, 0, 0),
1359 op ("trunc", 000, 0, 0),
1361 op ("any", 011, varies, 1),
1362 op ("any-string", 001, varies, 1),
1363 op ("cfvar", 013, varies, 2),
1364 op ("max", 013, varies, 2),
1365 op ("mean", 013, varies, 2),
1366 op ("min", 013, varies, 2),
1367 op ("nmiss", 011, varies, 1),
1368 op ("nvalid", 011, varies, 1),
1369 op ("range", 011, varies, 1),
1370 op ("range-string", 001, varies, 1),
1371 op ("sd", 013, varies, 2),
1372 op ("sum", 013, varies, 2),
1373 op ("variance", 013, varies, 2),
1375 op ("time_hms", 000, -2, 0),
1376 op ("ctime_days?!", 000, 0, 0),
1377 op ("ctime_hours?!", 000, 0, 0),
1378 op ("ctime_minutes?!", 000, 0, 0),
1379 op ("ctime_seconds?!", 000, 0, 0),
1380 op ("time_days?!", 000, 0, 0),
1382 op ("date_dmy", 000, -2, 0),
1383 op ("date_mdy", 000, -2, 0),
1384 op ("date_moyr", 000, -1, 0),
1385 op ("date_qyr", 000, -1, 0),
1386 op ("date_wkyr", 000, -1, 0),
1387 op ("date_yrday", 000, -1, 0),
1388 op ("yrmoda", 000, -2, 0),
1390 op ("xdate_date", 000, 0, 0),
1391 op ("xdate_hour", 000, 0, 0),
1392 op ("xdate_jday", 000, 0, 0),
1393 op ("xdate_mday", 000, 0, 0),
1394 op ("xdate_minute", 000, 0, 0),
1395 op ("xdate_month", 000, 0, 0),
1396 op ("xdate_quarter", 000, 0, 0),
1397 op ("xdate_second", 000, 0, 0),
1398 op ("xdate_tday", 000, 0, 0),
1399 op ("xdate_time", 000, 0, 0),
1400 op ("xdate_week", 000, 0, 0),
1401 op ("xdate_wkday", 000, 0, 0),
1402 op ("xdate_year", 000, 0, 0),
1404 op ("concat", 001, varies, 1),
1405 op ("index-2", 000, -1, 0),
1406 op ("index-3", 000, -2, 0),
1407 op ("rindex-2", 000, -1, 0),
1408 op ("rindex-3", 000, -2, 0),
1409 op ("length", 000, 0, 0),
1410 op ("lower", 000, 0, 0),
1411 op ("upcas", 000, 0, 0),
1412 op ("lpad-2", 010, -1, 0),
1413 op ("lpad-3", 010, -2, 0),
1414 op ("rpad-2", 010, -1, 0),
1415 op ("rpad-3", 010, -2, 0),
1416 op ("ltrim-1", 000, 0, 0),
1417 op ("ltrim-2", 000, -1, 0),
1418 op ("rtrim-1", 000, 0, 0),
1419 op ("rtrim-2", 000, -1, 0),
1420 op ("number-1", 010, 0, 0),
1421 op ("number-2", 014, 0, 3),
1422 op ("string", 004, 0, 3),
1423 op ("substr-2", 010, -1, 0),
1424 op ("substr-3", 010, -2, 0),
1426 op ("inv", 000, 0, 0),
1427 op ("square", 000, 0, 0),
1428 op ("num-to-Bool", 000, 0, 0),
1430 op ("mod", 010, -1, 0),
1431 op ("normal", 000, 0, 0),
1432 op ("uniform", 000, 0, 0),
1433 op ("sysmis", 010, 0, 0),
1434 op ("vec-elem-num", 002, 0, 1),
1435 op ("vec-elem-str", 002, 0, 1),
1437 op ("!?TERMINAL?!", 000, 0, 0),
1438 op ("num-con", 000, +1, 0),
1439 op ("str-con", 000, +1, 0),
1440 op ("num-var", 000, +1, 0),
1441 op ("str-var", 000, +1, 0),
1442 op ("num-lag", 000, +1, 1),
1443 op ("str-lag", 000, +1, 1),
1444 op ("num-sys", 000, +1, 1),
1445 op ("num-val", 000, +1, 1),
1446 op ("str-mis", 000, +1, 1),
1447 op ("$casenum", 000, +1, 0),
1448 op ("!?SENTINEL?!", 000, 0, 0),
1455 /* Utility functions. */
1458 expr_type_name (int type)
1466 return _("Boolean");
1469 return _("numeric");
1481 type_name (int type)
1486 return _("numeric");
1496 make_bool (union any_node **n)
1500 c = xmalloc (sizeof (struct nonterm_node));
1501 c->nonterm.type = OP_NUM_TO_BOOL;
1503 c->nonterm.arg[0] = *n;
1508 free_node (union any_node *n)
1510 if (n->type < OP_TERMINAL)
1514 for (i = 0; i < n->nonterm.n; i++)
1515 free_node (n->nonterm.arg[i]);
1521 allocate_nonterminal (int op, union any_node *n)
1525 c = xmalloc (sizeof c->nonterm);
1526 c->nonterm.type = op;
1528 c->nonterm.arg[0] = n;
1534 append_nonterminal_arg (union any_node *a, union any_node *b)
1536 a = xrealloc (a, sizeof *a + sizeof *a->nonterm.arg * a->nonterm.n);
1537 a->nonterm.arg[a->nonterm.n++] = b;
1541 static struct function func_tab[] =
1543 {"ABS", OP_ABS, unary_func, NULL},
1544 {"ACOS", OP_ARCOS, unary_func, NULL},
1545 {"ARCOS", OP_ARCOS, unary_func, NULL},
1546 {"ARSIN", OP_ARSIN, unary_func, NULL},
1547 {"ARTAN", OP_ARTAN, unary_func, NULL},
1548 {"ASIN", OP_ARSIN, unary_func, NULL},
1549 {"ATAN", OP_ARTAN, unary_func, NULL},
1550 {"COS", OP_COS, unary_func, NULL},
1551 {"EXP", OP_EXP, unary_func, NULL},
1552 {"LG10", OP_LG10, unary_func, NULL},
1553 {"LN", OP_LN, unary_func, NULL},
1554 {"MOD10", OP_MOD10, unary_func, NULL},
1555 {"NORMAL", OP_NORMAL, unary_func, NULL},
1556 {"RND", OP_RND, unary_func, NULL},
1557 {"SIN", OP_SIN, unary_func, NULL},
1558 {"SQRT", OP_SQRT, unary_func, NULL},
1559 {"TAN", OP_TAN, unary_func, NULL},
1560 {"TRUNC", OP_TRUNC, unary_func, NULL},
1561 {"UNIFORM", OP_UNIFORM, unary_func, NULL},
1563 {"TIME.DAYS", OP_TIME_DAYS, unary_func, NULL},
1564 {"TIME.HMS", OP_TIME_HMS, ternary_func, NULL},
1566 {"CTIME.DAYS", OP_CTIME_DAYS, unary_func, NULL},
1567 {"CTIME.HOURS", OP_CTIME_HOURS, unary_func, NULL},
1568 {"CTIME.MINUTES", OP_CTIME_MINUTES, unary_func, NULL},
1569 {"CTIME.SECONDS", OP_CTIME_SECONDS, unary_func, NULL},
1571 {"DATE.DMY", OP_DATE_DMY, ternary_func, NULL},
1572 {"DATE.MDY", OP_DATE_MDY, ternary_func, NULL},
1573 {"DATE.MOYR", OP_DATE_MOYR, binary_func, NULL},
1574 {"DATE.QYR", OP_DATE_QYR, binary_func, NULL},
1575 {"DATE.WKYR", OP_DATE_WKYR, binary_func, NULL},
1576 {"DATE.YRDAY", OP_DATE_YRDAY, binary_func, NULL},
1578 {"XDATE.DATE", OP_XDATE_DATE, unary_func, NULL},
1579 {"XDATE.HOUR", OP_XDATE_HOUR, unary_func, NULL},
1580 {"XDATE.JDAY", OP_XDATE_JDAY, unary_func, NULL},
1581 {"XDATE.MDAY", OP_XDATE_MDAY, unary_func, NULL},
1582 {"XDATE.MINUTE", OP_XDATE_MINUTE, unary_func, NULL},
1583 {"XDATE.MONTH", OP_XDATE_MONTH, unary_func, NULL},
1584 {"XDATE.QUARTER", OP_XDATE_QUARTER, unary_func, NULL},
1585 {"XDATE.SECOND", OP_XDATE_SECOND, unary_func, NULL},
1586 {"XDATE.TDAY", OP_XDATE_TDAY, unary_func, NULL},
1587 {"XDATE.TIME", OP_XDATE_TIME, unary_func, NULL},
1588 {"XDATE.WEEK", OP_XDATE_WEEK, unary_func, NULL},
1589 {"XDATE.WKDAY", OP_XDATE_WKDAY, unary_func, NULL},
1590 {"XDATE.YEAR", OP_XDATE_YEAR, unary_func, NULL},
1592 {"MISSING", OP_SYSMIS, MISSING_func, NULL},
1593 {"MOD", OP_MOD, binary_func, NULL},
1594 {"SYSMIS", OP_SYSMIS, SYSMIS_func, NULL},
1595 {"VALUE", OP_NUM_VAL, VALUE_func, NULL},
1596 {"LAG", OP_NUM_LAG, LAG_func, NULL},
1597 {"YRMODA", OP_YRMODA, ternary_func, NULL},
1599 {"ANY", OP_ANY, nary_num_func, NULL},
1600 {"CFVAR", OP_CFVAR, nary_num_func, NULL},
1601 {"MAX", OP_MAX, nary_num_func, NULL},
1602 {"MEAN", OP_MEAN, nary_num_func, NULL},
1603 {"MIN", OP_MIN, nary_num_func, NULL},
1604 {"NMISS", OP_NMISS, nary_num_func, NULL},
1605 {"NVALID", OP_NVALID, nary_num_func, NULL},
1606 {"RANGE", OP_RANGE, nary_num_func, NULL},
1607 {"SD", OP_SD, nary_num_func, NULL},
1608 {"SUM", OP_SUM, nary_num_func, NULL},
1609 {"VARIANCE", OP_VARIANCE, nary_num_func, NULL},
1611 {"CONCAT", OP_CONCAT, CONCAT_func, NULL},
1612 {"INDEX", OP_INDEX, generic_str_func, "nss/n"},
1613 {"RINDEX", OP_RINDEX, generic_str_func, "nss/n"},
1614 {"LENGTH", OP_LENGTH, generic_str_func, "ns"},
1615 {"LOWER", OP_LOWER, generic_str_func, "ss"},
1616 {"UPCAS", OP_UPPER, generic_str_func, "ss"},
1617 {"LPAD", OP_LPAD, generic_str_func, "ssn/s"},
1618 {"RPAD", OP_RPAD, generic_str_func, "ssn/s"},
1619 {"LTRIM", OP_LTRIM, generic_str_func, "ss/s"},
1620 {"RTRIM", OP_RTRIM, generic_str_func, "ss/s"},
1621 {"NUMBER", OP_NUMBER, generic_str_func, "ns/f"},
1622 {"STRING", OP_STRING, generic_str_func, "snf"},
1623 {"SUBSTR", OP_SUBSTR, generic_str_func, "ssn/n"},
1626 /* An algo_compare_func that compares functions A and B based on
1629 compare_functions (const void *a_, const void *b_, void *aux UNUSED)
1631 const struct function *a = a_;
1632 const struct function *b = b_;
1634 return strcmp (a->s, b->s);
1638 init_func_tab (void)
1648 func_count = sizeof func_tab / sizeof *func_tab;
1649 sort (func_tab, func_count, sizeof *func_tab, compare_functions, NULL);
1656 print_type (union any_node * n)
1661 s = ops[n->type].name;
1663 if (ops[n->type].flags & OP_MIN_ARGS)
1664 printf ("%s.%d\n", s, (int) n->nonterm.arg[n->nonterm.n]);
1665 else if (ops[n->type].flags & OP_FMT_SPEC)
1669 f.type = (int) n->nonterm.arg[n->nonterm.n + 0];
1670 f.w = (int) n->nonterm.arg[n->nonterm.n + 1];
1671 f.d = (int) n->nonterm.arg[n->nonterm.n + 2];
1672 printf ("%s(%s)\n", s, fmt_to_string (&f));
1679 debug_print_tree (union any_node * n, int level)
1682 for (i = 0; i < level; i++)
1684 if (n->type < OP_TERMINAL)
1687 for (i = 0; i < n->nonterm.n; i++)
1688 debug_print_tree (n->nonterm.arg[i], level + 1);
1695 printf (_("!!TERMINAL!!"));
1698 if (n->num_con.value == SYSMIS)
1701 printf ("%f", n->num_con.value);
1704 printf ("\"%.*s\"", n->str_con.len, n->str_con.s);
1708 printf ("%s", n->var.v->name);
1712 printf ("LAG(%s,%d)", n->lag.v->name, n->lag.lag);
1715 printf ("SYSMIS(%s)", n->var.v->name);
1718 printf ("VALUE(%s)", n->var.v->name);
1721 printf (_("!!SENTINEL!!"));
1724 printf (_("!!ERROR%d!!"), n->type);
1730 #endif /* DEBUGGING */
1732 #if GLOBAL_DEBUGGING
1734 debug_print_postfix (struct expression * e)
1737 double *num = e->num;
1738 unsigned char *str = e->str;
1739 struct variable **v = e->var;
1742 debug_printf ((_("postfix:")));
1743 for (o = e->op; *o != OP_SENTINEL;)
1746 if (t < OP_TERMINAL)
1748 debug_printf ((" %s", ops[t].name));
1750 if (ops[t].flags & OP_VAR_ARGS)
1752 debug_printf (("(%d)", *o));
1755 if (ops[t].flags & OP_MIN_ARGS)
1757 debug_printf ((".%d", *o));
1760 if (ops[t].flags & OP_FMT_SPEC)
1763 f.type = (int) *o++;
1766 debug_printf (("(%s)", fmt_to_string (&f)));
1769 else if (t == OP_NUM_CON)
1772 debug_printf ((" SYSMIS"));
1774 debug_printf ((" %f", *num));
1777 else if (t == OP_STR_CON)
1779 debug_printf ((" \"%.*s\"", *str, &str[1]));
1782 else if (t == OP_NUM_VAR || t == OP_STR_VAR)
1784 debug_printf ((" %s", (*v)->name));
1787 else if (t == OP_NUM_SYS)
1789 debug_printf ((" SYSMIS(#%d)", *o));
1792 else if (t == OP_NUM_VAL)
1794 debug_printf ((" VALUE(#%d)", *o));
1797 else if (t == OP_NUM_LAG || t == OP_STR_LAG)
1799 debug_printf ((" LAG(%s,%d)", (*v)->name, *o));
1805 printf ("debug_print_postfix(): %d\n", t);
1809 debug_putc ('\n', stdout);
1811 #endif /* GLOBAL_DEBUGGING */