1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27 #include "algorithm.h"
39 /* Lowest precedence. */
40 static int parse_or (union any_node **n);
41 static int parse_and (union any_node **n);
42 static int parse_not (union any_node **n);
43 static int parse_rel (union any_node **n);
44 static int parse_add (union any_node **n);
45 static int parse_mul (union any_node **n);
46 static int parse_neg (union any_node **n);
47 static int parse_exp (union any_node **n);
48 static int parse_primary (union any_node **n);
49 static int parse_function (union any_node **n);
50 /* Highest precedence. */
52 /* Utility functions. */
53 static const char *expr_type_name (int type);
54 static const char *type_name (int type);
55 static void make_bool (union any_node **n);
56 static union any_node *allocate_nonterminal (int op, union any_node *n);
57 static union any_node *append_nonterminal_arg (union any_node *,
59 static int type_check (union any_node **n, int type, int flags);
61 static algo_compare_func compare_functions;
62 static void init_func_tab (void);
65 static void debug_print_tree (union any_node *, int);
69 static void debug_print_postfix (struct expression *);
72 /* Public functions. */
75 expr_free (struct expression *e)
85 pool_destroy (e->pool);
90 expr_parse (int flags)
96 /* Make sure the table of functions is initialized. */
99 /* Parse the expression. */
100 type = parse_or (&n);
101 if (type == EX_ERROR)
104 /* Enforce type rules. */
105 if (!type_check (&n, type, flags))
111 /* Optimize the expression as best we can. */
112 n = (union any_node *) optimize_expression ((struct nonterm_node *) n);
114 /* Dump the tree-based expression to a postfix representation for
115 best evaluation speed, and destroy the tree. */
116 e = xmalloc (sizeof *e);
118 dump_expression (n, e);
121 /* If we're debugging or the user requested it, print the postfix
125 if (flags & PXP_DUMP)
127 debug_print_postfix (e);
134 type_check (union any_node **n, int type, int flags)
136 /* Enforce PXP_BOOLEAN flag. */
137 if (flags & PXP_BOOLEAN)
139 if (type == EX_STRING)
141 msg (SE, _("A string expression was supplied in a place "
142 "where a Boolean expression was expected."));
145 else if (type == EX_NUMERIC)
146 *n = allocate_nonterminal (OP_NUM_TO_BOOL, *n);
149 /* Enforce PXP_NUMERIC flag. */
150 if ((flags & PXP_NUMERIC) && (type != EX_NUMERIC))
152 msg (SE, _("A numeric expression was expected in a place "
153 "where one was not supplied."));
157 /* Enforce PXP_STRING flag. */
158 if ((flags & PXP_STRING) && (type != EX_STRING))
160 msg (SE, _("A string expression was expected in a place "
161 "where one was not supplied."));
168 /* Recursive-descent expression parser. */
170 /* Parses the OR level. */
172 parse_or (union any_node **n)
174 char typ[] = N_("The OR operator cannot take string operands.");
178 type = parse_and (n);
179 if (type == EX_ERROR || token != T_OR)
181 if (type == EX_STRING)
184 msg (SE, gettext (typ));
187 else if (type == EX_NUMERIC)
190 c = allocate_nonterminal (OP_OR, *n);
194 type = parse_and (n);
195 if (type == EX_ERROR)
197 else if (type == EX_STRING)
199 msg (SE, gettext (typ));
202 else if (type == EX_NUMERIC)
204 c = append_nonterminal_arg (c, *n);
217 /* Parses the AND level. */
219 parse_and (union any_node ** n)
221 static const char typ[]
222 = N_("The AND operator cannot take string operands.");
224 int type = parse_not (n);
226 if (type == EX_ERROR)
230 if (type == EX_STRING)
233 msg (SE, gettext (typ));
236 else if (type == EX_NUMERIC)
239 c = allocate_nonterminal (OP_AND, *n);
243 type = parse_not (n);
244 if (type == EX_ERROR)
246 else if (type == EX_STRING)
248 msg (SE, gettext (typ));
251 else if (type == EX_NUMERIC)
253 c = append_nonterminal_arg (c, *n);
266 /* Parses the NOT level. */
268 parse_not (union any_node ** n)
270 static const char typ[]
271 = N_("The NOT operator cannot take a string operand.");
275 while (lex_match (T_NOT))
277 type = parse_rel (n);
278 if (!not || type == EX_ERROR)
281 if (type == EX_STRING)
284 msg (SE, gettext (typ));
287 else if (type == EX_NUMERIC)
290 *n = allocate_nonterminal (OP_NOT, *n);
295 parse_rel (union any_node ** n)
297 static const char typ[]
298 = N_("Strings cannot be compared with numeric or Boolean "
299 "values with the relational operators "
302 int type = parse_add (n);
304 if (type == EX_ERROR)
308 if (token < T_EQ || token > T_NE)
315 c = allocate_nonterminal (token - T_EQ
316 + (type == EX_NUMERIC ? OP_EQ : OP_STRING_EQ),
323 if (t == EX_BOOLEAN && type == EX_NUMERIC)
324 make_bool (&c->nonterm.arg[0]);
325 else if (t == EX_NUMERIC && type == EX_BOOLEAN)
329 msg (SE, gettext (typ));
333 c = append_nonterminal_arg (c, *n);
338 if (token < T_EQ || token > T_NE)
350 /* Parses the addition and subtraction level. */
352 parse_add (union any_node **n)
354 static const char typ[]
355 = N_("The `+' and `-' operators may only be used with "
356 "numeric operands.");
361 type = parse_mul (n);
362 lex_negative_to_dash ();
363 if (type == EX_ERROR || (token != '+' && token != '-'))
365 if (type != EX_NUMERIC)
368 msg (SE, gettext (typ));
372 c = allocate_nonterminal (OP_PLUS, *n);
378 type = parse_mul (n);
379 if (type == EX_ERROR)
381 else if (type != EX_NUMERIC)
383 msg (SE, gettext (typ));
387 *n = allocate_nonterminal (OP_NEG, *n);
388 c = append_nonterminal_arg (c, *n);
390 lex_negative_to_dash ();
391 if (token != '+' && token != '-')
402 /* Parses the multiplication and division level. */
404 parse_mul (union any_node ** n)
406 static const char typ[]
407 = N_("The `*' and `/' operators may only be used with "
408 "numeric operands.");
414 type = parse_neg (n);
415 if (type == EX_ERROR || (token != '*' && token != '/'))
417 if (type != EX_NUMERIC)
420 msg (SE, gettext (typ));
424 c = allocate_nonterminal (OP_MUL, *n);
430 type = parse_neg (n);
431 if (type == EX_ERROR)
433 else if (type != EX_NUMERIC)
435 msg (SE, gettext (typ));
439 *n = allocate_nonterminal (OP_INV, *n);
440 c = append_nonterminal_arg (c, *n);
442 if (token != '*' && token != '/')
453 /* Parses the unary minus level. */
455 parse_neg (union any_node **n)
457 static const char typ[]
458 = N_("The unary minus (-) operator can only take a numeric operand.");
465 lex_negative_to_dash ();
466 if (!lex_match ('-'))
470 type = parse_exp (n);
471 if (!neg || type == EX_ERROR)
473 if (type != EX_NUMERIC)
476 msg (SE, gettext (typ));
480 *n = allocate_nonterminal (OP_NEG, *n);
485 parse_exp (union any_node **n)
487 static const char typ[]
488 = N_("Both operands to the ** operator must be numeric.");
493 type = parse_primary (n);
494 if (type == EX_ERROR || token != T_EXP)
496 if (type != EX_NUMERIC)
499 msg (SE, gettext (typ));
505 c = allocate_nonterminal (OP_POW, *n);
508 type = parse_primary (n);
509 if (type == EX_ERROR)
511 else if (type != EX_NUMERIC)
513 msg (SE, gettext (typ));
516 *n = append_nonterminal_arg (c, *n);
528 /* Parses system variables. */
530 parse_sysvar (union any_node **n)
532 if (!strcmp (tokid, "$CASENUM"))
534 *n = xmalloc (sizeof (struct casenum_node));
535 (*n)->casenum.type = OP_CASENUM;
542 if (!strcmp (tokid, "$SYSMIS"))
544 else if (!strcmp (tokid, "$JDATE"))
546 struct tm *time = localtime (&last_vfm_invocation);
547 d = yrmoda (time->tm_year + 1900, time->tm_mon + 1, time->tm_mday);
549 else if (!strcmp (tokid, "$DATE"))
551 static const char *months[12] =
553 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
554 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
560 time = localtime (&last_vfm_invocation);
561 sprintf (temp_buf, "%02d %s %02d", abs (time->tm_mday) % 100,
562 months[abs (time->tm_mon) % 12], abs (time->tm_year) % 100);
564 *n = xmalloc (sizeof (struct str_con_node) + 8);
565 (*n)->str_con.type = OP_STR_CON;
566 (*n)->str_con.len = 9;
567 memcpy ((*n)->str_con.s, temp_buf, 9);
570 else if (!strcmp (tokid, "$TIME"))
573 time = localtime (&last_vfm_invocation);
574 d = (yrmoda (time->tm_year + 1900, time->tm_mon + 1,
575 time->tm_mday) * 60. * 60. * 24.
576 + time->tm_hour * 60 * 60.
580 else if (!strcmp (tokid, "$LENGTH"))
582 msg (SW, _("Use of $LENGTH is obsolete, returning default of 66."));
585 else if (!strcmp (tokid, "$WIDTH"))
587 msg (SW, _("Use of $WIDTH is obsolete, returning default of 131."));
592 msg (SE, _("Unknown system variable %s."), tokid);
596 *n = xmalloc (sizeof (struct num_con_node));
597 (*n)->num_con.type = OP_NUM_CON;
598 (*n)->num_con.value = d;
603 /* Parses numbers, varnames, etc. */
605 parse_primary (union any_node **n)
613 /* An identifier followed by a left parenthesis is a function
615 if (lex_look_ahead () == '(')
616 return parse_function (n);
618 /* $ at the beginning indicates a system variable. */
621 int type = parse_sysvar (n);
626 /* Otherwise, it must be a user variable. */
627 v = dict_lookup_var (default_dict, tokid);
631 lex_error (_("expecting variable name"));
635 *n = xmalloc (sizeof (struct var_node));
636 (*n)->var.type = v->type == NUMERIC ? OP_NUM_VAR : OP_STR_VAR;
638 return v->type == NUMERIC ? EX_NUMERIC : EX_STRING;
642 *n = xmalloc (sizeof (struct num_con_node));
643 (*n)->num_con.type = OP_NUM_CON;
644 (*n)->num_con.value = tokval;
650 *n = xmalloc (sizeof (struct str_con_node) + ds_length (&tokstr) - 1);
651 (*n)->str_con.type = OP_STR_CON;
652 (*n)->str_con.len = ds_length (&tokstr);
653 memcpy ((*n)->str_con.s, ds_value (&tokstr), ds_length (&tokstr));
663 if (!lex_match (')'))
665 lex_error (_("expecting `)'"));
673 lex_error (_("in expression"));
678 /* Individual function parsing. */
684 int (*func) (struct function *, int, union any_node **);
688 static struct function func_tab[];
689 static int func_count;
691 static int get_num_args (struct function *, int, union any_node **);
694 unary_func (struct function * f, int x UNUSED, union any_node ** n)
697 struct nonterm_node *c;
699 if (!get_num_args (f, 1, n))
705 divisor = 1 / 60. / 60. / 24.;
708 divisor = 1 / 60. / 60.;
710 case OP_CTIME_MINUTES:
714 divisor = 60. * 60. * 24.;
717 case OP_CTIME_SECONDS:
719 *n = (*n)->nonterm.arg[0];
726 /* Arrive here when we encounter an operation that is just a
727 glorified version of a multiplication or division. Converts the
728 operation directly into that multiplication. */
729 c = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *));
732 c->arg[0] = (*n)->nonterm.arg[0];
733 c->arg[1] = xmalloc (sizeof (struct num_con_node));
734 c->arg[1]->num_con.type = OP_NUM_CON;
735 c->arg[1]->num_con.value = divisor;
737 *n = (union any_node *) c;
742 binary_func (struct function * f, int x UNUSED, union any_node ** n)
744 if (!get_num_args (f, 2, n))
750 ternary_func (struct function * f, int x UNUSED, union any_node ** n)
752 if (!get_num_args (f, 3, n))
758 MISSING_func (struct function * f, int x UNUSED, union any_node ** n)
761 && dict_lookup_var (default_dict, tokid) != NULL
762 && lex_look_ahead () == ')')
764 struct var_node *c = xmalloc (sizeof *c);
765 c->v = parse_variable ();
766 c->type = c->v->type == ALPHA ? OP_STR_MIS : OP_NUM_SYS;
767 *n = (union any_node *) c;
770 if (!get_num_args (f, 1, n))
776 SYSMIS_func (struct function * f UNUSED, int x UNUSED, union any_node ** n)
781 && dict_lookup_var (default_dict, tokid)
782 && lex_look_ahead () == ')')
785 v = parse_variable ();
786 if (v->type == ALPHA)
788 struct num_con_node *c = xmalloc (sizeof *c);
789 c->type = OP_NUM_CON;
795 struct var_node *c = xmalloc (sizeof *c);
796 c->type = OP_NUM_SYS;
805 else if (t == EX_NUMERIC)
807 *n = allocate_nonterminal (OP_SYSMIS, *n);
810 else /* EX_STRING or EX_BOOLEAN */
812 /* Return constant `true' value. */
814 *n = xmalloc (sizeof (struct num_con_node));
815 (*n)->num_con.type = OP_NUM_CON;
816 (*n)->num_con.value = 1.0;
822 VALUE_func (struct function *f UNUSED, int x UNUSED, union any_node **n)
824 struct variable *v = parse_variable ();
828 *n = xmalloc (sizeof (struct var_node));
830 if (v->type == NUMERIC)
832 (*n)->var.type = OP_NUM_VAL;
837 (*n)->var.type = OP_STR_VAR;
843 LAG_func (struct function *f UNUSED, int x UNUSED, union any_node **n)
845 struct variable *v = parse_variable ();
852 if (!lex_integer_p () || lex_integer () <= 0 || lex_integer () > 1000)
854 msg (SE, _("Argument 2 to LAG must be a small positive "
855 "integer constant."));
859 nlag = lex_integer ();
862 n_lag = max (nlag, n_lag);
863 *n = xmalloc (sizeof (struct lag_node));
864 (*n)->lag.type = (v->type == NUMERIC ? OP_NUM_LAG : OP_STR_LAG);
866 (*n)->lag.lag = nlag;
867 return (v->type == NUMERIC ? EX_NUMERIC : EX_STRING);
870 /* This screwball function parses n-ary operators:
871 1. NMISS, NVALID, SUM, MEAN, MIN, MAX: any number of (numeric) arguments.
872 2. SD, VARIANCE, CFVAR: at least two (numeric) arguments.
873 3. RANGE: An odd number of arguments, but at least three.
874 All arguments must be the same type.
875 4. ANY: At least two arguments. All arguments must be the same type.
878 nary_num_func (struct function *f, int min_args, union any_node **n)
880 /* Argument number of current argument (used for error messages). */
883 /* Number of arguments. */
886 /* Number of arguments allocated. */
889 /* Type of arguments. */
890 int type = (f->t == OP_ANY || f->t == OP_RANGE) ? -1 : NUMERIC;
892 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
893 (*n)->nonterm.type = f->t;
897 /* Special case: vara TO varb. */
899 /* FIXME: Is this condition failsafe? Can we _ever_ have two
900 juxtaposed identifiers otherwise? */
901 if (token == T_ID && dict_lookup_var (default_dict, tokid) != NULL
902 && toupper (lex_look_ahead ()) == 'T')
907 int opts = PV_SINGLE;
911 else if (type == ALPHA)
913 if (!parse_variables (default_dict, &v, &nv, opts))
915 if (nv + (*n)->nonterm.n >= m)
918 *n = xrealloc (*n, (sizeof (struct nonterm_node)
919 + (m - 1) * sizeof (union any_node *)));
924 for (j = 1; j < nv; j++)
925 if (type != v[j]->type)
927 msg (SE, _("Type mismatch in argument %d of %s, which was "
928 "expected to be of %s type. It was actually "
930 argn, f->s, type_name (type), type_name (v[j]->type));
935 for (j = 0; j < nv; j++)
937 union any_node **c = &(*n)->nonterm.arg[(*n)->nonterm.n++];
938 *c = xmalloc (sizeof (struct var_node));
939 (*c)->var.type = (type == NUMERIC ? OP_NUM_VAR : OP_STR_VAR);
946 int t = parse_or (&c);
953 msg (SE, _("%s cannot take Boolean operands."), f->s);
960 else if (t == EX_STRING)
963 else if ((t == EX_NUMERIC) ^ (type == NUMERIC))
966 msg (SE, _("Type mismatch in argument %d of %s, which was "
967 "expected to be of %s type. It was actually "
969 argn, f->s, type_name (type), expr_type_name (t));
972 if ((*n)->nonterm.n + 1 >= m)
975 *n = xrealloc (*n, (sizeof (struct nonterm_node)
976 + (m - 1) * sizeof (union any_node *)));
978 (*n)->nonterm.arg[(*n)->nonterm.n++] = c;
983 if (!lex_match (','))
985 lex_error (_("in function call"));
991 *n = xrealloc (*n, (sizeof (struct nonterm_node)
992 + ((*n)->nonterm.n) * sizeof (union any_node *)));
994 nargs = (*n)->nonterm.n;
995 if (f->t == OP_RANGE)
997 if (nargs < 3 || (nargs & 1) == 0)
999 msg (SE, _("RANGE requires an odd number of arguments, but "
1000 "at least three."));
1004 else if (f->t == OP_SD || f->t == OP_VARIANCE
1005 || f->t == OP_CFVAR || f->t == OP_ANY)
1009 msg (SE, _("%s requires at least two arguments."), f->s);
1014 if (f->t == OP_CFVAR || f->t == OP_SD || f->t == OP_VARIANCE)
1015 min_args = max (min_args, 2);
1017 min_args = max (min_args, 1);
1019 /* Yes, this is admittedly a terrible crock, but it works. */
1020 (*n)->nonterm.arg[(*n)->nonterm.n] = (union any_node *) min_args;
1022 if (min_args > nargs)
1024 msg (SE, _("%s.%d requires at least %d arguments."),
1025 f->s, min_args, min_args);
1029 if (f->t == OP_ANY || f->t == OP_RANGE)
1031 if (type == T_STRING)
1044 CONCAT_func (struct function * f UNUSED, int x UNUSED, union any_node ** n)
1050 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
1051 (*n)->nonterm.type = OP_CONCAT;
1052 (*n)->nonterm.n = 0;
1055 if ((*n)->nonterm.n >= m)
1058 *n = xrealloc (*n, (sizeof (struct nonterm_node)
1059 + (m - 1) * sizeof (union any_node *)));
1061 type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
1062 if (type == EX_ERROR)
1064 if (type != EX_STRING)
1066 msg (SE, _("Argument %d to CONCAT is type %s. All arguments "
1067 "to CONCAT must be strings."),
1068 (*n)->nonterm.n + 1, expr_type_name (type));
1073 if (!lex_match (','))
1076 *n = xrealloc (*n, (sizeof (struct nonterm_node)
1077 + ((*n)->nonterm.n - 1) * sizeof (union any_node *)));
1085 /* Parses a string function according to f->desc. f->desc[0] is the
1086 return type of the function. Succeeding characters represent
1087 successive args. Optional args are separated from the required
1088 args by a slash (`/'). Codes are `n', numeric arg; `s', string
1089 arg; and `f', format spec (this must be the last arg). If the
1090 optional args are included, the type becomes f->t+1. */
1092 generic_str_func (struct function *f, int x UNUSED, union any_node ** n)
1098 /* Count max number of arguments. */
1102 if (*cp == 'n' || *cp == 's')
1104 else if (*cp == 'f')
1110 *n = xmalloc (sizeof (struct nonterm_node)
1111 + (max_args - 1) * sizeof (union any_node *));
1112 (*n)->nonterm.type = f->t;
1113 (*n)->nonterm.n = 0;
1116 if (*cp == 'n' || *cp == 's')
1118 int t = *cp == 'n' ? EX_NUMERIC : EX_STRING;
1119 type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
1121 if (type == EX_ERROR)
1125 msg (SE, _("Argument %d to %s was expected to be of %s type. "
1126 "It was actually of type %s."),
1127 (*n)->nonterm.n + 1, f->s,
1128 *cp == 'n' ? _("numeric") : _("string"),
1129 expr_type_name (type));
1134 else if (*cp == 'f')
1136 /* This is always the very last argument. Also, this code
1137 is a crock. However, it works. */
1138 struct fmt_spec fmt;
1140 if (!parse_format_specifier (&fmt, 0))
1142 if (formats[fmt.type].cat & FCAT_STRING)
1144 msg (SE, _("%s is not a numeric format."), fmt_to_string (&fmt));
1147 (*n)->nonterm.arg[(*n)->nonterm.n + 0] = (union any_node *) fmt.type;
1148 (*n)->nonterm.arg[(*n)->nonterm.n + 1] = (union any_node *) fmt.w;
1149 (*n)->nonterm.arg[(*n)->nonterm.n + 2] = (union any_node *) fmt.d;
1160 if (lex_match (','))
1162 (*n)->nonterm.type++;
1168 else if (!lex_match (','))
1170 msg (SE, _("Too few arguments to function %s."), f->s);
1175 return f->desc[0] == 'n' ? EX_NUMERIC : EX_STRING;
1182 /* General function parsing. */
1185 get_num_args (struct function *f, int num_args, union any_node **n)
1190 *n = xmalloc (sizeof (struct nonterm_node)
1191 + (num_args - 1) * sizeof (union any_node *));
1192 (*n)->nonterm.type = f->t;
1193 (*n)->nonterm.n = 0;
1196 t = parse_or (&(*n)->nonterm.arg[i]);
1200 if (t != EX_NUMERIC)
1202 msg (SE, _("Type mismatch in argument %d of %s, which was expected "
1203 "to be numeric. It was actually type %s."),
1204 i + 1, f->s, expr_type_name (t));
1207 if (++i >= num_args)
1209 if (!lex_match (','))
1211 msg (SE, _("Missing comma following argument %d of %s."), i + 1, f->s);
1222 parse_function (union any_node ** n)
1224 struct function *fp;
1225 char fname[32], *cp;
1228 const struct vector *v;
1230 /* Check for a vector with this name. */
1231 v = dict_lookup_vector (default_dict, tokid);
1235 assert (token == '(');
1238 *n = xmalloc (sizeof (struct nonterm_node)
1239 + sizeof (union any_node *[2]));
1240 (*n)->nonterm.type = (v->var[0]->type == NUMERIC
1241 ? OP_VEC_ELEM_NUM : OP_VEC_ELEM_STR);
1242 (*n)->nonterm.n = 0;
1244 t = parse_or (&(*n)->nonterm.arg[0]);
1247 if (t != EX_NUMERIC)
1249 msg (SE, _("The index value after a vector name must be numeric."));
1254 if (!lex_match (')'))
1256 msg (SE, _("`)' expected after a vector index value."));
1259 ((*n)->nonterm.arg[1]) = (union any_node *) v->idx;
1261 return v->var[0]->type == NUMERIC ? EX_NUMERIC : EX_STRING;
1264 ds_truncate (&tokstr, 31);
1265 strcpy (fname, ds_value (&tokstr));
1266 cp = strrchr (fname, '.');
1267 if (cp && isdigit ((unsigned char) cp[1]))
1269 min_args = atoi (&cp[1]);
1276 if (!lex_force_match ('('))
1283 fp = binary_search (func_tab, func_count, sizeof *func_tab, &f,
1284 compare_functions, NULL);
1289 msg (SE, _("There is no function named %s."), fname);
1292 if (min_args && fp->func != nary_num_func)
1294 msg (SE, _("Function %s may not be given a minimum number of "
1295 "arguments."), fname);
1298 t = fp->func (fp, min_args, n);
1301 if (!lex_match (')'))
1303 lex_error (_("expecting `)' after %s function"), fname);
1314 #if GLOBAL_DEBUGGING
1315 #define op(a,b,c,d) {a,b,c,d}
1317 #define op(a,b,c,d) {b,c,d}
1322 struct op_desc ops[OP_SENTINEL + 1] =
1324 op ("!?ERROR?!", 000, 0, 0),
1326 op ("plus", 001, varies, 1),
1327 op ("mul", 011, varies, 1),
1328 op ("pow", 010, -1, 0),
1329 op ("and", 010, -1, 0),
1330 op ("or", 010, -1, 0),
1331 op ("not", 000, 0, 0),
1332 op ("eq", 000, -1, 0),
1333 op ("ge", 000, -1, 0),
1334 op ("gt", 000, -1, 0),
1335 op ("le", 000, -1, 0),
1336 op ("lt", 000, -1, 0),
1337 op ("ne", 000, -1, 0),
1339 op ("string-eq", 000, -1, 0),
1340 op ("string-ge", 000, -1, 0),
1341 op ("string-gt", 000, -1, 0),
1342 op ("string-le", 000, -1, 0),
1343 op ("string-lt", 000, -1, 0),
1344 op ("string-ne", 000, -1, 0),
1346 op ("neg", 000, 0, 0),
1347 op ("abs", 000, 0, 0),
1348 op ("arcos", 000, 0, 0),
1349 op ("arsin", 000, 0, 0),
1350 op ("artan", 000, 0, 0),
1351 op ("cos", 000, 0, 0),
1352 op ("exp", 000, 0, 0),
1353 op ("lg10", 000, 0, 0),
1354 op ("ln", 000, 0, 0),
1355 op ("mod10", 000, 0, 0),
1356 op ("rnd", 000, 0, 0),
1357 op ("sin", 000, 0, 0),
1358 op ("sqrt", 000, 0, 0),
1359 op ("tan", 000, 0, 0),
1360 op ("trunc", 000, 0, 0),
1362 op ("any", 011, varies, 1),
1363 op ("any-string", 001, varies, 1),
1364 op ("cfvar", 013, varies, 2),
1365 op ("max", 013, varies, 2),
1366 op ("mean", 013, varies, 2),
1367 op ("min", 013, varies, 2),
1368 op ("nmiss", 011, varies, 1),
1369 op ("nvalid", 011, varies, 1),
1370 op ("range", 011, varies, 1),
1371 op ("range-string", 001, varies, 1),
1372 op ("sd", 013, varies, 2),
1373 op ("sum", 013, varies, 2),
1374 op ("variance", 013, varies, 2),
1376 op ("time_hms", 000, -2, 0),
1377 op ("ctime_days?!", 000, 0, 0),
1378 op ("ctime_hours?!", 000, 0, 0),
1379 op ("ctime_minutes?!", 000, 0, 0),
1380 op ("ctime_seconds?!", 000, 0, 0),
1381 op ("time_days?!", 000, 0, 0),
1383 op ("date_dmy", 000, -2, 0),
1384 op ("date_mdy", 000, -2, 0),
1385 op ("date_moyr", 000, -1, 0),
1386 op ("date_qyr", 000, -1, 0),
1387 op ("date_wkyr", 000, -1, 0),
1388 op ("date_yrday", 000, -1, 0),
1389 op ("yrmoda", 000, -2, 0),
1391 op ("xdate_date", 000, 0, 0),
1392 op ("xdate_hour", 000, 0, 0),
1393 op ("xdate_jday", 000, 0, 0),
1394 op ("xdate_mday", 000, 0, 0),
1395 op ("xdate_minute", 000, 0, 0),
1396 op ("xdate_month", 000, 0, 0),
1397 op ("xdate_quarter", 000, 0, 0),
1398 op ("xdate_second", 000, 0, 0),
1399 op ("xdate_tday", 000, 0, 0),
1400 op ("xdate_time", 000, 0, 0),
1401 op ("xdate_week", 000, 0, 0),
1402 op ("xdate_wkday", 000, 0, 0),
1403 op ("xdate_year", 000, 0, 0),
1405 op ("concat", 001, varies, 1),
1406 op ("index-2", 000, -1, 0),
1407 op ("index-3", 000, -2, 0),
1408 op ("rindex-2", 000, -1, 0),
1409 op ("rindex-3", 000, -2, 0),
1410 op ("length", 000, 0, 0),
1411 op ("lower", 000, 0, 0),
1412 op ("upcas", 000, 0, 0),
1413 op ("lpad-2", 010, -1, 0),
1414 op ("lpad-3", 010, -2, 0),
1415 op ("rpad-2", 010, -1, 0),
1416 op ("rpad-3", 010, -2, 0),
1417 op ("ltrim-1", 000, 0, 0),
1418 op ("ltrim-2", 000, -1, 0),
1419 op ("rtrim-1", 000, 0, 0),
1420 op ("rtrim-2", 000, -1, 0),
1421 op ("number-1", 010, 0, 0),
1422 op ("number-2", 014, 0, 3),
1423 op ("string", 004, 0, 3),
1424 op ("substr-2", 010, -1, 0),
1425 op ("substr-3", 010, -2, 0),
1427 op ("inv", 000, 0, 0),
1428 op ("square", 000, 0, 0),
1429 op ("num-to-Bool", 000, 0, 0),
1431 op ("mod", 010, -1, 0),
1432 op ("normal", 000, 0, 0),
1433 op ("uniform", 000, 0, 0),
1434 op ("sysmis", 010, 0, 0),
1435 op ("vec-elem-num", 002, 0, 1),
1436 op ("vec-elem-str", 002, 0, 1),
1438 op ("!?TERMINAL?!", 000, 0, 0),
1439 op ("num-con", 000, +1, 0),
1440 op ("str-con", 000, +1, 0),
1441 op ("num-var", 000, +1, 0),
1442 op ("str-var", 000, +1, 0),
1443 op ("num-lag", 000, +1, 1),
1444 op ("str-lag", 000, +1, 1),
1445 op ("num-sys", 000, +1, 1),
1446 op ("num-val", 000, +1, 1),
1447 op ("str-mis", 000, +1, 1),
1448 op ("$casenum", 000, +1, 0),
1449 op ("!?SENTINEL?!", 000, 0, 0),
1456 /* Utility functions. */
1459 expr_type_name (int type)
1467 return _("Boolean");
1470 return _("numeric");
1482 type_name (int type)
1487 return _("numeric");
1497 make_bool (union any_node **n)
1501 c = xmalloc (sizeof (struct nonterm_node));
1502 c->nonterm.type = OP_NUM_TO_BOOL;
1504 c->nonterm.arg[0] = *n;
1509 free_node (union any_node *n)
1511 if (n->type < OP_TERMINAL)
1515 for (i = 0; i < n->nonterm.n; i++)
1516 free_node (n->nonterm.arg[i]);
1522 allocate_nonterminal (int op, union any_node *n)
1526 c = xmalloc (sizeof c->nonterm);
1527 c->nonterm.type = op;
1529 c->nonterm.arg[0] = n;
1535 append_nonterminal_arg (union any_node *a, union any_node *b)
1537 a = xrealloc (a, sizeof *a + sizeof *a->nonterm.arg * a->nonterm.n);
1538 a->nonterm.arg[a->nonterm.n++] = b;
1542 static struct function func_tab[] =
1544 {"ABS", OP_ABS, unary_func, NULL},
1545 {"ACOS", OP_ARCOS, unary_func, NULL},
1546 {"ARCOS", OP_ARCOS, unary_func, NULL},
1547 {"ARSIN", OP_ARSIN, unary_func, NULL},
1548 {"ARTAN", OP_ARTAN, unary_func, NULL},
1549 {"ASIN", OP_ARSIN, unary_func, NULL},
1550 {"ATAN", OP_ARTAN, unary_func, NULL},
1551 {"COS", OP_COS, unary_func, NULL},
1552 {"EXP", OP_EXP, unary_func, NULL},
1553 {"LG10", OP_LG10, unary_func, NULL},
1554 {"LN", OP_LN, unary_func, NULL},
1555 {"MOD10", OP_MOD10, unary_func, NULL},
1556 {"NORMAL", OP_NORMAL, unary_func, NULL},
1557 {"RND", OP_RND, unary_func, NULL},
1558 {"SIN", OP_SIN, unary_func, NULL},
1559 {"SQRT", OP_SQRT, unary_func, NULL},
1560 {"TAN", OP_TAN, unary_func, NULL},
1561 {"TRUNC", OP_TRUNC, unary_func, NULL},
1562 {"UNIFORM", OP_UNIFORM, unary_func, NULL},
1564 {"TIME.DAYS", OP_TIME_DAYS, unary_func, NULL},
1565 {"TIME.HMS", OP_TIME_HMS, ternary_func, NULL},
1567 {"CTIME.DAYS", OP_CTIME_DAYS, unary_func, NULL},
1568 {"CTIME.HOURS", OP_CTIME_HOURS, unary_func, NULL},
1569 {"CTIME.MINUTES", OP_CTIME_MINUTES, unary_func, NULL},
1570 {"CTIME.SECONDS", OP_CTIME_SECONDS, unary_func, NULL},
1572 {"DATE.DMY", OP_DATE_DMY, ternary_func, NULL},
1573 {"DATE.MDY", OP_DATE_MDY, ternary_func, NULL},
1574 {"DATE.MOYR", OP_DATE_MOYR, binary_func, NULL},
1575 {"DATE.QYR", OP_DATE_QYR, binary_func, NULL},
1576 {"DATE.WKYR", OP_DATE_WKYR, binary_func, NULL},
1577 {"DATE.YRDAY", OP_DATE_YRDAY, binary_func, NULL},
1579 {"XDATE.DATE", OP_XDATE_DATE, unary_func, NULL},
1580 {"XDATE.HOUR", OP_XDATE_HOUR, unary_func, NULL},
1581 {"XDATE.JDAY", OP_XDATE_JDAY, unary_func, NULL},
1582 {"XDATE.MDAY", OP_XDATE_MDAY, unary_func, NULL},
1583 {"XDATE.MINUTE", OP_XDATE_MINUTE, unary_func, NULL},
1584 {"XDATE.MONTH", OP_XDATE_MONTH, unary_func, NULL},
1585 {"XDATE.QUARTER", OP_XDATE_QUARTER, unary_func, NULL},
1586 {"XDATE.SECOND", OP_XDATE_SECOND, unary_func, NULL},
1587 {"XDATE.TDAY", OP_XDATE_TDAY, unary_func, NULL},
1588 {"XDATE.TIME", OP_XDATE_TIME, unary_func, NULL},
1589 {"XDATE.WEEK", OP_XDATE_WEEK, unary_func, NULL},
1590 {"XDATE.WKDAY", OP_XDATE_WKDAY, unary_func, NULL},
1591 {"XDATE.YEAR", OP_XDATE_YEAR, unary_func, NULL},
1593 {"MISSING", OP_SYSMIS, MISSING_func, NULL},
1594 {"MOD", OP_MOD, binary_func, NULL},
1595 {"SYSMIS", OP_SYSMIS, SYSMIS_func, NULL},
1596 {"VALUE", OP_NUM_VAL, VALUE_func, NULL},
1597 {"LAG", OP_NUM_LAG, LAG_func, NULL},
1598 {"YRMODA", OP_YRMODA, ternary_func, NULL},
1600 {"ANY", OP_ANY, nary_num_func, NULL},
1601 {"CFVAR", OP_CFVAR, nary_num_func, NULL},
1602 {"MAX", OP_MAX, nary_num_func, NULL},
1603 {"MEAN", OP_MEAN, nary_num_func, NULL},
1604 {"MIN", OP_MIN, nary_num_func, NULL},
1605 {"NMISS", OP_NMISS, nary_num_func, NULL},
1606 {"NVALID", OP_NVALID, nary_num_func, NULL},
1607 {"RANGE", OP_RANGE, nary_num_func, NULL},
1608 {"SD", OP_SD, nary_num_func, NULL},
1609 {"SUM", OP_SUM, nary_num_func, NULL},
1610 {"VARIANCE", OP_VARIANCE, nary_num_func, NULL},
1612 {"CONCAT", OP_CONCAT, CONCAT_func, NULL},
1613 {"INDEX", OP_INDEX, generic_str_func, "nss/n"},
1614 {"RINDEX", OP_RINDEX, generic_str_func, "nss/n"},
1615 {"LENGTH", OP_LENGTH, generic_str_func, "ns"},
1616 {"LOWER", OP_LOWER, generic_str_func, "ss"},
1617 {"UPCAS", OP_UPPER, generic_str_func, "ss"},
1618 {"LPAD", OP_LPAD, generic_str_func, "ssn/s"},
1619 {"RPAD", OP_RPAD, generic_str_func, "ssn/s"},
1620 {"LTRIM", OP_LTRIM, generic_str_func, "ss/s"},
1621 {"RTRIM", OP_RTRIM, generic_str_func, "ss/s"},
1622 {"NUMBER", OP_NUMBER, generic_str_func, "ns/f"},
1623 {"STRING", OP_STRING, generic_str_func, "snf"},
1624 {"SUBSTR", OP_SUBSTR, generic_str_func, "ssn/n"},
1627 /* An algo_compare_func that compares functions A and B based on
1630 compare_functions (const void *a_, const void *b_, void *aux UNUSED)
1632 const struct function *a = a_;
1633 const struct function *b = b_;
1635 return strcmp (a->s, b->s);
1639 init_func_tab (void)
1649 func_count = sizeof func_tab / sizeof *func_tab;
1650 sort (func_tab, func_count, sizeof *func_tab, compare_functions, NULL);
1657 print_type (union any_node * n)
1662 s = ops[n->type].name;
1664 if (ops[n->type].flags & OP_MIN_ARGS)
1665 printf ("%s.%d\n", s, (int) n->nonterm.arg[n->nonterm.n]);
1666 else if (ops[n->type].flags & OP_FMT_SPEC)
1670 f.type = (int) n->nonterm.arg[n->nonterm.n + 0];
1671 f.w = (int) n->nonterm.arg[n->nonterm.n + 1];
1672 f.d = (int) n->nonterm.arg[n->nonterm.n + 2];
1673 printf ("%s(%s)\n", s, fmt_to_string (&f));
1680 debug_print_tree (union any_node * n, int level)
1683 for (i = 0; i < level; i++)
1685 if (n->type < OP_TERMINAL)
1688 for (i = 0; i < n->nonterm.n; i++)
1689 debug_print_tree (n->nonterm.arg[i], level + 1);
1696 printf (_("!!TERMINAL!!"));
1699 if (n->num_con.value == SYSMIS)
1702 printf ("%f", n->num_con.value);
1705 printf ("\"%.*s\"", n->str_con.len, n->str_con.s);
1709 printf ("%s", n->var.v->name);
1713 printf ("LAG(%s,%d)", n->lag.v->name, n->lag.lag);
1716 printf ("SYSMIS(%s)", n->var.v->name);
1719 printf ("VALUE(%s)", n->var.v->name);
1722 printf (_("!!SENTINEL!!"));
1725 printf (_("!!ERROR%d!!"), n->type);
1731 #endif /* DEBUGGING */
1733 #if GLOBAL_DEBUGGING
1735 debug_print_postfix (struct expression * e)
1738 double *num = e->num;
1739 unsigned char *str = e->str;
1740 struct variable **v = e->var;
1743 debug_printf ((_("postfix:")));
1744 for (o = e->op; *o != OP_SENTINEL;)
1747 if (t < OP_TERMINAL)
1749 debug_printf ((" %s", ops[t].name));
1751 if (ops[t].flags & OP_VAR_ARGS)
1753 debug_printf (("(%d)", *o));
1756 if (ops[t].flags & OP_MIN_ARGS)
1758 debug_printf ((".%d", *o));
1761 if (ops[t].flags & OP_FMT_SPEC)
1764 f.type = (int) *o++;
1767 debug_printf (("(%s)", fmt_to_string (&f)));
1770 else if (t == OP_NUM_CON)
1773 debug_printf ((" SYSMIS"));
1775 debug_printf ((" %f", *num));
1778 else if (t == OP_STR_CON)
1780 debug_printf ((" \"%.*s\"", *str, &str[1]));
1783 else if (t == OP_NUM_VAR || t == OP_STR_VAR)
1785 debug_printf ((" %s", (*v)->name));
1788 else if (t == OP_NUM_SYS)
1790 debug_printf ((" SYSMIS(#%d)", *o));
1793 else if (t == OP_NUM_VAL)
1795 debug_printf ((" VALUE(#%d)", *o));
1798 else if (t == OP_NUM_LAG || t == OP_STR_LAG)
1800 debug_printf ((" LAG(%s,%d)", (*v)->name, *o));
1806 printf ("debug_print_postfix(): %d\n", t);
1810 debug_putc ('\n', stdout);
1812 #endif /* GLOBAL_DEBUGGING */