1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
38 /* Lowest precedence. */
39 static int parse_or (union any_node **n);
40 static int parse_and (union any_node **n);
41 static int parse_not (union any_node **n);
42 static int parse_rel (union any_node **n);
43 static int parse_add (union any_node **n);
44 static int parse_mul (union any_node **n);
45 static int parse_neg (union any_node **n);
46 static int parse_exp (union any_node **n);
47 static int parse_primary (union any_node **n);
48 static int parse_function (union any_node **n);
49 /* Highest precedence. */
51 /* Utility functions. */
52 static const char *expr_type_name (int type);
53 static const char *type_name (int type);
54 static void make_bool (union any_node **n);
55 static union any_node *allocate_nonterminal (int op, union any_node *n);
56 static union any_node *append_nonterminal_arg (union any_node *,
58 static int type_check (union any_node **n, int type, int flags);
60 static void init_func_tab (void);
61 static int cmp_func (const void *a, const void *b);
64 static void debug_print_tree (union any_node *, int);
68 static void debug_print_postfix (struct expression *);
71 /* Public functions. */
74 expr_free (struct expression *e)
89 expr_parse (int flags)
95 /* Make sure the table of functions is initialized. */
98 /* Parse the expression. */
100 if (type == EX_ERROR)
103 /* Enforce type rules. */
104 if (!type_check (&n, type, flags))
110 /* Optimize the expression as best we can. */
111 n = (union any_node *) optimize_expression ((struct nonterm_node *) n);
113 /* Dump the tree-based expression to a postfix representation for
114 best evaluation speed, and destroy the tree. */
115 e = xmalloc (sizeof *e);
117 dump_expression (n, e);
120 /* If we're debugging or the user requested it, print the postfix
124 if (flags & PXP_DUMP)
126 debug_print_postfix (e);
133 type_check (union any_node **n, int type, int flags)
135 /* Enforce PXP_BOOLEAN flag. */
136 if (flags & PXP_BOOLEAN)
138 if (type == EX_STRING)
140 msg (SE, _("A string expression was supplied in a place "
141 "where a Boolean expression was expected."));
144 else if (type == EX_NUMERIC)
145 *n = allocate_nonterminal (OP_NUM_TO_BOOL, *n);
148 /* Enforce PXP_NUMERIC flag. */
149 if ((flags & PXP_NUMERIC) && (type != EX_NUMERIC))
151 msg (SE, _("A numeric expression was expected in a place "
152 "where one was not supplied."));
156 /* Enforce PXP_STRING flag. */
157 if ((flags & PXP_STRING) && (type != EX_STRING))
159 msg (SE, _("A string expression was expected in a place "
160 "where one was not supplied."));
167 /* Recursive-descent expression parser. */
169 /* Parses the OR level. */
171 parse_or (union any_node **n)
173 char typ[] = N_("The OR operator cannot take string operands.");
177 type = parse_and (n);
178 if (type == EX_ERROR || token != T_OR)
180 if (type == EX_STRING)
183 msg (SE, gettext (typ));
186 else if (type == EX_NUMERIC)
189 c = allocate_nonterminal (OP_OR, *n);
193 type = parse_and (n);
194 if (type == EX_ERROR)
196 else if (type == EX_STRING)
198 msg (SE, gettext (typ));
201 else if (type == EX_NUMERIC)
203 c = append_nonterminal_arg (c, *n);
216 /* Parses the AND level. */
218 parse_and (union any_node ** n)
220 static const char typ[]
221 = N_("The AND operator cannot take string operands.");
223 int type = parse_not (n);
225 if (type == EX_ERROR)
229 if (type == EX_STRING)
232 msg (SE, gettext (typ));
235 else if (type == EX_NUMERIC)
238 c = allocate_nonterminal (OP_AND, *n);
242 type = parse_not (n);
243 if (type == EX_ERROR)
245 else if (type == EX_STRING)
247 msg (SE, gettext (typ));
250 else if (type == EX_NUMERIC)
252 c = append_nonterminal_arg (c, *n);
265 /* Parses the NOT level. */
267 parse_not (union any_node ** n)
269 static const char typ[]
270 = N_("The NOT operator cannot take a string operand.");
274 while (lex_match (T_NOT))
276 type = parse_rel (n);
277 if (!not || type == EX_ERROR)
280 if (type == EX_STRING)
283 msg (SE, gettext (typ));
286 else if (type == EX_NUMERIC)
289 *n = allocate_nonterminal (OP_NOT, *n);
294 parse_rel (union any_node ** n)
296 static const char typ[]
297 = N_("Strings cannot be compared with numeric or Boolean "
298 "values with the relational operators "
301 int type = parse_add (n);
303 if (type == EX_ERROR)
307 if (token < T_EQ || token > T_NE)
314 c = allocate_nonterminal (token - T_EQ
315 + (type == EX_NUMERIC ? OP_EQ : OP_STRING_EQ),
322 if (t == EX_BOOLEAN && type == EX_NUMERIC)
323 make_bool (&c->nonterm.arg[0]);
324 else if (t == EX_NUMERIC && type == EX_BOOLEAN)
328 msg (SE, gettext (typ));
332 c = append_nonterminal_arg (c, *n);
337 if (token < T_EQ || token > T_NE)
349 /* Parses the addition and subtraction level. */
351 parse_add (union any_node **n)
353 static const char typ[]
354 = N_("The `+' and `-' operators may only be used with "
355 "numeric operands.");
360 type = parse_mul (n);
361 lex_negative_to_dash ();
362 if (type == EX_ERROR || (token != '+' && token != '-'))
364 if (type != EX_NUMERIC)
367 msg (SE, gettext (typ));
371 c = allocate_nonterminal (OP_PLUS, *n);
377 type = parse_mul (n);
378 if (type == EX_ERROR)
380 else if (type != EX_NUMERIC)
382 msg (SE, gettext (typ));
386 *n = allocate_nonterminal (OP_NEG, *n);
387 c = append_nonterminal_arg (c, *n);
389 lex_negative_to_dash ();
390 if (token != '+' && token != '-')
401 /* Parses the multiplication and division level. */
403 parse_mul (union any_node ** n)
405 static const char typ[]
406 = N_("The `*' and `/' operators may only be used with "
407 "numeric operands.");
413 type = parse_neg (n);
414 if (type == EX_ERROR || (token != '*' && token != '/'))
416 if (type != EX_NUMERIC)
419 msg (SE, gettext (typ));
423 c = allocate_nonterminal (OP_MUL, *n);
429 type = parse_neg (n);
430 if (type == EX_ERROR)
432 else if (type != EX_NUMERIC)
434 msg (SE, gettext (typ));
438 *n = allocate_nonterminal (OP_INV, *n);
439 c = append_nonterminal_arg (c, *n);
441 if (token != '*' && token != '/')
452 /* Parses the unary minus level. */
454 parse_neg (union any_node **n)
456 static const char typ[]
457 = N_("The unary minus (-) operator can only take a numeric operand.");
464 lex_negative_to_dash ();
465 if (!lex_match ('-'))
469 type = parse_exp (n);
470 if (!neg || type == EX_ERROR)
472 if (type != EX_NUMERIC)
475 msg (SE, gettext (typ));
479 *n = allocate_nonterminal (OP_NEG, *n);
484 parse_exp (union any_node **n)
486 static const char typ[]
487 = N_("Both operands to the ** operator must be numeric.");
492 type = parse_primary (n);
493 if (type == EX_ERROR || token != T_EXP)
495 if (type != EX_NUMERIC)
498 msg (SE, gettext (typ));
504 c = allocate_nonterminal (OP_POW, *n);
507 type = parse_primary (n);
508 if (type == EX_ERROR)
510 else if (type != EX_NUMERIC)
512 msg (SE, gettext (typ));
515 *n = append_nonterminal_arg (c, *n);
527 /* Parses system variables. */
529 parse_sysvar (union any_node **n)
531 if (!strcmp (tokid, "$CASENUM"))
533 *n = xmalloc (sizeof (struct casenum_node));
534 (*n)->casenum.type = OP_CASENUM;
541 if (!strcmp (tokid, "$SYSMIS"))
543 else if (!strcmp (tokid, "$JDATE"))
545 struct tm *time = localtime (&last_vfm_invocation);
546 d = yrmoda (time->tm_year + 1900, time->tm_mon + 1, time->tm_mday);
548 else if (!strcmp (tokid, "$DATE"))
550 static const char *months[12] =
552 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
553 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
559 time = localtime (&last_vfm_invocation);
560 sprintf (temp_buf, "%02d %s %02d", abs (time->tm_mday) % 100,
561 months[abs (time->tm_mon) % 12], abs (time->tm_year) % 100);
563 *n = xmalloc (sizeof (struct str_con_node) + 8);
564 (*n)->str_con.type = OP_STR_CON;
565 (*n)->str_con.len = 9;
566 memcpy ((*n)->str_con.s, temp_buf, 9);
569 else if (!strcmp (tokid, "$TIME"))
572 time = localtime (&last_vfm_invocation);
573 d = (yrmoda (time->tm_year + 1900, time->tm_mon + 1,
574 time->tm_mday) * 60. * 60. * 24.
575 + time->tm_hour * 60 * 60.
579 else if (!strcmp (tokid, "$LENGTH"))
581 msg (SW, _("Use of $LENGTH is obsolete, returning default of 66."));
584 else if (!strcmp (tokid, "$WIDTH"))
586 msg (SW, _("Use of $WIDTH is obsolete, returning default of 131."));
591 msg (SE, _("Unknown system variable %s."), tokid);
595 *n = xmalloc (sizeof (struct num_con_node));
596 (*n)->num_con.type = OP_NUM_CON;
597 (*n)->num_con.value = d;
602 /* Parses numbers, varnames, etc. */
604 parse_primary (union any_node **n)
612 /* An identifier followed by a left parenthesis is a function
614 if (lex_look_ahead () == '(')
615 return parse_function (n);
617 /* $ at the beginning indicates a system variable. */
620 int type = parse_sysvar (n);
625 /* Otherwise, it must be a user variable. */
626 v = find_variable (tokid);
630 lex_error (_("expecting variable name"));
634 *n = xmalloc (sizeof (struct var_node));
635 (*n)->var.type = v->type == NUMERIC ? OP_NUM_VAR : OP_STR_VAR;
637 return v->type == NUMERIC ? EX_NUMERIC : EX_STRING;
641 *n = xmalloc (sizeof (struct num_con_node));
642 (*n)->num_con.type = OP_NUM_CON;
643 (*n)->num_con.value = tokval;
649 *n = xmalloc (sizeof (struct str_con_node) + ds_length (&tokstr) - 1);
650 (*n)->str_con.type = OP_STR_CON;
651 (*n)->str_con.len = ds_length (&tokstr);
652 memcpy ((*n)->str_con.s, ds_value (&tokstr), ds_length (&tokstr));
662 if (!lex_match (')'))
664 lex_error (_("expecting `)'"));
672 lex_error (_("in expression"));
677 /* Individual function parsing. */
683 int (*func) (struct function *, int, union any_node **);
687 static struct function func_tab[];
688 static int func_count;
690 static int get_num_args (struct function *, int, union any_node **);
693 unary_func (struct function * f, int x unused, union any_node ** n)
696 struct nonterm_node *c;
698 if (!get_num_args (f, 1, n))
704 divisor = 1 / 60. / 60. / 24.;
707 divisor = 1 / 60. / 60.;
709 case OP_CTIME_MINUTES:
713 divisor = 60. * 60. * 24.;
716 case OP_CTIME_SECONDS:
718 *n = (*n)->nonterm.arg[0];
725 /* Arrive here when we encounter an operation that is just a
726 glorified version of a multiplication or division. Converts the
727 operation directly into that multiplication. */
728 c = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *));
731 c->arg[0] = (*n)->nonterm.arg[0];
732 c->arg[1] = xmalloc (sizeof (struct num_con_node));
733 c->arg[1]->num_con.type = OP_NUM_CON;
734 c->arg[1]->num_con.value = divisor;
736 *n = (union any_node *) c;
741 binary_func (struct function * f, int x unused, union any_node ** n)
743 if (!get_num_args (f, 2, n))
749 ternary_func (struct function * f, int x unused, union any_node ** n)
751 if (!get_num_args (f, 3, n))
757 MISSING_func (struct function * f, int x unused, union any_node ** n)
759 if (token == T_ID && is_varname (tokid) && lex_look_ahead () == ')')
761 struct var_node *c = xmalloc (sizeof *c);
762 c->v = parse_variable ();
763 c->type = c->v->type == ALPHA ? OP_STR_MIS : OP_NUM_SYS;
764 *n = (union any_node *) c;
767 if (!get_num_args (f, 1, n))
773 SYSMIS_func (struct function * f unused, int x unused, union any_node ** n)
777 if (token == T_ID && is_varname (tokid) && lex_look_ahead () == ')')
780 v = parse_variable ();
781 if (v->type == ALPHA)
783 struct num_con_node *c = xmalloc (sizeof *c);
784 c->type = OP_NUM_CON;
790 struct var_node *c = xmalloc (sizeof *c);
791 c->type = OP_NUM_SYS;
800 else if (t == EX_NUMERIC)
802 *n = allocate_nonterminal (OP_SYSMIS, *n);
805 else /* EX_STRING or EX_BOOLEAN */
807 /* Return constant `true' value. */
809 *n = xmalloc (sizeof (struct num_con_node));
810 (*n)->num_con.type = OP_NUM_CON;
811 (*n)->num_con.value = 1.0;
817 VALUE_func (struct function *f unused, int x unused, union any_node **n)
819 struct variable *v = parse_variable ();
823 *n = xmalloc (sizeof (struct var_node));
825 if (v->type == NUMERIC)
827 (*n)->var.type = OP_NUM_VAL;
832 (*n)->var.type = OP_STR_VAR;
838 LAG_func (struct function *f unused, int x unused, union any_node **n)
840 struct variable *v = parse_variable ();
847 if (!lex_integer_p () || lex_integer () <= 0 || lex_integer () > 1000)
849 msg (SE, _("Argument 2 to LAG must be a small positive "
850 "integer constant."));
854 nlag = lex_integer ();
857 n_lag = max (nlag, n_lag);
858 *n = xmalloc (sizeof (struct lag_node));
859 (*n)->lag.type = (v->type == NUMERIC ? OP_NUM_LAG : OP_STR_LAG);
861 (*n)->lag.lag = nlag;
862 return (v->type == NUMERIC ? EX_NUMERIC : EX_STRING);
865 /* This screwball function parses n-ary operators:
866 1. NMISS, NVALID, SUM, MEAN, MIN, MAX: any number of (numeric) arguments.
867 2. SD, VARIANCE, CFVAR: at least two (numeric) arguments.
868 3. RANGE: An odd number of arguments, but at least three.
869 All arguments must be the same type.
870 4. ANY: At least two arguments. All arguments must be the same type.
873 nary_num_func (struct function *f, int min_args, union any_node **n)
875 /* Argument number of current argument (used for error messages). */
878 /* Number of arguments. */
881 /* Number of arguments allocated. */
884 /* Type of arguments. */
885 int type = (f->t == OP_ANY || f->t == OP_RANGE) ? -1 : NUMERIC;
887 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
888 (*n)->nonterm.type = f->t;
892 /* Special case: vara TO varb. */
894 /* FIXME: Is this condition failsafe? Can we _ever_ have two
895 juxtaposed identifiers otherwise? */
896 if (token == T_ID && is_varname (tokid)
897 && toupper (lex_look_ahead ()) == 'T')
902 int opts = PV_SINGLE;
906 else if (type == ALPHA)
908 if (!parse_variables (NULL, &v, &nv, opts))
910 if (nv + (*n)->nonterm.n >= m)
913 *n = xrealloc (*n, (sizeof (struct nonterm_node)
914 + (m - 1) * sizeof (union any_node *)));
919 for (j = 1; j < nv; j++)
920 if (type != v[j]->type)
922 msg (SE, _("Type mismatch in argument %d of %s, which was "
923 "expected to be of %s type. It was actually "
925 argn, f->s, type_name (type), type_name (v[j]->type));
930 for (j = 0; j < nv; j++)
932 union any_node **c = &(*n)->nonterm.arg[(*n)->nonterm.n++];
933 *c = xmalloc (sizeof (struct var_node));
934 (*c)->var.type = (type == NUMERIC ? OP_NUM_VAR : OP_STR_VAR);
941 int t = parse_or (&c);
948 msg (SE, _("%s cannot take Boolean operands."), f->s);
955 else if (t == EX_STRING)
958 else if ((t == EX_NUMERIC) ^ (type == NUMERIC))
961 msg (SE, _("Type mismatch in argument %d of %s, which was "
962 "expected to be of %s type. It was actually "
964 argn, f->s, type_name (type), expr_type_name (t));
967 if ((*n)->nonterm.n + 1 >= m)
970 *n = xrealloc (*n, (sizeof (struct nonterm_node)
971 + (m - 1) * sizeof (union any_node *)));
973 (*n)->nonterm.arg[(*n)->nonterm.n++] = c;
978 if (!lex_match (','))
980 lex_error (_("in function call"));
986 *n = xrealloc (*n, (sizeof (struct nonterm_node)
987 + ((*n)->nonterm.n) * sizeof (union any_node *)));
989 nargs = (*n)->nonterm.n;
990 if (f->t == OP_RANGE)
992 if (nargs < 3 || (nargs & 1) == 0)
994 msg (SE, _("RANGE requires an odd number of arguments, but "
999 else if (f->t == OP_SD || f->t == OP_VARIANCE
1000 || f->t == OP_CFVAR || f->t == OP_ANY)
1004 msg (SE, _("%s requires at least two arguments."), f->s);
1009 if (f->t == OP_CFVAR || f->t == OP_SD || f->t == OP_VARIANCE)
1010 min_args = max (min_args, 2);
1012 min_args = max (min_args, 1);
1014 /* Yes, this is admittedly a terrible crock, but it works. */
1015 (*n)->nonterm.arg[(*n)->nonterm.n] = (union any_node *) min_args;
1017 if (min_args > nargs)
1019 msg (SE, _("%s.%d requires at least %d arguments."),
1020 f->s, min_args, min_args);
1024 if (f->t == OP_ANY || f->t == OP_RANGE)
1026 if (type == T_STRING)
1039 CONCAT_func (struct function * f unused, int x unused, union any_node ** n)
1045 *n = xmalloc (sizeof (struct nonterm_node) + sizeof (union any_node *[15]));
1046 (*n)->nonterm.type = OP_CONCAT;
1047 (*n)->nonterm.n = 0;
1050 if ((*n)->nonterm.n >= m)
1053 *n = xrealloc (*n, (sizeof (struct nonterm_node)
1054 + (m - 1) * sizeof (union any_node *)));
1056 type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
1057 if (type == EX_ERROR)
1059 if (type != EX_STRING)
1061 msg (SE, _("Argument %d to CONCAT is type %s. All arguments "
1062 "to CONCAT must be strings."),
1063 (*n)->nonterm.n + 1, expr_type_name (type));
1068 if (!lex_match (','))
1071 *n = xrealloc (*n, (sizeof (struct nonterm_node)
1072 + ((*n)->nonterm.n - 1) * sizeof (union any_node *)));
1080 /* Parses a string function according to f->desc. f->desc[0] is the
1081 return type of the function. Succeeding characters represent
1082 successive args. Optional args are separated from the required
1083 args by a slash (`/'). Codes are `n', numeric arg; `s', string
1084 arg; and `f', format spec (this must be the last arg). If the
1085 optional args are included, the type becomes f->t+1. */
1087 generic_str_func (struct function *f, int x unused, union any_node ** n)
1093 /* Count max number of arguments. */
1097 if (*cp == 'n' || *cp == 's')
1099 else if (*cp == 'f')
1105 *n = xmalloc (sizeof (struct nonterm_node)
1106 + (max_args - 1) * sizeof (union any_node *));
1107 (*n)->nonterm.type = f->t;
1108 (*n)->nonterm.n = 0;
1111 if (*cp == 'n' || *cp == 's')
1113 int t = *cp == 'n' ? EX_NUMERIC : EX_STRING;
1114 type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
1116 if (type == EX_ERROR)
1120 msg (SE, _("Argument %d to %s was expected to be of %s type. "
1121 "It was actually of type %s."),
1122 (*n)->nonterm.n + 1, f->s,
1123 *cp == 'n' ? _("numeric") : _("string"),
1124 expr_type_name (type));
1129 else if (*cp == 'f')
1131 /* This is always the very last argument. Also, this code
1132 is a crock. However, it works. */
1133 struct fmt_spec fmt;
1135 if (!parse_format_specifier (&fmt, 0))
1137 if (formats[fmt.type].cat & FCAT_STRING)
1139 msg (SE, _("%s is not a numeric format."), fmt_to_string (&fmt));
1142 (*n)->nonterm.arg[(*n)->nonterm.n + 0] = (union any_node *) fmt.type;
1143 (*n)->nonterm.arg[(*n)->nonterm.n + 1] = (union any_node *) fmt.w;
1144 (*n)->nonterm.arg[(*n)->nonterm.n + 2] = (union any_node *) fmt.d;
1155 if (lex_match (','))
1157 (*n)->nonterm.type++;
1163 else if (!lex_match (','))
1165 msg (SE, _("Too few arguments to function %s."), f->s);
1170 return f->desc[0] == 'n' ? EX_NUMERIC : EX_STRING;
1177 /* General function parsing. */
1180 get_num_args (struct function *f, int num_args, union any_node **n)
1185 *n = xmalloc (sizeof (struct nonterm_node)
1186 + (num_args - 1) * sizeof (union any_node *));
1187 (*n)->nonterm.type = f->t;
1188 (*n)->nonterm.n = 0;
1191 t = parse_or (&(*n)->nonterm.arg[i]);
1195 if (t != EX_NUMERIC)
1197 msg (SE, _("Type mismatch in argument %d of %s, which was expected "
1198 "to be numeric. It was actually type %s."),
1199 i + 1, f->s, expr_type_name (t));
1202 if (++i >= num_args)
1204 if (!lex_match (','))
1206 msg (SE, _("Missing comma following argument %d of %s."), i + 1, f->s);
1217 parse_function (union any_node ** n)
1219 struct function *fp;
1220 char fname[32], *cp;
1225 /* Check for a vector with this name. */
1226 v = find_vector (tokid);
1230 assert (token == '(');
1233 *n = xmalloc (sizeof (struct nonterm_node)
1234 + sizeof (union any_node *[2]));
1235 (*n)->nonterm.type = (v->v[0]->type == NUMERIC
1236 ? OP_VEC_ELEM_NUM : OP_VEC_ELEM_STR);
1237 (*n)->nonterm.n = 0;
1239 t = parse_or (&(*n)->nonterm.arg[0]);
1242 if (t != EX_NUMERIC)
1244 msg (SE, _("The index value after a vector name must be numeric."));
1249 if (!lex_match (')'))
1251 msg (SE, _("`)' expected after a vector index value."));
1254 ((*n)->nonterm.arg[1]) = (union any_node *) v->index;
1256 return v->v[0]->type == NUMERIC ? EX_NUMERIC : EX_STRING;
1259 ds_truncate (&tokstr, 31);
1260 strcpy (fname, ds_value (&tokstr));
1261 cp = strrchr (fname, '.');
1262 if (cp && isdigit ((unsigned char) cp[1]))
1264 min_args = atoi (&cp[1]);
1271 if (!lex_force_match ('('))
1278 fp = bsearch (&f, func_tab, func_count, sizeof *func_tab, cmp_func);
1283 msg (SE, _("There is no function named %s."), fname);
1286 if (min_args && fp->func != nary_num_func)
1288 msg (SE, _("Function %s may not be given a minimum number of "
1289 "arguments."), fname);
1292 t = fp->func (fp, min_args, n);
1295 if (!lex_match (')'))
1297 lex_error (_("expecting `)' after %s function"), fname);
1308 #if GLOBAL_DEBUGGING
1309 #define op(a,b,c,d) {a,b,c,d}
1311 #define op(a,b,c,d) {b,c,d}
1316 struct op_desc ops[OP_SENTINEL + 1] =
1318 op ("!?ERROR?!", 000, 0, 0),
1320 op ("plus", 001, varies, 1),
1321 op ("mul", 011, varies, 1),
1322 op ("pow", 010, -1, 0),
1323 op ("and", 010, -1, 0),
1324 op ("or", 010, -1, 0),
1325 op ("not", 000, 0, 0),
1326 op ("eq", 000, -1, 0),
1327 op ("ge", 000, -1, 0),
1328 op ("gt", 000, -1, 0),
1329 op ("le", 000, -1, 0),
1330 op ("lt", 000, -1, 0),
1331 op ("ne", 000, -1, 0),
1333 op ("string-eq", 000, -1, 0),
1334 op ("string-ge", 000, -1, 0),
1335 op ("string-gt", 000, -1, 0),
1336 op ("string-le", 000, -1, 0),
1337 op ("string-lt", 000, -1, 0),
1338 op ("string-ne", 000, -1, 0),
1340 op ("neg", 000, 0, 0),
1341 op ("abs", 000, 0, 0),
1342 op ("arcos", 000, 0, 0),
1343 op ("arsin", 000, 0, 0),
1344 op ("artan", 000, 0, 0),
1345 op ("cos", 000, 0, 0),
1346 op ("exp", 000, 0, 0),
1347 op ("lg10", 000, 0, 0),
1348 op ("ln", 000, 0, 0),
1349 op ("mod10", 000, 0, 0),
1350 op ("rnd", 000, 0, 0),
1351 op ("sin", 000, 0, 0),
1352 op ("sqrt", 000, 0, 0),
1353 op ("tan", 000, 0, 0),
1354 op ("trunc", 000, 0, 0),
1356 op ("any", 011, varies, 1),
1357 op ("any-string", 001, varies, 1),
1358 op ("cfvar", 013, varies, 2),
1359 op ("max", 013, varies, 2),
1360 op ("mean", 013, varies, 2),
1361 op ("min", 013, varies, 2),
1362 op ("nmiss", 011, varies, 1),
1363 op ("nvalid", 011, varies, 1),
1364 op ("range", 011, varies, 1),
1365 op ("range-string", 001, varies, 1),
1366 op ("sd", 013, varies, 2),
1367 op ("sum", 013, varies, 2),
1368 op ("variance", 013, varies, 2),
1370 op ("time_hms", 000, -2, 0),
1371 op ("ctime_days?!", 000, 0, 0),
1372 op ("ctime_hours?!", 000, 0, 0),
1373 op ("ctime_minutes?!", 000, 0, 0),
1374 op ("ctime_seconds?!", 000, 0, 0),
1375 op ("time_days?!", 000, 0, 0),
1377 op ("date_dmy", 000, -2, 0),
1378 op ("date_mdy", 000, -2, 0),
1379 op ("date_moyr", 000, -1, 0),
1380 op ("date_qyr", 000, -1, 0),
1381 op ("date_wkyr", 000, -1, 0),
1382 op ("date_yrday", 000, -1, 0),
1383 op ("yrmoda", 000, -2, 0),
1385 op ("xdate_date", 000, 0, 0),
1386 op ("xdate_hour", 000, 0, 0),
1387 op ("xdate_jday", 000, 0, 0),
1388 op ("xdate_mday", 000, 0, 0),
1389 op ("xdate_minute", 000, 0, 0),
1390 op ("xdate_month", 000, 0, 0),
1391 op ("xdate_quarter", 000, 0, 0),
1392 op ("xdate_second", 000, 0, 0),
1393 op ("xdate_tday", 000, 0, 0),
1394 op ("xdate_time", 000, 0, 0),
1395 op ("xdate_week", 000, 0, 0),
1396 op ("xdate_wkday", 000, 0, 0),
1397 op ("xdate_year", 000, 0, 0),
1399 op ("concat", 001, varies, 1),
1400 op ("index-2", 000, -1, 0),
1401 op ("index-3", 000, -2, 0),
1402 op ("rindex-2", 000, -1, 0),
1403 op ("rindex-3", 000, -2, 0),
1404 op ("length", 000, 0, 0),
1405 op ("lower", 000, 0, 0),
1406 op ("upcas", 000, 0, 0),
1407 op ("lpad-2", 010, -1, 0),
1408 op ("lpad-3", 010, -2, 0),
1409 op ("rpad-2", 010, -1, 0),
1410 op ("rpad-3", 010, -2, 0),
1411 op ("ltrim-1", 000, 0, 0),
1412 op ("ltrim-2", 000, -1, 0),
1413 op ("rtrim-1", 000, 0, 0),
1414 op ("rtrim-2", 000, -1, 0),
1415 op ("number-1", 010, 0, 0),
1416 op ("number-2", 014, 0, 3),
1417 op ("string", 004, 0, 3),
1418 op ("substr-2", 010, -1, 0),
1419 op ("substr-3", 010, -2, 0),
1421 op ("inv", 000, 0, 0),
1422 op ("square", 000, 0, 0),
1423 op ("num-to-Bool", 000, 0, 0),
1425 op ("mod", 010, -1, 0),
1426 op ("normal", 000, 0, 0),
1427 op ("uniform", 000, 0, 0),
1428 op ("sysmis", 010, 0, 0),
1429 op ("vec-elem-num", 002, 0, 1),
1430 op ("vec-elem-str", 002, 0, 1),
1432 op ("!?TERMINAL?!", 000, 0, 0),
1433 op ("num-con", 000, +1, 0),
1434 op ("str-con", 000, +1, 0),
1435 op ("num-var", 000, +1, 0),
1436 op ("str-var", 000, +1, 0),
1437 op ("num-lag", 000, +1, 1),
1438 op ("str-lag", 000, +1, 1),
1439 op ("num-sys", 000, +1, 1),
1440 op ("num-val", 000, +1, 1),
1441 op ("str-mis", 000, +1, 1),
1442 op ("$casenum", 000, +1, 0),
1443 op ("!?SENTINEL?!", 000, 0, 0),
1450 /* Utility functions. */
1453 expr_type_name (int type)
1461 return _("Boolean");
1464 return _("numeric");
1476 type_name (int type)
1481 return _("numeric");
1491 make_bool (union any_node **n)
1495 c = xmalloc (sizeof (struct nonterm_node));
1496 c->nonterm.type = OP_NUM_TO_BOOL;
1498 c->nonterm.arg[0] = *n;
1503 free_node (union any_node *n)
1505 if (n->type < OP_TERMINAL)
1509 for (i = 0; i < n->nonterm.n; i++)
1510 free_node (n->nonterm.arg[i]);
1516 allocate_nonterminal (int op, union any_node *n)
1520 c = xmalloc (sizeof c->nonterm);
1521 c->nonterm.type = op;
1523 c->nonterm.arg[0] = n;
1529 append_nonterminal_arg (union any_node *a, union any_node *b)
1531 a = xrealloc (a, sizeof *a + sizeof *a->nonterm.arg * a->nonterm.n);
1532 a->nonterm.arg[a->nonterm.n++] = b;
1536 static struct function func_tab[] =
1538 {"ABS", OP_ABS, unary_func, NULL},
1539 {"ACOS", OP_ARCOS, unary_func, NULL},
1540 {"ARCOS", OP_ARCOS, unary_func, NULL},
1541 {"ARSIN", OP_ARSIN, unary_func, NULL},
1542 {"ARTAN", OP_ARTAN, unary_func, NULL},
1543 {"ASIN", OP_ARSIN, unary_func, NULL},
1544 {"ATAN", OP_ARTAN, unary_func, NULL},
1545 {"COS", OP_COS, unary_func, NULL},
1546 {"EXP", OP_EXP, unary_func, NULL},
1547 {"LG10", OP_LG10, unary_func, NULL},
1548 {"LN", OP_LN, unary_func, NULL},
1549 {"MOD10", OP_MOD10, unary_func, NULL},
1550 {"NORMAL", OP_NORMAL, unary_func, NULL},
1551 {"RND", OP_RND, unary_func, NULL},
1552 {"SIN", OP_SIN, unary_func, NULL},
1553 {"SQRT", OP_SQRT, unary_func, NULL},
1554 {"TAN", OP_TAN, unary_func, NULL},
1555 {"TRUNC", OP_TRUNC, unary_func, NULL},
1556 {"UNIFORM", OP_UNIFORM, unary_func, NULL},
1558 {"TIME.DAYS", OP_TIME_DAYS, unary_func, NULL},
1559 {"TIME.HMS", OP_TIME_HMS, ternary_func, NULL},
1561 {"CTIME.DAYS", OP_CTIME_DAYS, unary_func, NULL},
1562 {"CTIME.HOURS", OP_CTIME_HOURS, unary_func, NULL},
1563 {"CTIME.MINUTES", OP_CTIME_MINUTES, unary_func, NULL},
1564 {"CTIME.SECONDS", OP_CTIME_SECONDS, unary_func, NULL},
1566 {"DATE.DMY", OP_DATE_DMY, ternary_func, NULL},
1567 {"DATE.MDY", OP_DATE_MDY, ternary_func, NULL},
1568 {"DATE.MOYR", OP_DATE_MOYR, binary_func, NULL},
1569 {"DATE.QYR", OP_DATE_QYR, binary_func, NULL},
1570 {"DATE.WKYR", OP_DATE_WKYR, binary_func, NULL},
1571 {"DATE.YRDAY", OP_DATE_YRDAY, binary_func, NULL},
1573 {"XDATE.DATE", OP_XDATE_DATE, unary_func, NULL},
1574 {"XDATE.HOUR", OP_XDATE_HOUR, unary_func, NULL},
1575 {"XDATE.JDAY", OP_XDATE_JDAY, unary_func, NULL},
1576 {"XDATE.MDAY", OP_XDATE_MDAY, unary_func, NULL},
1577 {"XDATE.MINUTE", OP_XDATE_MINUTE, unary_func, NULL},
1578 {"XDATE.MONTH", OP_XDATE_MONTH, unary_func, NULL},
1579 {"XDATE.QUARTER", OP_XDATE_QUARTER, unary_func, NULL},
1580 {"XDATE.SECOND", OP_XDATE_SECOND, unary_func, NULL},
1581 {"XDATE.TDAY", OP_XDATE_TDAY, unary_func, NULL},
1582 {"XDATE.TIME", OP_XDATE_TIME, unary_func, NULL},
1583 {"XDATE.WEEK", OP_XDATE_WEEK, unary_func, NULL},
1584 {"XDATE.WKDAY", OP_XDATE_WKDAY, unary_func, NULL},
1585 {"XDATE.YEAR", OP_XDATE_YEAR, unary_func, NULL},
1587 {"MISSING", OP_SYSMIS, MISSING_func, NULL},
1588 {"MOD", OP_MOD, binary_func, NULL},
1589 {"SYSMIS", OP_SYSMIS, SYSMIS_func, NULL},
1590 {"VALUE", OP_NUM_VAL, VALUE_func, NULL},
1591 {"LAG", OP_NUM_LAG, LAG_func, NULL},
1592 {"YRMODA", OP_YRMODA, ternary_func, NULL},
1594 {"ANY", OP_ANY, nary_num_func, NULL},
1595 {"CFVAR", OP_CFVAR, nary_num_func, NULL},
1596 {"MAX", OP_MAX, nary_num_func, NULL},
1597 {"MEAN", OP_MEAN, nary_num_func, NULL},
1598 {"MIN", OP_MIN, nary_num_func, NULL},
1599 {"NMISS", OP_NMISS, nary_num_func, NULL},
1600 {"NVALID", OP_NVALID, nary_num_func, NULL},
1601 {"RANGE", OP_RANGE, nary_num_func, NULL},
1602 {"SD", OP_SD, nary_num_func, NULL},
1603 {"SUM", OP_SUM, nary_num_func, NULL},
1604 {"VARIANCE", OP_VARIANCE, nary_num_func, NULL},
1606 {"CONCAT", OP_CONCAT, CONCAT_func, NULL},
1607 {"INDEX", OP_INDEX, generic_str_func, "nss/n"},
1608 {"RINDEX", OP_RINDEX, generic_str_func, "nss/n"},
1609 {"LENGTH", OP_LENGTH, generic_str_func, "ns"},
1610 {"LOWER", OP_LOWER, generic_str_func, "ss"},
1611 {"UPCAS", OP_UPPER, generic_str_func, "ss"},
1612 {"LPAD", OP_LPAD, generic_str_func, "ssn/s"},
1613 {"RPAD", OP_RPAD, generic_str_func, "ssn/s"},
1614 {"LTRIM", OP_LTRIM, generic_str_func, "ss/s"},
1615 {"RTRIM", OP_RTRIM, generic_str_func, "ss/s"},
1616 {"NUMBER", OP_NUMBER, generic_str_func, "ns/f"},
1617 {"STRING", OP_STRING, generic_str_func, "snf"},
1618 {"SUBSTR", OP_SUBSTR, generic_str_func, "ssn/n"},
1622 cmp_func (const void *a, const void *b)
1624 return strcmp (*(char **) a, *(char **) b);
1628 init_func_tab (void)
1638 func_count = sizeof func_tab / sizeof *func_tab;
1639 qsort (func_tab, func_count, sizeof *func_tab, cmp_func);
1646 print_type (union any_node * n)
1651 s = ops[n->type].name;
1653 if (ops[n->type].flags & OP_MIN_ARGS)
1654 printf ("%s.%d\n", s, (int) n->nonterm.arg[n->nonterm.n]);
1655 else if (ops[n->type].flags & OP_FMT_SPEC)
1659 f.type = (int) n->nonterm.arg[n->nonterm.n + 0];
1660 f.w = (int) n->nonterm.arg[n->nonterm.n + 1];
1661 f.d = (int) n->nonterm.arg[n->nonterm.n + 2];
1662 printf ("%s(%s)\n", s, fmt_to_string (&f));
1669 debug_print_tree (union any_node * n, int level)
1672 for (i = 0; i < level; i++)
1674 if (n->type < OP_TERMINAL)
1677 for (i = 0; i < n->nonterm.n; i++)
1678 debug_print_tree (n->nonterm.arg[i], level + 1);
1685 printf (_("!!TERMINAL!!"));
1688 if (n->num_con.value == SYSMIS)
1691 printf ("%f", n->num_con.value);
1694 printf ("\"%.*s\"", n->str_con.len, n->str_con.s);
1698 printf ("%s", n->var.v->name);
1702 printf ("LAG(%s,%d)", n->lag.v->name, n->lag.lag);
1705 printf ("SYSMIS(%s)", n->var.v->name);
1708 printf ("VALUE(%s)", n->var.v->name);
1711 printf (_("!!SENTINEL!!"));
1714 printf (_("!!ERROR%d!!"), n->type);
1720 #endif /* DEBUGGING */
1722 #if GLOBAL_DEBUGGING
1724 debug_print_postfix (struct expression * e)
1727 double *num = e->num;
1728 unsigned char *str = e->str;
1729 struct variable **v = e->var;
1732 debug_printf ((_("postfix:")));
1733 for (o = e->op; *o != OP_SENTINEL;)
1736 if (t < OP_TERMINAL)
1738 debug_printf ((" %s", ops[t].name));
1740 if (ops[t].flags & OP_VAR_ARGS)
1742 debug_printf (("(%d)", *o));
1745 if (ops[t].flags & OP_MIN_ARGS)
1747 debug_printf ((".%d", *o));
1750 if (ops[t].flags & OP_FMT_SPEC)
1753 f.type = (int) *o++;
1756 debug_printf (("(%s)", fmt_to_string (&f)));
1759 else if (t == OP_NUM_CON)
1762 debug_printf ((" SYSMIS"));
1764 debug_printf ((" %f", *num));
1767 else if (t == OP_STR_CON)
1769 debug_printf ((" \"%.*s\"", *str, &str[1]));
1772 else if (t == OP_NUM_VAR || t == OP_STR_VAR)
1774 debug_printf ((" %s", (*v)->name));
1777 else if (t == OP_NUM_SYS)
1779 debug_printf ((" SYSMIS(#%d)", *o));
1782 else if (t == OP_NUM_VAL)
1784 debug_printf ((" VALUE(#%d)", *o));
1787 else if (t == OP_NUM_LAG || t == OP_STR_LAG)
1789 debug_printf ((" LAG(%s,%d)", (*v)->name, *o));
1795 printf ("debug_print_postfix(): %d\n", t);
1799 debug_putc ('\n', stdout);
1801 #endif /* GLOBAL_DEBUGGING */