1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
31 #include "julcal/julcal.h"
37 static void evaluate_tree_no_missing (union any_node **);
38 static void evaluate_tree_with_missing (union any_node **, size_t count);
39 static void optimize_tree (union any_node **);
41 static void collapse_node (union any_node **node, size_t child_idx);
42 static void set_number (union any_node **node, double);
43 static void set_number_errno (union any_node **node, double);
44 static void set_string (union any_node **node, const char *, size_t);
47 optimize_expression (union any_node **node)
49 int nonconst = 0; /* Number of nonconstant children. */
50 int sysmis = 0; /* Number of system-missing children. */
51 struct nonterm_node *nonterm;
54 /* We can't optimize a terminal node. */
55 if (IS_TERMINAL ((*node)->type))
57 nonterm = &(*node)->nonterm;
59 /* Start by optimizing all the children. */
60 for (i = 0; i < nonterm->n; i++)
62 optimize_expression (&nonterm->arg[i]);
63 if (nonterm->arg[i]->type == OP_NUM_CON)
65 if (nonterm->arg[i]->num_con.value == SYSMIS)
68 else if (nonterm->arg[i]->type != OP_STR_CON)
72 if (sysmis && !(ops[nonterm->type].flags & OP_ABSORB_MISS))
74 /* Most operation produce SYSMIS given any SYSMIS
76 set_number (node, SYSMIS);
80 /* Evaluate constant expressions. */
82 evaluate_tree_no_missing (node);
84 evaluate_tree_with_missing (node, sysmis);
88 /* A few optimization possibilities are still left. */
94 eq_num_con (union any_node *node, double number)
96 return node->type == OP_NUM_CON && node->num_con.value == number;
100 optimize_tree (union any_node **node)
102 struct nonterm_node *n = &(*node)->nonterm;
104 /* x+0, x-0, 0+x => x. */
105 if ((n->type == OP_ADD || n->type == OP_SUB) && eq_num_con (n->arg[1], 0.))
106 collapse_node (node, 1);
107 else if (n->type == OP_ADD && eq_num_con (n->arg[0], 0.))
108 collapse_node (node, 0);
110 /* x*1, x/1, 1*x => x. */
111 else if ((n->type == OP_MUL || n->type == OP_DIV)
112 && eq_num_con (n->arg[1], 1.))
113 collapse_node (node, 0);
114 else if (n->type == OP_MUL && eq_num_con (n->arg[0], 1.))
115 collapse_node (node, 1);
117 /* 0*x, 0/x, x*0, MOD(0,x) => x. */
118 else if (((n->type == OP_MUL || n->type == OP_DIV || n->type == OP_MOD)
119 && eq_num_con (n->arg[0], 0.))
120 || (n->type == OP_MUL && eq_num_con (n->arg[1], 0.)))
121 set_number (node, 0.);
124 else if (n->type == OP_POW && eq_num_con (n->arg[1], 1))
125 collapse_node (node, 0);
127 /* x**2 => SQUARE(x). */
128 else if (n->type == OP_POW && eq_num_con (n->arg[2], 2))
135 /* Finds the first NEEDLE of length NEEDLE_LEN in a HAYSTACK of length
136 HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */
138 str_search (const char *haystack, int haystack_len,
139 const char *needle, int needle_len)
141 char *p = memmem (haystack, haystack_len, needle, needle_len);
142 return p ? p - haystack + 1 : 0;
145 /* Finds the last NEEDLE of length NEEDLE_LEN in a HAYSTACK of length
146 HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */
148 str_rsearch (const char *haystack, int haystack_len,
149 const char *needle, int needle_len)
151 char *p = mm_find_reverse (haystack, haystack_len, needle, needle_len);
152 return p ? p - haystack + 1 : 0;
156 evaluate_tree_no_missing (union any_node **node)
158 struct nonterm_node *n = &(*node)->nonterm;
166 for (i = 0; i < n->n && i < 3; i++)
168 union any_node *arg = n->arg[i];
170 if (arg->type == OP_NUM_CON)
171 num[i] = arg->num_con.value;
172 else if (arg->type == OP_STR_CON)
174 str[i] = arg->str_con.s;
175 str_len[i] = arg->str_con.len;
182 set_number (node, num[0] + num[1]);
186 set_number (node, num[0] - num[1]);
190 set_number (node, num[0] * num[1]);
195 set_number (node, num[0] / num[1]);
199 if (num[0] == 0. && num[1] == 0.)
200 set_number (node, SYSMIS);
202 set_number_errno (node, pow (num[0], num[1]));
206 set_number (node, num[0] && num[1]);
210 set_number (node, num[0] || num[1]);
214 set_number (node, !num[0]);
218 set_number (node, num[0] == num[1]);
221 set_number (node, num[0] >= num[1]);
224 set_number (node, num[0] > num[1]);
227 set_number (node, num[0] <= num[1]);
230 set_number (node, num[0] < num[1]);
233 set_number (node, num[0] != num[1]);
236 /* String operators. */
238 set_number (node, st_compare_pad (str[0], str_len[0],
239 str[1], str_len[1]) == 0);
242 set_number (node, st_compare_pad (str[0], str_len[0],
243 str[1], str_len[1]) >= 0);
246 set_number (node, st_compare_pad (str[0], str_len[0],
247 str[1], str_len[1]) > 0);
250 set_number (node, st_compare_pad (str[0], str_len[0],
251 str[1], str_len[1]) <= 0);
254 set_number (node, st_compare_pad (str[0], str_len[0],
255 str[1], str_len[1]) < 0);
258 set_number (node, st_compare_pad (str[0], str_len[0],
259 str[1], str_len[1]) != 0);
262 /* Unary functions. */
264 set_number (node, -num[0]);
267 set_number (node, fabs (num[0]));
270 set_number_errno (node, acos (num[0]));
273 set_number_errno (node, asin (num[0]));
276 set_number_errno (node, atan (num[0]));
279 set_number_errno (node, cos (num[0]));
282 set_number_errno (node, exp (num[0]));
285 set_number_errno (node, log10 (num[0]));
288 set_number_errno (node, log (num[0]));
291 set_number_errno (node, fmod (num[0], 10));
295 set_number_errno (node, floor (num[0] + 0.5));
297 set_number_errno (node, -floor (-num[0] + 0.5));
300 set_number_errno (node, sin (num[0]));
303 set_number_errno (node, sqrt (num[0]));
306 set_number_errno (node, tan (num[0]));
310 set_number_errno (node, floor (num[0]));
312 set_number_errno (node, -floor (-num[0]));
315 /* N-ary numeric functions. */
319 for (i = 1; i < n->n; i++)
320 if (num[0] == n->arg[i]->num_con.value)
325 set_number (node, result);
331 for (i = 1; i < n->n; i++)
332 if (!st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len,
333 n->arg[i]->str_con.s, n->arg[i]->str_con.len))
338 set_number (node, result);
358 for (i = 1; i < n->n; i += 2)
360 double min = n->arg[i]->num_con.value;
361 double max = n->arg[i + 1]->num_con.value;
362 if (num[0] >= min && num[0] <= max)
368 set_number (node, result);
372 case OP_RANGE_STRING:
376 for (i = 1; i < n->n; i += 2)
378 const char *min = n->arg[i]->str_con.s;
379 size_t min_len = n->arg[i]->str_con.len;
380 const char *max = n->arg[i + 1]->str_con.s;
381 size_t max_len = n->arg[i + 1]->str_con.len;
383 if (st_compare_pad (str[0], str_len[0], min, min_len) >= 0
384 && st_compare_pad (str[0], str_len[0], max, max_len) <= 0)
390 set_number (node, result);
394 /* Time functions. */
398 min = min (num[0], min (num[1], num[2]));
399 max = max (num[0], max (num[1], num[2]));
400 if (min < 0. && max > 0.)
402 set_number (node, 60. * (60. * num[0] + num[1]) + num[2]);
406 set_number (node, num[0] / (60. * 60. * 24.));
409 set_number (node, num[0] / (60. * 60.));
411 case OP_CTIME_MINUTES:
412 set_number (node, num[0] / 60.);
415 set_number (node, num[0] * (60. * 60. * 24.));
417 case OP_CTIME_SECONDS:
418 set_number (node, num[0]);
421 /* Date construction functions. */
423 set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[1], num[0]));
426 set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[0], num[1]));
429 set_number (node, 60. * 60. * 24. * yrmoda (num[1], num[0], 1));
433 60. * 60. * 24. * yrmoda (num[1], 3 * (int) num[0] - 2, 1));
437 double t = yrmoda (num[1], 1, 1);
438 if (num[0] < 0. || num[0] > 53.)
441 t = 60. * 60. * 24. * (t + 7. * (num[0] - 1));
442 set_number (node, t);
447 double t = yrmoda (num[0], 1, 1);
449 t = 60. * 60. * 24. * (t + num[1] - 1);
450 set_number (node, t);
454 set_number (node, yrmoda (num[0], num[1], num[2]));
457 /* Date extraction functions. */
459 set_number_errno (node,
460 floor (num[0] / 60. / 60. / 24.) * 60. * 60. * 24.);
463 set_number_errno (node, fmod (floor (num[0] / 60. / 60.), 24.));
466 set_number (node, julian_to_jday (num[0] / 86400.));
471 julian_to_calendar (num[0] / 86400., NULL, NULL, &day);
472 set_number (node, day);
475 case OP_XDATE_MINUTE:
476 set_number_errno (node, fmod (floor (num[0] / 60.), 60.));
481 julian_to_calendar (num[0] / 86400., NULL, &month, NULL);
482 set_number (node, month);
485 case OP_XDATE_QUARTER:
488 julian_to_calendar (num[0] / 86400., NULL, &month, NULL);
489 set_number (node, (month - 1) / 3 + 1);
492 case OP_XDATE_SECOND:
493 set_number_errno (node, fmod (num[0], 60.));
496 set_number_errno (node, floor (num[0] / 60. / 60. / 24.));
499 set_number_errno (node, num[0] - (floor (num[0] / 60. / 60. / 24.)
503 set_number (node, (julian_to_jday (num[0]) - 1) / 7 + 1);
506 set_number (node, julian_to_wday (num[0]));
511 julian_to_calendar (num[0] / 86400., &year, NULL, NULL);
512 set_number (node, year);
516 /* String functions. */
520 int length = str_len[0];
521 memcpy (string, str[0], length);
522 for (i = 1; i < n->n; i++)
524 int add = n->arg[i]->str_con.len;
525 if (add + length > 255)
527 memcpy (&string[length], n->arg[i]->str_con.s, add);
530 set_string (node, string, length);
538 int result, chunk_width, chunk_cnt;
540 if (n->type == OP_INDEX_2 || n->type == OP_RINDEX_2)
541 chunk_width = str_len[1];
543 chunk_width = num[2];
544 if (chunk_width <= 0 || chunk_width > str_len[1]
545 || str_len[1] % chunk_width != 0)
547 chunk_cnt = str_len[1] / chunk_width;
550 for (i = 0; i < chunk_cnt; i++)
552 const char *chunk = str[1] + chunk_width * i;
554 if (n->type == OP_INDEX_2 || n->type == OP_INDEX_3)
556 ofs = str_search (str[0], str_len[0], chunk, chunk_width);
557 if (ofs < result || result == 0)
562 ofs = str_rsearch (str[0], str_len[0], chunk, chunk_width);
567 set_number (node, result);
571 set_number (node, str_len[0]);
576 for (cp = str[0]; cp < str[0] + str_len[0]; cp++)
577 *cp = tolower ((unsigned char) *cp);
583 for (cp = str[0]; cp < str[0] + str_len[0]; cp++)
584 *cp = toupper ((unsigned char) *cp);
596 if (len < 1 || len > 255)
602 pad_char = str[2][0];
604 if (str_len[0] >= len)
606 pad_len = len - str_len[0];
607 if (n->type == OP_LPAD)
609 memset (string, pad_char, pad_len);
610 memcpy (string + pad_len, str[0], str_len[0]);
614 memcpy (string, str[0], str_len[0]);
615 memset (string + str_len[0], pad_char, pad_len);
618 set_string (node, string, len);
625 const char *cp = str[0];
626 int len = str_len[0];
631 pad_char = str[1][0];
633 if (n->type == OP_LTRIM)
634 while (len > 0 && *cp == pad_char)
637 while (len > 0 && str[0][len - 1] == pad_char)
639 set_string (node, cp, len);
645 int pos = (int) num[1];
646 if (pos > str_len[0] || pos <= 0 || num[1] == SYSMIS
647 || (n->type == OP_SUBSTR_3 && num[2] == SYSMIS))
648 set_string (node, NULL, 0);
652 if (n->type == OP_SUBSTR_3)
655 if (len + pos - 1 > str_len[0])
656 len = str_len[0] - pos + 1;
659 len = str_len[0] - pos + 1;
660 set_string (node, &str[0][pos - 1], len);
667 if (num[0] == 0.0 && num[1] == SYSMIS)
668 set_number (node, 0.0);
670 set_number (node, fmod (num[0], num[1]));
675 else if (num[0] == 1.0)
677 else if (num[0] != SYSMIS)
679 msg (SE, _("When optimizing a constant expression, an integer "
680 "that was being used as an Boolean value was found "
681 "to have a constant value other than 0, 1, or SYSMIS."));
684 set_number (node, num[0]);
690 evaluate_tree_with_missing (union any_node **node UNUSED, size_t count UNUSED)
696 collapse_node (union any_node **node, size_t child_idx)
698 struct nonterm_node *nonterm = &(*node)->nonterm;
699 union any_node *child;
701 child = nonterm->arg[child_idx];
702 nonterm->arg[child_idx] = NULL;
709 set_number (union any_node **node, double value)
711 struct num_con_node *num;
715 *node = xmalloc (sizeof *num);
716 num = &(*node)->num_con;
717 num->type = OP_NUM_CON;
718 num->value = finite (value) ? value : SYSMIS;
722 set_number_errno (union any_node **node, double value)
724 if (errno == EDOM || errno == ERANGE)
726 set_number (node, value);
730 set_string (union any_node **node, const char *string, size_t length)
732 struct str_con_node *str;
734 /* The ordering here is important since the source string may be
735 part of a subnode of n. */
736 str = xmalloc (sizeof *str + length - 1);
737 str->type = OP_STR_CON;
739 memcpy (str->s, string, length);
741 *node = (union any_node *) str;
744 /* Returns the number of days since 10 Oct 1582 for the date
745 YEAR/MONTH/DAY, where YEAR is in range 0..199 or 1582..19999, MONTH
746 is in 1..12, and DAY is in 1..31. */
748 yrmoda (double year, double month, double day)
750 if (year == SYSMIS || month == SYSMIS || day == SYSMIS)
753 /* The addition of EPSILON avoids converting, for example,
754 1991.9999997=>1991. */
755 year = floor (year + EPSILON);
756 month = floor (month + EPSILON);
757 day = floor (day + EPSILON);
759 if (year >= 0. && year <= 29.)
761 else if (year >= 30. && year <= 99.)
763 if ((year < 1582. || year > 19999.)
764 || (year == 1582. && (month < 10. || (month == 10. && day < 15.)))
765 || (month < 0 || month > 13)
766 || (day < 0 || day > 31))
768 return calendar_to_julian (year, month, day);
771 /* Expression dumper. */
773 struct expr_dump_state
775 struct expression *expr; /* Output expression. */
776 int op_cnt, op_cap; /* Number of ops, allocated space. */
777 int dbl_cnt, dbl_cap; /* Number of doubles, allocated space. */
778 int str_cnt, str_cap; /* Number of strings, allocated space. */
779 int var_cnt, var_cap; /* Number of variables, allocated space. */
782 static void dump_node (struct expr_dump_state *, union any_node * n);
783 static void emit (struct expr_dump_state *, int);
784 static void emit_num_con (struct expr_dump_state *, double);
785 static void emit_str_con (struct expr_dump_state *, char *, int);
786 static void emit_var (struct expr_dump_state *, struct variable *);
789 dump_expression (union any_node * n, struct expression * expr)
791 struct expr_dump_state eds;
801 eds.op_cnt = eds.op_cap = 0;
802 eds.dbl_cnt = eds.dbl_cap = 0;
803 eds.str_cnt = eds.str_cap = 0;
804 eds.var_cnt = eds.var_cap = 0;
806 emit (&eds, OP_SENTINEL);
808 /* Now compute the stack height needed to evaluate the expression. */
809 for (o = expr->op; *o != OP_SENTINEL; o++)
811 if (ops[*o].flags & OP_VAR_ARGS)
814 height += ops[*o].height;
816 if (height > max_height)
820 /* We waste space for one `value' since pointers are not
821 guaranteed to be able to point to a spot before a block. */
824 expr->stack = xmalloc (max_height * sizeof *expr->stack);
826 expr->pool = pool_create ();
830 dump_node (struct expr_dump_state *eds, union any_node * n)
832 if (IS_NONTERMINAL (n->type))
835 for (i = 0; i < n->nonterm.n; i++)
836 dump_node (eds, n->nonterm.arg[i]);
838 if (ops[n->type].flags & OP_VAR_ARGS)
839 emit (eds, n->nonterm.n);
840 if (ops[n->type].flags & OP_MIN_ARGS)
841 emit (eds, (int) n->nonterm.arg[n->nonterm.n]);
842 if (ops[n->type].flags & OP_FMT_SPEC)
844 emit (eds, (int) n->nonterm.arg[n->nonterm.n]);
845 emit (eds, (int) n->nonterm.arg[n->nonterm.n + 1]);
846 emit (eds, (int) n->nonterm.arg[n->nonterm.n + 2]);
852 if (n->type == OP_NUM_CON)
853 emit_num_con (eds, n->num_con.value);
854 else if (n->type == OP_STR_CON)
855 emit_str_con (eds, n->str_con.s, n->str_con.len);
856 else if (n->type == OP_NUM_VAR || n->type == OP_STR_VAR)
857 emit_var (eds, n->var.v);
858 else if (n->type == OP_NUM_LAG || n->type == OP_STR_LAG)
860 emit_var (eds, n->lag.v);
861 emit (eds, n->lag.lag);
863 else if (n->type == OP_NUM_SYS || n->type == OP_NUM_VAL)
864 emit (eds, n->var.v->fv);
866 assert (n->type == OP_CASENUM);
871 emit (struct expr_dump_state *eds, int op)
873 if (eds->op_cnt >= eds->op_cap)
876 eds->expr->op = xrealloc (eds->expr->op,
877 eds->op_cap * sizeof *eds->expr->op);
879 eds->expr->op[eds->op_cnt++] = op;
883 emit_num_con (struct expr_dump_state *eds, double dbl)
885 if (eds->dbl_cnt >= eds->dbl_cap)
888 eds->expr->num = xrealloc (eds->expr->num,
889 eds->dbl_cap * sizeof *eds->expr->num);
891 eds->expr->num[eds->dbl_cnt++] = dbl;
895 emit_str_con (struct expr_dump_state *eds, char *str, int len)
897 if (eds->str_cnt + len + 1 > eds->str_cap)
900 eds->expr->str = xrealloc (eds->expr->str, eds->str_cap);
902 eds->expr->str[eds->str_cnt++] = len;
903 memcpy (&eds->expr->str[eds->str_cnt], str, len);
908 emit_var (struct expr_dump_state *eds, struct variable * v)
910 if (eds->var_cnt >= eds->var_cap)
913 eds->expr->var = xrealloc (eds->expr->var,
914 eds->var_cap * sizeof *eds->expr->var);
916 eds->expr->var[eds->var_cnt++] = v;