1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
31 #include "julcal/julcal.h"
38 static void evaluate_tree_no_missing (union any_node **);
39 static void evaluate_tree_with_missing (union any_node **, size_t count);
40 static void optimize_tree (union any_node **);
42 static void collapse_node (union any_node **node, size_t child_idx);
43 static void set_number (union any_node **node, double);
44 static void set_number_errno (union any_node **node, double);
45 static void set_string (union any_node **node, const char *, size_t);
48 optimize_expression (union any_node **node)
50 int nonconst = 0; /* Number of nonconstant children. */
51 int sysmis = 0; /* Number of system-missing children. */
52 struct nonterm_node *nonterm;
55 /* We can't optimize a terminal node. */
56 if (IS_TERMINAL ((*node)->type))
58 nonterm = &(*node)->nonterm;
60 /* Start by optimizing all the children. */
61 for (i = 0; i < nonterm->n; i++)
63 optimize_expression (&nonterm->arg[i]);
64 if (nonterm->arg[i]->type == OP_NUM_CON)
66 if (nonterm->arg[i]->num_con.value == SYSMIS)
69 else if (nonterm->arg[i]->type != OP_STR_CON)
73 if (sysmis && !(ops[nonterm->type].flags & OP_ABSORB_MISS))
75 /* Most operation produce SYSMIS given any SYSMIS
77 set_number (node, SYSMIS);
81 /* Evaluate constant expressions. */
83 evaluate_tree_no_missing (node);
85 evaluate_tree_with_missing (node, sysmis);
89 /* A few optimization possibilities are still left. */
95 eq_num_con (union any_node *node, double number)
97 return node->type == OP_NUM_CON && node->num_con.value == number;
101 optimize_tree (union any_node **node)
103 struct nonterm_node *n = &(*node)->nonterm;
105 /* x+0, x-0, 0+x => x. */
106 if ((n->type == OP_ADD || n->type == OP_SUB) && eq_num_con (n->arg[1], 0.))
107 collapse_node (node, 1);
108 else if (n->type == OP_ADD && eq_num_con (n->arg[0], 0.))
109 collapse_node (node, 0);
111 /* x*1, x/1, 1*x => x. */
112 else if ((n->type == OP_MUL || n->type == OP_DIV)
113 && eq_num_con (n->arg[1], 1.))
114 collapse_node (node, 0);
115 else if (n->type == OP_MUL && eq_num_con (n->arg[0], 1.))
116 collapse_node (node, 1);
118 /* 0*x, 0/x, x*0, MOD(0,x) => x. */
119 else if (((n->type == OP_MUL || n->type == OP_DIV || n->type == OP_MOD)
120 && eq_num_con (n->arg[0], 0.))
121 || (n->type == OP_MUL && eq_num_con (n->arg[1], 0.)))
122 set_number (node, 0.);
125 else if (n->type == OP_POW && eq_num_con (n->arg[1], 1))
126 collapse_node (node, 0);
128 /* x**2 => SQUARE(x). */
129 else if (n->type == OP_POW && eq_num_con (n->arg[2], 2))
136 /* Finds the first NEEDLE of length NEEDLE_LEN in a HAYSTACK of length
137 HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */
139 str_search (const char *haystack, int haystack_len,
140 const char *needle, int needle_len)
142 char *p = memmem (haystack, haystack_len, needle, needle_len);
143 return p ? p - haystack + 1 : 0;
146 /* Finds the last NEEDLE of length NEEDLE_LEN in a HAYSTACK of length
147 HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */
149 str_rsearch (const char *haystack, int haystack_len,
150 const char *needle, int needle_len)
152 char *p = mm_find_reverse (haystack, haystack_len, needle, needle_len);
153 return p ? p - haystack + 1 : 0;
157 evaluate_tree_no_missing (union any_node **node)
159 struct nonterm_node *n = &(*node)->nonterm;
167 for (i = 0; i < n->n && i < 3; i++)
169 union any_node *arg = n->arg[i];
171 if (arg->type == OP_NUM_CON)
172 num[i] = arg->num_con.value;
173 else if (arg->type == OP_STR_CON)
175 str[i] = arg->str_con.s;
176 str_len[i] = arg->str_con.len;
183 set_number (node, num[0] + num[1]);
187 set_number (node, num[0] - num[1]);
191 set_number (node, num[0] * num[1]);
196 set_number (node, num[0] / num[1]);
200 if (num[0] == 0. && num[1] == 0.)
201 set_number (node, SYSMIS);
203 set_number_errno (node, pow (num[0], num[1]));
207 set_number (node, num[0] && num[1]);
211 set_number (node, num[0] || num[1]);
215 set_number (node, !num[0]);
219 set_number (node, num[0] == num[1]);
222 set_number (node, num[0] >= num[1]);
225 set_number (node, num[0] > num[1]);
228 set_number (node, num[0] <= num[1]);
231 set_number (node, num[0] < num[1]);
234 set_number (node, num[0] != num[1]);
237 /* String operators. */
239 set_number (node, st_compare_pad (str[0], str_len[0],
240 str[1], str_len[1]) == 0);
243 set_number (node, st_compare_pad (str[0], str_len[0],
244 str[1], str_len[1]) >= 0);
247 set_number (node, st_compare_pad (str[0], str_len[0],
248 str[1], str_len[1]) > 0);
251 set_number (node, st_compare_pad (str[0], str_len[0],
252 str[1], str_len[1]) <= 0);
255 set_number (node, st_compare_pad (str[0], str_len[0],
256 str[1], str_len[1]) < 0);
259 set_number (node, st_compare_pad (str[0], str_len[0],
260 str[1], str_len[1]) != 0);
263 /* Unary functions. */
265 set_number (node, -num[0]);
268 set_number (node, fabs (num[0]));
271 set_number_errno (node, acos (num[0]));
274 set_number_errno (node, asin (num[0]));
277 set_number_errno (node, atan (num[0]));
280 set_number_errno (node, cos (num[0]));
283 set_number_errno (node, exp (num[0]));
286 set_number_errno (node, log10 (num[0]));
289 set_number_errno (node, log (num[0]));
292 set_number_errno (node, fmod (num[0], 10));
296 set_number_errno (node, floor (num[0] + 0.5));
298 set_number_errno (node, -floor (-num[0] + 0.5));
301 set_number_errno (node, sin (num[0]));
304 set_number_errno (node, sqrt (num[0]));
307 set_number_errno (node, tan (num[0]));
311 set_number_errno (node, floor (num[0]));
313 set_number_errno (node, -floor (-num[0]));
316 /* N-ary numeric functions. */
320 for (i = 1; i < n->n; i++)
321 if (num[0] == n->arg[i]->num_con.value)
326 set_number (node, result);
332 for (i = 1; i < n->n; i++)
333 if (!st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len,
334 n->arg[i]->str_con.s, n->arg[i]->str_con.len))
339 set_number (node, result);
359 for (i = 1; i < n->n; i += 2)
361 double min = n->arg[i]->num_con.value;
362 double max = n->arg[i + 1]->num_con.value;
363 if (num[0] >= min && num[0] <= max)
369 set_number (node, result);
373 case OP_RANGE_STRING:
377 for (i = 1; i < n->n; i += 2)
379 const char *min = n->arg[i]->str_con.s;
380 size_t min_len = n->arg[i]->str_con.len;
381 const char *max = n->arg[i + 1]->str_con.s;
382 size_t max_len = n->arg[i + 1]->str_con.len;
384 if (st_compare_pad (str[0], str_len[0], min, min_len) >= 0
385 && st_compare_pad (str[0], str_len[0], max, max_len) <= 0)
391 set_number (node, result);
395 /* Time functions. */
399 min = min (num[0], min (num[1], num[2]));
400 max = max (num[0], max (num[1], num[2]));
401 if (min < 0. && max > 0.)
403 set_number (node, 60. * (60. * num[0] + num[1]) + num[2]);
407 set_number (node, num[0] / (60. * 60. * 24.));
410 set_number (node, num[0] / (60. * 60.));
412 case OP_CTIME_MINUTES:
413 set_number (node, num[0] / 60.);
416 set_number (node, num[0] * (60. * 60. * 24.));
418 case OP_CTIME_SECONDS:
419 set_number (node, num[0]);
422 /* Date construction functions. */
424 set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[1], num[0]));
427 set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[0], num[1]));
430 set_number (node, 60. * 60. * 24. * yrmoda (num[1], num[0], 1));
434 60. * 60. * 24. * yrmoda (num[1], 3 * (int) num[0] - 2, 1));
438 double t = yrmoda (num[1], 1, 1);
439 if (num[0] < 0. || num[0] > 53.)
442 t = 60. * 60. * 24. * (t + 7. * (num[0] - 1));
443 set_number (node, t);
448 double t = yrmoda (num[0], 1, 1);
450 t = 60. * 60. * 24. * (t + num[1] - 1);
451 set_number (node, t);
455 set_number (node, yrmoda (num[0], num[1], num[2]));
458 /* Date extraction functions. */
460 set_number_errno (node,
461 floor (num[0] / 60. / 60. / 24.) * 60. * 60. * 24.);
464 set_number_errno (node, fmod (floor (num[0] / 60. / 60.), 24.));
467 set_number (node, julian_to_jday (num[0] / 86400.));
472 julian_to_calendar (num[0] / 86400., NULL, NULL, &day);
473 set_number (node, day);
476 case OP_XDATE_MINUTE:
477 set_number_errno (node, fmod (floor (num[0] / 60.), 60.));
482 julian_to_calendar (num[0] / 86400., NULL, &month, NULL);
483 set_number (node, month);
486 case OP_XDATE_QUARTER:
489 julian_to_calendar (num[0] / 86400., NULL, &month, NULL);
490 set_number (node, (month - 1) / 3 + 1);
493 case OP_XDATE_SECOND:
494 set_number_errno (node, fmod (num[0], 60.));
497 set_number_errno (node, floor (num[0] / 60. / 60. / 24.));
500 set_number_errno (node, num[0] - (floor (num[0] / 60. / 60. / 24.)
504 set_number (node, (julian_to_jday (num[0]) - 1) / 7 + 1);
507 set_number (node, julian_to_wday (num[0]));
512 julian_to_calendar (num[0] / 86400., &year, NULL, NULL);
513 set_number (node, year);
517 /* String functions. */
521 int length = str_len[0];
522 memcpy (string, str[0], length);
523 for (i = 1; i < n->n; i++)
525 int add = n->arg[i]->str_con.len;
526 if (add + length > 255)
528 memcpy (&string[length], n->arg[i]->str_con.s, add);
531 set_string (node, string, length);
539 int result, chunk_width, chunk_cnt;
541 if (n->type == OP_INDEX_2 || n->type == OP_RINDEX_2)
542 chunk_width = str_len[1];
544 chunk_width = num[2];
545 if (chunk_width <= 0 || chunk_width > str_len[1]
546 || str_len[1] % chunk_width != 0)
548 chunk_cnt = str_len[1] / chunk_width;
551 for (i = 0; i < chunk_cnt; i++)
553 const char *chunk = str[1] + chunk_width * i;
555 if (n->type == OP_INDEX_2 || n->type == OP_INDEX_3)
557 ofs = str_search (str[0], str_len[0], chunk, chunk_width);
558 if (ofs < result || result == 0)
563 ofs = str_rsearch (str[0], str_len[0], chunk, chunk_width);
568 set_number (node, result);
572 set_number (node, str_len[0]);
577 for (cp = str[0]; cp < str[0] + str_len[0]; cp++)
578 *cp = tolower ((unsigned char) *cp);
584 for (cp = str[0]; cp < str[0] + str_len[0]; cp++)
585 *cp = toupper ((unsigned char) *cp);
597 if (len < 1 || len > 255)
603 pad_char = str[2][0];
605 if (str_len[0] >= len)
607 pad_len = len - str_len[0];
608 if (n->type == OP_LPAD)
610 memset (string, pad_char, pad_len);
611 memcpy (string + pad_len, str[0], str_len[0]);
615 memcpy (string, str[0], str_len[0]);
616 memset (string + str_len[0], pad_char, pad_len);
619 set_string (node, string, len);
626 const char *cp = str[0];
627 int len = str_len[0];
632 pad_char = str[1][0];
634 if (n->type == OP_LTRIM)
635 while (len > 0 && *cp == pad_char)
638 while (len > 0 && str[0][len - 1] == pad_char)
640 set_string (node, cp, len);
646 int pos = (int) num[1];
647 if (pos > str_len[0] || pos <= 0 || num[1] == SYSMIS
648 || (n->type == OP_SUBSTR_3 && num[2] == SYSMIS))
649 set_string (node, NULL, 0);
653 if (n->type == OP_SUBSTR_3)
656 if (len + pos - 1 > str_len[0])
657 len = str_len[0] - pos + 1;
660 len = str_len[0] - pos + 1;
661 set_string (node, &str[0][pos - 1], len);
668 if (num[0] == 0.0 && num[1] == SYSMIS)
669 set_number (node, 0.0);
671 set_number (node, fmod (num[0], num[1]));
676 else if (num[0] == 1.0)
678 else if (num[0] != SYSMIS)
680 msg (SE, _("When optimizing a constant expression, an integer "
681 "that was being used as an Boolean value was found "
682 "to have a constant value other than 0, 1, or SYSMIS."));
685 set_number (node, num[0]);
691 evaluate_tree_with_missing (union any_node **node UNUSED, size_t count UNUSED)
697 collapse_node (union any_node **node, size_t child_idx)
699 struct nonterm_node *nonterm = &(*node)->nonterm;
700 union any_node *child;
702 child = nonterm->arg[child_idx];
703 nonterm->arg[child_idx] = NULL;
710 set_number (union any_node **node, double value)
712 struct num_con_node *num;
716 *node = xmalloc (sizeof *num);
717 num = &(*node)->num_con;
718 num->type = OP_NUM_CON;
719 num->value = finite (value) ? value : SYSMIS;
723 set_number_errno (union any_node **node, double value)
725 if (errno == EDOM || errno == ERANGE)
727 set_number (node, value);
731 set_string (union any_node **node, const char *string, size_t length)
733 struct str_con_node *str;
735 /* The ordering here is important since the source string may be
736 part of a subnode of n. */
737 str = xmalloc (sizeof *str + length - 1);
738 str->type = OP_STR_CON;
740 memcpy (str->s, string, length);
742 *node = (union any_node *) str;
745 /* Returns the number of days since 10 Oct 1582 for the date
746 YEAR/MONTH/DAY, where YEAR is in range 0..199 or 1582..19999, MONTH
747 is in 1..12, and DAY is in 1..31. */
749 yrmoda (double year, double month, double day)
751 if (year == SYSMIS || month == SYSMIS || day == SYSMIS)
754 /* The addition of EPSILON avoids converting, for example,
755 1991.9999997=>1991. */
756 year = floor (year + EPSILON);
757 month = floor (month + EPSILON);
758 day = floor (day + EPSILON);
760 if (year >= 0. && year <= 29.)
762 else if (year >= 30. && year <= 99.)
764 if ((year < 1582. || year > 19999.)
765 || (year == 1582. && (month < 10. || (month == 10. && day < 15.)))
766 || (month < 0 || month > 13)
767 || (day < 0 || day > 31))
769 return calendar_to_julian (year, month, day);
772 /* Expression dumper. */
774 struct expr_dump_state
776 struct expression *expr; /* Output expression. */
777 int op_cnt, op_cap; /* Number of ops, allocated space. */
778 int dbl_cnt, dbl_cap; /* Number of doubles, allocated space. */
779 int str_cnt, str_cap; /* Number of strings, allocated space. */
780 int var_cnt, var_cap; /* Number of variables, allocated space. */
783 static void dump_node (struct expr_dump_state *, union any_node * n);
784 static void emit (struct expr_dump_state *, int);
785 static void emit_num_con (struct expr_dump_state *, double);
786 static void emit_str_con (struct expr_dump_state *, char *, int);
787 static void emit_var (struct expr_dump_state *, struct variable *);
790 dump_expression (union any_node * n, struct expression * expr)
792 struct expr_dump_state eds;
802 eds.op_cnt = eds.op_cap = 0;
803 eds.dbl_cnt = eds.dbl_cap = 0;
804 eds.str_cnt = eds.str_cap = 0;
805 eds.var_cnt = eds.var_cap = 0;
807 emit (&eds, OP_SENTINEL);
809 /* Now compute the stack height needed to evaluate the expression. */
810 for (o = expr->op; *o != OP_SENTINEL; o++)
812 if (ops[*o].flags & OP_VAR_ARGS)
815 height += ops[*o].height;
817 if (height > max_height)
821 /* We waste space for one `value' since pointers are not
822 guaranteed to be able to point to a spot before a block. */
825 expr->stack = xmalloc (max_height * sizeof *expr->stack);
827 expr->pool = pool_create ();
831 dump_node (struct expr_dump_state *eds, union any_node * n)
833 if (IS_NONTERMINAL (n->type))
836 for (i = 0; i < n->nonterm.n; i++)
837 dump_node (eds, n->nonterm.arg[i]);
839 if (ops[n->type].flags & OP_VAR_ARGS)
840 emit (eds, n->nonterm.n);
841 if (ops[n->type].flags & OP_MIN_ARGS)
842 emit (eds, (int) n->nonterm.arg[n->nonterm.n]);
843 if (ops[n->type].flags & OP_FMT_SPEC)
845 emit (eds, (int) n->nonterm.arg[n->nonterm.n]);
846 emit (eds, (int) n->nonterm.arg[n->nonterm.n + 1]);
847 emit (eds, (int) n->nonterm.arg[n->nonterm.n + 2]);
853 if (n->type == OP_NUM_CON)
854 emit_num_con (eds, n->num_con.value);
855 else if (n->type == OP_STR_CON)
856 emit_str_con (eds, n->str_con.s, n->str_con.len);
857 else if (n->type == OP_NUM_VAR || n->type == OP_STR_VAR)
858 emit_var (eds, n->var.v);
859 else if (n->type == OP_NUM_LAG || n->type == OP_STR_LAG)
861 emit_var (eds, n->lag.v);
862 emit (eds, n->lag.lag);
864 else if (n->type == OP_NUM_SYS || n->type == OP_NUM_VAL)
865 emit (eds, n->var.v->fv);
867 assert (n->type == OP_CASENUM);
872 emit (struct expr_dump_state *eds, int op)
874 if (eds->op_cnt >= eds->op_cap)
877 eds->expr->op = xrealloc (eds->expr->op,
878 eds->op_cap * sizeof *eds->expr->op);
880 eds->expr->op[eds->op_cnt++] = op;
884 emit_num_con (struct expr_dump_state *eds, double dbl)
886 if (eds->dbl_cnt >= eds->dbl_cap)
889 eds->expr->num = xrealloc (eds->expr->num,
890 eds->dbl_cap * sizeof *eds->expr->num);
892 eds->expr->num[eds->dbl_cnt++] = dbl;
896 emit_str_con (struct expr_dump_state *eds, char *str, int len)
898 if (eds->str_cnt + len + 1 > eds->str_cap)
901 eds->expr->str = xrealloc (eds->expr->str, eds->str_cap);
903 eds->expr->str[eds->str_cnt++] = len;
904 memcpy (&eds->expr->str[eds->str_cnt], str, len);
909 emit_var (struct expr_dump_state *eds, struct variable * v)
911 if (eds->var_cnt >= eds->var_cap)
914 eds->expr->var = xrealloc (eds->expr->var,
915 eds->var_cap * sizeof *eds->expr->var);
917 eds->expr->var[eds->var_cnt++] = v;