X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fexpr-opt.c;h=5f574f9918d41b10f9a61634a9fae990b55e6249;hb=97d6c6f6b1922621ca013668eba9a9a9f71d60fe;hp=2e65b3e3b92bead21a5d82a4d1b9e1412c1bb8f7;hpb=564f1eac7ce351b863dba65b75d71fdf3c92b73e;p=pspp-builds.git diff --git a/src/expr-opt.c b/src/expr-opt.c index 2e65b3e3..5f574f99 100644 --- a/src/expr-opt.c +++ b/src/expr-opt.c @@ -35,223 +35,109 @@ #include "str.h" #include "var.h" -/* - Expression "optimizer" +static void evaluate_tree_no_missing (union any_node **); +static void evaluate_tree_with_missing (union any_node **, size_t count); +static void optimize_tree (union any_node **); - Operates on the tree representation of expressions. - optimize_expression() performs the optimizations listed below: +static void collapse_node (union any_node **node, size_t child_idx); +static void set_number (union any_node **node, double); +static void set_number_errno (union any_node **node, double); +static void set_string (union any_node **node, const char *, size_t); - 1. Constant folding - Any operation with constant operands is replaced by its value. - (Exception: random-number-generator functions.) - - 2. Strength reduction (x is any expression; a is a numeric constant) - x/0 => SYSMIS - x*0 => 0 - x**0 => 1 - x**1, x+0, x-0, x*1 => x - x**2 => sqr(x) - x/a => x*(1/a) (where 1/a is evaluated at optimization time) - - I thought about adding additional optimizations but decided that what - is here could already be considered overkill. - */ - -static struct nonterm_node *evaluate_tree (struct nonterm_node * n); -static struct nonterm_node *optimize_tree (struct nonterm_node * n); - -struct nonterm_node * -optimize_expression (struct nonterm_node * n) +void +optimize_expression (union any_node **node) { + int nonconst = 0; /* Number of nonconstant children. */ + int sysmis = 0; /* Number of system-missing children. */ + struct nonterm_node *nonterm; int i; - /* Set to 1 if a child is nonconstant. */ - int nonconst = 0; - - /* Number of system-missing children. */ - int sysmis = 0; - /* We can't optimize a terminal node. */ - if (n->type > OP_TERMINAL) - return n; + if (IS_TERMINAL ((*node)->type)) + return; + nonterm = &(*node)->nonterm; /* Start by optimizing all the children. */ - for (i = 0; i < n->n; i++) + for (i = 0; i < nonterm->n; i++) { - n->arg[i] = ((union any_node *) - optimize_expression ((struct nonterm_node *) n->arg[i])); - if (n->arg[i]->type == OP_NUM_CON) + optimize_expression (&nonterm->arg[i]); + if (nonterm->arg[i]->type == OP_NUM_CON) { - if (n->arg[i]->num_con.value == SYSMIS) + if (nonterm->arg[i]->num_con.value == SYSMIS) sysmis++; } - else if (n->arg[i]->type != OP_STR_CON) - nonconst = 1; + else if (nonterm->arg[i]->type != OP_STR_CON) + nonconst++; } - if (sysmis && !(ops[n->type].flags & OP_ABSORB_MISS)) - /* Just about any operation produces SYSMIS when given any SYSMIS - arguments. */ + if (sysmis && !(ops[nonterm->type].flags & OP_ABSORB_MISS)) + { + /* Most operation produce SYSMIS given any SYSMIS + argument. */ + set_number (node, SYSMIS); + } + else if (!nonconst) + { + /* Evaluate constant expressions. */ + if (!sysmis) + evaluate_tree_no_missing (node); + else + evaluate_tree_with_missing (node, sysmis); + } + else { - struct num_con_node *num = xmalloc (sizeof *num); - free_node ((union any_node *) n); - num->type = OP_NUM_CON; - num->value = SYSMIS; - n = (struct nonterm_node *) num; + /* A few optimization possibilities are still left. */ + optimize_tree (node); } - else if (!nonconst) - /* If all the children of this node are constants, then there are - obvious optimizations. */ - n = evaluate_tree (n); - else - /* Otherwise, we may be able to make certain optimizations - anyway. */ - n = optimize_tree (n); - return n; } -static struct nonterm_node *repl_num_con (struct nonterm_node *, double); -static struct nonterm_node *force_repl_num_con (struct nonterm_node *, double); -static struct nonterm_node *repl_str_con (struct nonterm_node *, char *, int); - -#define n0 n->arg[0]->num_con.value -#define n1 n->arg[1]->num_con.value -#define n2 n->arg[2]->num_con.value - -#define s0 n->arg[0]->str_con.s -#define s0l n->arg[0]->str_con.len -#define s1 n->arg[1]->str_con.s -#define s1l n->arg[1]->str_con.len -#define s2 n->arg[2]->str_con.s -#define s2l n->arg[2]->str_con.len -#define s(X) n->arg[X]->str_con.s -#define sl(X) n->arg[X]->str_con.len - -static struct nonterm_node * -optimize_tree (struct nonterm_node * n) +static int +eq_num_con (union any_node *node, double number) { - int i; - - errno = 0; - if (n->type == OP_PLUS || n->type == OP_MUL) - { - /* Default constant value. */ - double def = n->type == OP_MUL ? 1.0 : 0.0; - - /* Total value of all the constants. */ - double cval = def; - - /* Number of nonconst arguments. */ - int nvar = 0; - - /* New node. */ - struct nonterm_node *m = NULL; - - /* Argument copying counter. */ - int c; - - /* 1=SYSMIS encountered */ - int sysmis = 0; - - for (i = 0; i < n->n; i++) - if (n->arg[i]->type == OP_NUM_CON) - { - if (n->arg[i]->num_con.value != SYSMIS) - { - if (n->type == OP_MUL) - cval *= n->arg[i]->num_con.value; - else - cval += n->arg[i]->num_con.value; - } - else - sysmis++; - } - else - nvar++; - - /* 0*SYSMIS=0, 0/SYSMIS=0; otherwise, SYSMIS and infinities - produce SYSMIS. */ - if (cval == 0.0 && n->type == OP_MUL) - nvar = 0; - else if (sysmis || !finite (cval)) - { - nvar = 0; - cval = SYSMIS; - } - - /* If no nonconstant terms, replace with a constant node. */ - if (nvar == 0) - return force_repl_num_con (n, cval); + return node->type == OP_NUM_CON && node->num_con.value == number; +} - if (nvar == 1 && cval == def) - { - /* If there is exactly one nonconstant term and no constant - terms, replace with the nonconstant term. */ - for (i = 0; i < n->n; i++) - if (n->arg[i]->type != OP_NUM_CON) - m = (struct nonterm_node *) n->arg[i]; - else - free_node (n->arg[i]); - } - else - { - /* Otherwise consolidate all the nonconstant terms. */ - m = xmalloc (sizeof (struct nonterm_node) - + ((nvar + (cval != def) - 1) - * sizeof (union any_node *))); - for (i = c = 0; i < n->n; i++) - if (n->arg[i]->type != OP_NUM_CON) - m->arg[c++] = n->arg[i]; - else - free_node (n->arg[i]); - - if (cval != def) - { - m->arg[c] = xmalloc (sizeof (struct num_con_node)); - m->arg[c]->num_con.type = OP_NUM_CON; - m->arg[c]->num_con.value = cval; - c++; - } - - m->type = n->type; - m->n = c; - } - free (n); - n = m; - } - else if (n->type == OP_POW) +static void +optimize_tree (union any_node **node) +{ + struct nonterm_node *n = &(*node)->nonterm; + + /* x+0, x-0, 0+x => x. */ + if ((n->type == OP_ADD || n->type == OP_SUB) && eq_num_con (n->arg[1], 0.)) + collapse_node (node, 1); + else if (n->type == OP_ADD && eq_num_con (n->arg[0], 0.)) + collapse_node (node, 0); + + /* x*1, x/1, 1*x => x. */ + else if ((n->type == OP_MUL || n->type == OP_DIV) + && eq_num_con (n->arg[1], 1.)) + collapse_node (node, 0); + else if (n->type == OP_MUL && eq_num_con (n->arg[0], 1.)) + collapse_node (node, 1); + + /* 0*x, 0/x, x*0, MOD(0,x) => x. */ + else if (((n->type == OP_MUL || n->type == OP_DIV || n->type == OP_MOD) + && eq_num_con (n->arg[0], 0.)) + || (n->type == OP_MUL && eq_num_con (n->arg[1], 0.))) + set_number (node, 0.); + + /* x**1 => x. */ + else if (n->type == OP_POW && eq_num_con (n->arg[1], 1)) + collapse_node (node, 0); + + /* x**2 => SQUARE(x). */ + else if (n->type == OP_POW && eq_num_con (n->arg[2], 2)) { - if (n->arg[1]->type == OP_NUM_CON) - { - if (n1 == 1.0) - { - struct nonterm_node *m = (struct nonterm_node *) n->arg[0]; - - free_node (n->arg[1]); - free (n); - return m; - } - else if (n1 == 2.0) - { - n = xrealloc (n, sizeof (struct nonterm_node)); - n->type = OP_SQUARE; - n->n = 1; - } - } + n->type = OP_SQUARE; + n->n = 1; } - return n; } -#define rnc(D) \ - (n = repl_num_con (n, D)) - -#define frnc(D) \ - (n = force_repl_num_con (n, D)) - /* Finds the first NEEDLE of length NEEDLE_LEN in a HAYSTACK of length HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */ -static inline int -str_search (char *haystack, int haystack_len, char *needle, int needle_len) +static int +str_search (const char *haystack, int haystack_len, + const char *needle, int needle_len) { char *p = memmem (haystack, haystack_len, needle, needle_len); return p ? p - haystack + 1 : 0; @@ -259,182 +145,199 @@ str_search (char *haystack, int haystack_len, char *needle, int needle_len) /* Finds the last NEEDLE of length NEEDLE_LEN in a HAYSTACK of length HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */ -static inline int -str_rsearch (char *haystack, int haystack_len, char *needle, int needle_len) +static int +str_rsearch (const char *haystack, int haystack_len, + const char *needle, int needle_len) { char *p = mm_find_reverse (haystack, haystack_len, needle, needle_len); return p ? p - haystack + 1 : 0; } -static struct nonterm_node * -evaluate_tree (struct nonterm_node * n) +static void +evaluate_tree_no_missing (union any_node **node) { - static char *strbuf; - int add; - int len; + struct nonterm_node *n = &(*node)->nonterm; + double num[3]; + char *str[3]; + size_t str_len[3]; int i; - if (!strbuf) - strbuf = xmalloc (256); errno = 0; + for (i = 0; i < n->n && i < 3; i++) + { + union any_node *arg = n->arg[i]; + + if (arg->type == OP_NUM_CON) + num[i] = arg->num_con.value; + else if (arg->type == OP_STR_CON) + { + str[i] = arg->str_con.s; + str_len[i] = arg->str_con.len; + } + } + switch (n->type) { - case OP_PLUS: + case OP_ADD: + set_number (node, num[0] + num[1]); + break; + + case OP_SUB: + set_number (node, num[0] - num[1]); + break; + case OP_MUL: - return optimize_tree (n); + set_number (node, num[0] * num[1]); + break; + + case OP_DIV: + if (num[1] != 0.) + set_number (node, num[0] / num[1]); + break; case OP_POW: - if (n0 == 0.0 && n1 == 0.0) - frnc (SYSMIS); - else if (n0 == SYSMIS && n1 == 0.0) - frnc (1.0); - else if (n0 == 0.0 && n1 == SYSMIS) - frnc (0.0); + if (num[0] == 0. && num[1] == 0.) + set_number (node, SYSMIS); else - rnc (pow (n0, n1)); + set_number_errno (node, pow (num[0], num[1])); break; case OP_AND: - if (n0 == 0.0 || n1 == 0.0) - frnc (0.0); - else if (n0 == SYSMIS || n1 == SYSMIS) - frnc (SYSMIS); - else - frnc (1.0); + set_number (node, num[0] && num[1]); break; + case OP_OR: - if (n0 == 1.0 || n1 == 1.0) - frnc (1.0); - else if (n0 == SYSMIS || n1 == SYSMIS) - frnc (SYSMIS); - else - frnc (0.0); + set_number (node, num[0] || num[1]); break; + case OP_NOT: - rnc (n0 == 0.0 ? 1.0 : 0.0); + set_number (node, !num[0]); break; case OP_EQ: - rnc (n0 == n1); + set_number (node, num[0] == num[1]); break; case OP_GE: - rnc (n0 >= n1); + set_number (node, num[0] >= num[1]); break; case OP_GT: - rnc (n0 > n1); + set_number (node, num[0] > num[1]); break; case OP_LE: - rnc (n0 <= n1); + set_number (node, num[0] <= num[1]); break; case OP_LT: - rnc (n0 < n1); + set_number (node, num[0] < num[1]); break; case OP_NE: - rnc (n0 != n1); + set_number (node, num[0] != num[1]); break; /* String operators. */ - case OP_STRING_EQ: - rnc (st_compare_pad (s0, s0l, s1, s1l) == 0); + case OP_EQ_STRING: + set_number (node, st_compare_pad (str[0], str_len[0], + str[1], str_len[1]) == 0); break; - case OP_STRING_GE: - rnc (st_compare_pad (s0, s0l, s1, s1l) >= 0); + case OP_GE_STRING: + set_number (node, st_compare_pad (str[0], str_len[0], + str[1], str_len[1]) >= 0); break; - case OP_STRING_GT: - rnc (st_compare_pad (s0, s0l, s1, s1l) > 0); + case OP_GT_STRING: + set_number (node, st_compare_pad (str[0], str_len[0], + str[1], str_len[1]) > 0); break; - case OP_STRING_LE: - rnc (st_compare_pad (s0, s0l, s1, s1l) <= 0); + case OP_LE_STRING: + set_number (node, st_compare_pad (str[0], str_len[0], + str[1], str_len[1]) <= 0); break; - case OP_STRING_LT: - rnc (st_compare_pad (s0, s0l, s1, s1l) < 0); + case OP_LT_STRING: + set_number (node, st_compare_pad (str[0], str_len[0], + str[1], str_len[1]) < 0); break; - case OP_STRING_NE: - rnc (st_compare_pad (s0, s0l, s1, s1l) != 0); + case OP_NE_STRING: + set_number (node, st_compare_pad (str[0], str_len[0], + str[1], str_len[1]) != 0); break; /* Unary functions. */ case OP_NEG: - rnc (-n0); + set_number (node, -num[0]); break; case OP_ABS: - rnc (fabs (n0)); + set_number (node, fabs (num[0])); break; case OP_ARCOS: - rnc (acos (n0)); + set_number_errno (node, acos (num[0])); break; case OP_ARSIN: - rnc (asin (n0)); + set_number_errno (node, asin (num[0])); break; case OP_ARTAN: - rnc (atan (n0)); + set_number_errno (node, atan (num[0])); break; case OP_COS: - rnc (cos (n0)); + set_number_errno (node, cos (num[0])); break; case OP_EXP: - rnc (exp (n0)); + set_number_errno (node, exp (num[0])); break; case OP_LG10: - rnc (log10 (n0)); + set_number_errno (node, log10 (num[0])); break; case OP_LN: - rnc (log (n0)); + set_number_errno (node, log (num[0])); break; case OP_MOD10: - rnc (fmod (n0, 10)); + set_number_errno (node, fmod (num[0], 10)); break; case OP_RND: - rnc (n0 >= 0.0 ? floor (n0 + 0.5) : -floor (-n0 + 0.5)); + if (num[0] >= 0.0) + set_number_errno (node, floor (num[0] + 0.5)); + else + set_number_errno (node, -floor (-num[0] + 0.5)); break; case OP_SIN: - rnc (sin (n0)); + set_number_errno (node, sin (num[0])); break; case OP_SQRT: - rnc (sqrt (n0)); + set_number_errno (node, sqrt (num[0])); break; case OP_TAN: - rnc (tan (n0)); + set_number_errno (node, tan (num[0])); break; case OP_TRUNC: - rnc (n0 >= 0.0 ? floor (n0) : -floor (-n0)); + if (num[0] >= 0.0) + set_number_errno (node, floor (num[0])); + else + set_number_errno (node, -floor (-num[0])); break; /* N-ary numeric functions. */ case OP_ANY: - if (n0 == SYSMIS) - frnc (SYSMIS); - else - { - int sysmis = 1; - double ni; - - for (i = 1; i < n->n; i++) - { - ni = n->arg[i]->num_con.value; - if (n0 == ni) - { - frnc (1.0); - goto any_done; - } - if (ni != SYSMIS) - sysmis = 0; - } - frnc (sysmis ? SYSMIS : 0.0); - } - any_done: - break; - case OP_ANY_STRING: - for (i = 1; i < n->n; i++) - if (!st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len, - n->arg[i]->str_con.s, n->arg[i]->str_con.len)) - { - frnc (1.0); - goto any_string_done; - } - frnc (0.0); - any_string_done: + { + double result = 0.0; + for (i = 1; i < n->n; i++) + if (num[0] == n->arg[i]->num_con.value) + { + result = 1.0; + break; + } + set_number (node, result); + } + break; + case OP_ANY_STRING: + { + double result = 0.0; + for (i = 1; i < n->n; i++) + if (!st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len, + n->arg[i]->str_con.s, n->arg[i]->str_con.len)) + { + result = 1.0; + break; + } + set_number (node, result); + } break; case OP_CFVAR: @@ -446,507 +349,397 @@ evaluate_tree (struct nonterm_node * n) case OP_SD: case OP_SUM: case OP_VARIANCE: + /* FIXME */ + break; + + case OP_RANGE: { - double d[2] = - {0.0, 0.0}; /* sum, sum of squares */ - double min = DBL_MAX; /* minimum value */ - double max = -DBL_MAX; /* maximum value */ - double ni; /* value of i'th argument */ - int nv = 0; /* number of valid arguments */ - - for (i = 0; i < n->n; i++) - { - ni = n->arg[i]->num_con.value; - if (ni != SYSMIS) - { - nv++; - d[0] += ni; - d[1] += ni * ni; - if (ni < min) - min = ni; - if (ni > max) - max = ni; - } - } - if (n->type == OP_NMISS) - frnc (i - nv); - else if (n->type == OP_NVALID) - frnc (nv); - else if (nv >= (int) n->arg[i]) - { - switch (n->type) - { - case OP_CFVAR: - frnc (calc_cfvar (d, nv)); - break; - case OP_MAX: - frnc (max); - break; - case OP_MEAN: - frnc (calc_mean (d, nv)); - break; - case OP_MIN: - frnc (min); - break; - case OP_SD: - frnc (calc_stddev (calc_variance (d, nv))); - break; - case OP_SUM: - frnc (d[0]); - break; - case OP_VARIANCE: - frnc (calc_variance (d, nv)); - break; - } - } - else - frnc (SYSMIS); + double result = 0.0; + + for (i = 1; i < n->n; i += 2) + { + double min = n->arg[i]->num_con.value; + double max = n->arg[i + 1]->num_con.value; + if (num[0] >= min && num[0] <= max) + { + result = 1.0; + break; + } + } + set_number (node, result); } break; - case OP_RANGE: - if (n0 == SYSMIS) - frnc (SYSMIS); - else - { - double min, max; - int sysmis = 1; - - for (i = 1; i < n->n; i += 2) - { - min = n->arg[i]->num_con.value; - max = n->arg[i + 1]->num_con.value; - if (min == SYSMIS || max == SYSMIS) - continue; - sysmis = 0; - if (n0 >= min && n0 <= max) - { - frnc (1.0); - goto range_done; - } - } - frnc (sysmis ? SYSMIS : 0.0); - } - range_done: - break; - case OP_RANGE_STRING: - for (i = 1; i < n->n; i += 2) - if (st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len, - n->arg[i]->str_con.s, n->arg[i]->str_con.len) >= 0 - && st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len, - n->arg[i + 1]->str_con.s, - n->arg[i + 1]->str_con.len) <= 0) - { - frnc (1.0); - goto range_str_done; - } - frnc (0.0); - range_str_done: - break; - /* Time function. */ + case OP_RANGE_STRING: + { + double result = 0.0; + + for (i = 1; i < n->n; i += 2) + { + const char *min = n->arg[i]->str_con.s; + size_t min_len = n->arg[i]->str_con.len; + const char *max = n->arg[i + 1]->str_con.s; + size_t max_len = n->arg[i + 1]->str_con.len; + + if (st_compare_pad (str[0], str_len[0], min, min_len) >= 0 + && st_compare_pad (str[0], str_len[0], max, max_len) <= 0) + { + result = 1.0; + break; + } + } + set_number (node, result); + break; + } + + /* Time functions. */ case OP_TIME_HMS: - rnc (60. * (60. * n0 + n1) + n2); + { + double min, max; + min = min (num[0], min (num[1], num[2])); + max = max (num[0], max (num[1], num[2])); + if (min < 0. && max > 0.) + break; + set_number (node, 60. * (60. * num[0] + num[1]) + num[2]); + } + break; + case OP_CTIME_DAYS: + set_number (node, num[0] / (60. * 60. * 24.)); + break; + case OP_CTIME_HOURS: + set_number (node, num[0] / (60. * 60.)); + break; + case OP_CTIME_MINUTES: + set_number (node, num[0] / 60.); + break; + case OP_TIME_DAYS: + set_number (node, num[0] * (60. * 60. * 24.)); + break; + case OP_CTIME_SECONDS: + set_number (node, num[0]); break; /* Date construction functions. */ case OP_DATE_DMY: - rnc (60. * 60. * 24. * yrmoda (n2, n1, n0)); + set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[1], num[0])); break; case OP_DATE_MDY: - rnc (60. * 60. * 24. * yrmoda (n2, n0, n1)); + set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[0], num[1])); break; case OP_DATE_MOYR: - rnc (60. * 60. * 24. * yrmoda (n1, n0, 1)); + set_number (node, 60. * 60. * 24. * yrmoda (num[1], num[0], 1)); break; case OP_DATE_QYR: - rnc (60. * 60. * 24. * yrmoda (n1, 3 * (int) n0 - 2, 1)); + set_number (node, + 60. * 60. * 24. * yrmoda (num[1], 3 * (int) num[0] - 2, 1)); break; case OP_DATE_WKYR: { - double t = yrmoda (n1, 1, 1); + double t = yrmoda (num[1], 1, 1); + if (num[0] < 0. || num[0] > 53.) + break; if (t != SYSMIS) - t = 60. * 60. * 24. * (t + 7. * (n0 - 1)); - rnc (t); + t = 60. * 60. * 24. * (t + 7. * (num[0] - 1)); + set_number (node, t); } break; case OP_DATE_YRDAY: { - double t = yrmoda (n0, 1, 1); + double t = yrmoda (num[0], 1, 1); if (t != SYSMIS) - t = 60. * 60. * 24. * (t + n0 - 1); - rnc (t); + t = 60. * 60. * 24. * (t + num[1] - 1); + set_number (node, t); } break; case OP_YRMODA: - rnc (yrmoda (n0, n1, n2)); + set_number (node, yrmoda (num[0], num[1], num[2])); break; + /* Date extraction functions. */ case OP_XDATE_DATE: - rnc (floor (n0 / 60. / 60. / 24.) * 60. * 60. * 24.); + set_number_errno (node, + floor (num[0] / 60. / 60. / 24.) * 60. * 60. * 24.); break; case OP_XDATE_HOUR: - rnc (fmod (floor (n0 / 60. / 60.), 24.)); + set_number_errno (node, fmod (floor (num[0] / 60. / 60.), 24.)); break; case OP_XDATE_JDAY: - rnc (julian_to_jday (n0 / 86400.)); + set_number (node, julian_to_jday (num[0] / 86400.)); break; case OP_XDATE_MDAY: { int day; - julian_to_calendar (n0 / 86400., NULL, NULL, &day); - rnc (day); + julian_to_calendar (num[0] / 86400., NULL, NULL, &day); + set_number (node, day); } break; case OP_XDATE_MINUTE: - rnc (fmod (floor (n0 / 60.), 60.)); + set_number_errno (node, fmod (floor (num[0] / 60.), 60.)); break; case OP_XDATE_MONTH: { int month; - julian_to_calendar (n0 / 86400., NULL, &month, NULL); - rnc (month); + julian_to_calendar (num[0] / 86400., NULL, &month, NULL); + set_number (node, month); } break; case OP_XDATE_QUARTER: { int month; - julian_to_calendar (n0 / 86400., NULL, &month, NULL); - rnc ((month - 1) / 3 + 1); + julian_to_calendar (num[0] / 86400., NULL, &month, NULL); + set_number (node, (month - 1) / 3 + 1); } break; case OP_XDATE_SECOND: - rnc (fmod (n0, 60.)); + set_number_errno (node, fmod (num[0], 60.)); break; case OP_XDATE_TDAY: - rnc (floor (n0 / 60. / 60. / 24.)); + set_number_errno (node, floor (num[0] / 60. / 60. / 24.)); break; case OP_XDATE_TIME: - rnc (n0 - floor (n0 / 60. / 60. / 24.) * 60. * 60. * 24.); + set_number_errno (node, num[0] - (floor (num[0] / 60. / 60. / 24.) + * 60. * 60. * 24.)); break; case OP_XDATE_WEEK: - rnc ((julian_to_jday (n0) - 1) / 7 + 1); + set_number (node, (julian_to_jday (num[0]) - 1) / 7 + 1); break; case OP_XDATE_WKDAY: - rnc (julian_to_wday (n0)); + set_number (node, julian_to_wday (num[0])); break; case OP_XDATE_YEAR: { int year; - julian_to_calendar (n0 / 86400., &year, NULL, NULL); - rnc (year); + julian_to_calendar (num[0] / 86400., &year, NULL, NULL); + set_number (node, year); } break; /* String functions. */ case OP_CONCAT: { - len = s0l; - memcpy (strbuf, s0, len); + char string[256]; + int length = str_len[0]; + memcpy (string, str[0], length); for (i = 1; i < n->n; i++) { - add = sl (i); - if (add + len > 255) - add = 255 - len; - memcpy (&strbuf[len], s (i), add); - len += add; + int add = n->arg[i]->str_con.len; + if (add + length > 255) + add = 255 - length; + memcpy (&string[length], n->arg[i]->str_con.s, add); + length += add; } - n = repl_str_con (n, strbuf, len); + set_string (node, string, length); } break; - case OP_INDEX: - rnc (s1l ? str_search (s0, s0l, s1, s1l) : SYSMIS); - break; - case OP_INDEX_OPT: - if (n2 == SYSMIS || (int) n2 <= 0 || s1l % (int) n2) - { - msg (SW, _("While optimizing a constant expression, there was " - "a bad value for the third argument to INDEX.")); - frnc (SYSMIS); - } - else - { - int pos = 0; - int c = s1l / (int) n2; - int r; - - for (i = 0; i < c; i++) - { - r = str_search (s0, s0l, s (i), sl (i)); - if (r < pos || pos == 0) - pos = r; - } - frnc (pos); - } - break; - case OP_RINDEX: - rnc (str_rsearch (s0, s0l, s1, s1l)); - break; - case OP_RINDEX_OPT: - if (n2 == SYSMIS || (int) n2 <= 0 || s1l % (int) n2) - { - msg (SE, _("While optimizing a constant expression, there was " - "a bad value for the third argument to RINDEX.")); - frnc (SYSMIS); - } - else - { - int pos = 0; - int c = s1l / n2; - int r; - - for (i = 0; i < c; i++) - { - r = str_rsearch (s0, s0l, s (i), sl (i)); - if (r > pos) - pos = r; - } - frnc (pos); - } + case OP_INDEX_2: + case OP_INDEX_3: + case OP_RINDEX_2: + case OP_RINDEX_3: + { + int result, chunk_width, chunk_cnt; + + if (n->type == OP_INDEX_2 || n->type == OP_RINDEX_2) + chunk_width = str_len[1]; + else + chunk_width = num[2]; + if (chunk_width <= 0 || chunk_width > str_len[1] + || str_len[1] % chunk_width != 0) + break; + chunk_cnt = str_len[1] / chunk_width; + + result = 0; + for (i = 0; i < chunk_cnt; i++) + { + const char *chunk = str[1] + chunk_width * i; + int ofs; + if (n->type == OP_INDEX_2 || n->type == OP_INDEX_3) + { + ofs = str_search (str[0], str_len[0], chunk, chunk_width); + if (ofs < result || result == 0) + result = ofs; + } + else + { + ofs = str_rsearch (str[0], str_len[0], chunk, chunk_width); + if (ofs > result) + result = ofs; + } + } + set_number (node, result); + } break; case OP_LENGTH: - frnc (s0l); + set_number (node, str_len[0]); break; case OP_LOWER: { char *cp; - for (cp = &s0[s0l]; cp >= s0; cp--) - *cp = tolower ((unsigned char) (*cp)); - n = repl_str_con (n, s0, s0l); + for (cp = str[0]; cp < str[0] + str_len[0]; cp++) + *cp = tolower ((unsigned char) *cp); } break; case OP_UPPER: { char *cp; - for (cp = &s0[s0[0] + 1]; cp > s0; cp--) - *cp = toupper ((unsigned char) (*cp)); - n = repl_str_con (n, s0, s0l); + for (cp = str[0]; cp < str[0] + str_len[0]; cp++) + *cp = toupper ((unsigned char) *cp); } break; case OP_LPAD: - case OP_LPAD_OPT: case OP_RPAD: - case OP_RPAD_OPT: { - int c; - - if (n1 == SYSMIS) - { - n = repl_str_con (n, NULL, 0); - break; - } - len = n1; - len = range (len, 1, 255); - add = max (n1 - s0l, 0); - - if (n->type == OP_LPAD_OPT || n->type == OP_RPAD_OPT) - { - if (s2l < 1) - { - c = n->type == OP_LPAD_OPT ? 'L' : 'R'; - msg (SE, _("Third argument to %cPAD() must be at least one " - "character in length."), c); - c = ' '; - } - else - c = s2[0]; - } - else - c = ' '; - - if (n->type == OP_LPAD || n->type == OP_LPAD_OPT) - memmove (&s0[add], s0, len); - if (n->type == OP_LPAD || n->type == OP_LPAD_OPT) - memset (s0, c, add); - else - memset (&s0[s0l], c, add); - - n = repl_str_con (n, s0, len); + char string[256]; + int len, pad_len; + char pad_char; + + /* Target length. */ + len = num[1]; + if (len < 1 || len > 255) + break; + + /* Pad character. */ + if (str_len[2] != 1) + break; + pad_char = str[2][0]; + + if (str_len[0] >= len) + len = str_len[0]; + pad_len = len - str_len[0]; + if (n->type == OP_LPAD) + { + memset (string, pad_char, pad_len); + memcpy (string + pad_len, str[0], str_len[0]); + } + else + { + memcpy (string, str[0], str_len[0]); + memset (string + str_len[0], pad_char, pad_len); + } + + set_string (node, string, len); } break; case OP_LTRIM: - case OP_LTRIM_OPT: case OP_RTRIM: - case OP_RTRIM_OPT: { - int c = ' '; - char *cp = s0; - - if (n->type == OP_LTRIM_OPT || n->type == OP_RTRIM_OPT) - { - if (s1l < 1) - { - c = n->type == OP_LTRIM_OPT ? 'L' : 'R'; - msg (SE, _("Second argument to %cTRIM() must be at least one " - "character in length."), c); - } - else - c = s1[0]; - } - len = s0l; - if (n->type == OP_LTRIM || n->type == OP_LTRIM_OPT) - { - while (*cp == c && cp < &s0[len]) - cp++; - len -= cp - s0; - } + char pad_char; + const char *cp = str[0]; + int len = str_len[0]; + + /* Pad character. */ + if (str_len[1] != 1) + break; + pad_char = str[1][0]; + + if (n->type == OP_LTRIM) + while (len > 0 && *cp == pad_char) + cp++, len--; else - while (len > 0 && s0[len - 1] == c) + while (len > 0 && str[0][len - 1] == pad_char) len--; - n = repl_str_con (n, cp, len); - } - break; - case OP_NUMBER: - case OP_NUMBER_OPT: - { - union value v; - struct data_in di; - - di.s = s0; - di.e = s0 + s0l; - di.v = &v; - di.flags = DI_IGNORE_ERROR; - di.f1 = 1; - - if (n->type == OP_NUMBER_OPT) - { - di.format.type = (int) n->arg[1]; - di.format.w = (int) n->arg[2]; - di.format.d = (int) n->arg[3]; - } - else - { - di.format.type = FMT_F; - di.format.w = s0l; - di.format.d = 0; - } - - data_in (&di); - frnc (v.f); - } - break; - case OP_STRING: - { - union value v; - struct fmt_spec f; - f.type = (int) n->arg[1]; - f.w = (int) n->arg[2]; - f.d = (int) n->arg[3]; - v.f = n0; - - assert ((formats[f.type].cat & FCAT_STRING) == 0); - data_out (strbuf, &f, &v); - n = repl_str_con (n, strbuf, f.w); + set_string (node, cp, len); } break; - case OP_SUBSTR: - case OP_SUBSTR_OPT: + case OP_SUBSTR_2: + case OP_SUBSTR_3: { - int pos = (int) n1; - if (pos > s0l || pos <= 0 || n1 == SYSMIS - || (n->type == OP_SUBSTR_OPT && n2 == SYSMIS)) - n = repl_str_con (n, NULL, 0); + int pos = (int) num[1]; + if (pos > str_len[0] || pos <= 0 || num[1] == SYSMIS + || (n->type == OP_SUBSTR_3 && num[2] == SYSMIS)) + set_string (node, NULL, 0); else { - if (n->type == OP_SUBSTR_OPT) + int len; + if (n->type == OP_SUBSTR_3) { - len = (int) n2; - if (len + pos - 1 > s0l) - len = s0l - pos + 1; + len = (int) num[2]; + if (len + pos - 1 > str_len[0]) + len = str_len[0] - pos + 1; } else - len = s0l - pos + 1; - n = repl_str_con (n, &s0[pos - 1], len); + len = str_len[0] - pos + 1; + set_string (node, &str[0][pos - 1], len); } } break; /* Weirdness. */ - case OP_INV: - rnc (1.0 / n0); - break; case OP_MOD: - if (n0 == 0.0 && n1 == SYSMIS) - frnc (0.0); + if (num[0] == 0.0 && num[1] == SYSMIS) + set_number (node, 0.0); else - rnc (fmod (n0, n1)); + set_number (node, fmod (num[0], num[1])); break; case OP_NUM_TO_BOOL: - if (n0 == 0.0) - n0 = 0.0; - else if (n0 == 1.0) - n0 = 1.0; - else if (n0 != SYSMIS) + if (num[0] == 0.0) + num[0] = 0.0; + else if (num[0] == 1.0) + num[0] = 1.0; + else if (num[0] != SYSMIS) { msg (SE, _("When optimizing a constant expression, an integer " "that was being used as an Boolean value was found " "to have a constant value other than 0, 1, or SYSMIS.")); - n0 = 0.0; + num[0] = 0.0; } - rnc (n0); + set_number (node, num[0]); break; } - return n; } -#undef n0 -#undef n1 -#undef n2 - -#undef s0 -#undef s0l -#undef s1 -#undef s1l -#undef s2 -#undef s2l -#undef s -#undef sl - -#undef rnc -#undef frnc +static void +evaluate_tree_with_missing (union any_node **node UNUSED, size_t count UNUSED) +{ + /* FIXME */ +} -static struct nonterm_node * -repl_num_con (struct nonterm_node * n, double d) +static void +collapse_node (union any_node **node, size_t child_idx) { - int i; - if (!finite (d) || errno) - d = SYSMIS; - else - for (i = 0; i < n->n; i++) - if (n->arg[i]->type == OP_NUM_CON && n->arg[i]->num_con.value == SYSMIS) - { - d = SYSMIS; - break; - } - return force_repl_num_con (n, d); + struct nonterm_node *nonterm = &(*node)->nonterm; + union any_node *child; + + child = nonterm->arg[child_idx]; + nonterm->arg[child_idx] = NULL; + free_node (*node); + *node = child; } -static struct nonterm_node * -force_repl_num_con (struct nonterm_node * n, double d) + +static void +set_number (union any_node **node, double value) { struct num_con_node *num; + + free_node (*node); - if (!finite (d) || errno) - d = SYSMIS; - free_node ((union any_node *) n); - num = xmalloc (sizeof *num); + *node = xmalloc (sizeof *num); + num = &(*node)->num_con; num->type = OP_NUM_CON; - num->value = d; - return (struct nonterm_node *) num; + num->value = finite (value) ? value : SYSMIS; } -static struct nonterm_node * -repl_str_con (struct nonterm_node * n, char *s, int len) +static void +set_number_errno (union any_node **node, double value) +{ + if (errno == EDOM || errno == ERANGE) + value = SYSMIS; + set_number (node, value); +} + +static void +set_string (union any_node **node, const char *string, size_t length) { struct str_con_node *str; /* The ordering here is important since the source string may be part of a subnode of n. */ - str = xmalloc (sizeof *str + len - 1); + str = xmalloc (sizeof *str + length - 1); str->type = OP_STR_CON; - str->len = len; - memcpy (str->s, s, len); - free_node ((union any_node *) n); - return (struct nonterm_node *) str; + str->len = length; + memcpy (str->s, string, length); + free_node (*node); + *node = (union any_node *) str; } /* Returns the number of days since 10 Oct 1582 for the date @@ -964,12 +757,14 @@ yrmoda (double year, double month, double day) month = floor (month + EPSILON); day = floor (day + EPSILON); - if (year >= 0. && year <= 199.) + if (year >= 0. && year <= 29.) + year += 2000.; + else if (year >= 30. && year <= 99.) year += 1900.; if ((year < 1582. || year > 19999.) || (year == 1582. && (month < 10. || (month == 10. && day < 15.))) - || (month < -1 || month > 13) - || (day < -1 || day > 32)) + || (month < 0 || month > 13) + || (day < 0 || day > 31)) return SYSMIS; return calendar_to_julian (year, month, day); } @@ -1035,19 +830,7 @@ dump_expression (union any_node * n, struct expression * expr) static void dump_node (struct expr_dump_state *eds, union any_node * n) { - if (n->type == OP_AND || n->type == OP_OR) - { - int i; - - dump_node (eds, n->nonterm.arg[0]); - for (i = 1; i < n->nonterm.n; i++) - { - dump_node (eds, n->nonterm.arg[i]); - emit (eds, n->type); - } - return; - } - else if (n->type < OP_TERMINAL) + if (IS_NONTERMINAL (n->type)) { int i; for (i = 0; i < n->nonterm.n; i++) @@ -1063,26 +846,26 @@ dump_node (struct expr_dump_state *eds, union any_node * n) emit (eds, (int) n->nonterm.arg[n->nonterm.n + 1]); emit (eds, (int) n->nonterm.arg[n->nonterm.n + 2]); } - return; } - - emit (eds, n->type); - if (n->type == OP_NUM_CON) - emit_num_con (eds, n->num_con.value); - else if (n->type == OP_STR_CON) - emit_str_con (eds, n->str_con.s, n->str_con.len); - else if (n->type == OP_NUM_VAR || n->type == OP_STR_VAR - || n->type == OP_STR_MIS) - emit_var (eds, n->var.v); - else if (n->type == OP_NUM_LAG || n->type == OP_STR_LAG) + else { - emit_var (eds, n->lag.v); - emit (eds, n->lag.lag); + emit (eds, n->type); + if (n->type == OP_NUM_CON) + emit_num_con (eds, n->num_con.value); + else if (n->type == OP_STR_CON) + emit_str_con (eds, n->str_con.s, n->str_con.len); + else if (n->type == OP_NUM_VAR || n->type == OP_STR_VAR) + emit_var (eds, n->var.v); + else if (n->type == OP_NUM_LAG || n->type == OP_STR_LAG) + { + emit_var (eds, n->lag.v); + emit (eds, n->lag.lag); + } + else if (n->type == OP_NUM_SYS || n->type == OP_NUM_VAL) + emit (eds, n->var.v->fv); + else + assert (n->type == OP_CASENUM); } - else if (n->type == OP_NUM_SYS || n->type == OP_NUM_VAL) - emit (eds, n->var.v->fv); - else - assert (n->type == OP_CASENUM); } static void