#include <config.h>
#include "expr.h"
#include "exprP.h"
-#include <assert.h>
+#include "error.h"
#include <math.h>
#include <ctype.h>
#include <errno.h>
#include "julcal/julcal.h"
#include "misc.h"
#include "pool.h"
-#include "stats.h"
#include "str.h"
#include "var.h"
-/*
- Expression "optimizer"
-
- Operates on the tree representation of expressions.
- optimize_expression() performs the optimizations listed below:
-
- 1. Constant folding
- Any operation with constant operands is replaced by its value.
- (Exception: random-number-generator functions.)
+static void evaluate_tree_no_missing (union any_node **);
+static void evaluate_tree_with_missing (union any_node **, size_t count);
+static void optimize_tree (union any_node **);
- 2. Strength reduction (x is any expression; a is a numeric constant)
- x/0 => SYSMIS
- x*0 => 0
- x**0 => 1
- x**1, x+0, x-0, x*1 => x
- x**2 => sqr(x)
- x/a => x*(1/a) (where 1/a is evaluated at optimization time)
+static void collapse_node (union any_node **node, size_t child_idx);
+static void set_number (union any_node **node, double);
+static void set_number_errno (union any_node **node, double);
+static void set_string (union any_node **node, const char *, size_t);
- I thought about adding additional optimizations but decided that what
- is here could already be considered overkill.
- */
-
-static struct nonterm_node *evaluate_tree (struct nonterm_node * n);
-static struct nonterm_node *optimize_tree (struct nonterm_node * n);
-
-struct nonterm_node *
-optimize_expression (struct nonterm_node * n)
+void
+optimize_expression (union any_node **node)
{
+ int nonconst = 0; /* Number of nonconstant children. */
+ int sysmis = 0; /* Number of system-missing children. */
+ struct nonterm_node *nonterm;
int i;
- /* Set to 1 if a child is nonconstant. */
- int nonconst = 0;
-
- /* Number of system-missing children. */
- int sysmis = 0;
-
/* We can't optimize a terminal node. */
- if (n->type > OP_TERMINAL)
- return n;
+ if (IS_TERMINAL ((*node)->type))
+ return;
+ nonterm = &(*node)->nonterm;
/* Start by optimizing all the children. */
- for (i = 0; i < n->n; i++)
+ for (i = 0; i < nonterm->n; i++)
{
- n->arg[i] = ((union any_node *)
- optimize_expression ((struct nonterm_node *) n->arg[i]));
- if (n->arg[i]->type == OP_NUM_CON)
+ optimize_expression (&nonterm->arg[i]);
+ if (nonterm->arg[i]->type == OP_NUM_CON)
{
- if (n->arg[i]->num_con.value == SYSMIS)
+ if (nonterm->arg[i]->num_con.value == SYSMIS)
sysmis++;
}
- else if (n->arg[i]->type != OP_STR_CON)
- nonconst = 1;
+ else if (nonterm->arg[i]->type != OP_STR_CON)
+ nonconst++;
}
- if (sysmis && !(ops[n->type].flags & OP_ABSORB_MISS))
- /* Just about any operation produces SYSMIS when given any SYSMIS
- arguments. */
+ if (sysmis && !(ops[nonterm->type].flags & OP_ABSORB_MISS))
+ {
+ /* Most operation produce SYSMIS given any SYSMIS
+ argument. */
+ set_number (node, SYSMIS);
+ }
+ else if (!nonconst)
+ {
+ /* Evaluate constant expressions. */
+ if (!sysmis)
+ evaluate_tree_no_missing (node);
+ else
+ evaluate_tree_with_missing (node, sysmis);
+ }
+ else
{
- struct num_con_node *num = xmalloc (sizeof *num);
- free_node ((union any_node *) n);
- num->type = OP_NUM_CON;
- num->value = SYSMIS;
- n = (struct nonterm_node *) num;
+ /* A few optimization possibilities are still left. */
+ optimize_tree (node);
}
- else if (!nonconst)
- /* If all the children of this node are constants, then there are
- obvious optimizations. */
- n = evaluate_tree (n);
- else
- /* Otherwise, we may be able to make certain optimizations
- anyway. */
- n = optimize_tree (n);
- return n;
}
-static struct nonterm_node *repl_num_con (struct nonterm_node *, double);
-static struct nonterm_node *force_repl_num_con (struct nonterm_node *, double);
-static struct nonterm_node *repl_str_con (struct nonterm_node *, char *, int);
-
-#define n0 n->arg[0]->num_con.value
-#define n1 n->arg[1]->num_con.value
-#define n2 n->arg[2]->num_con.value
-
-#define s0 n->arg[0]->str_con.s
-#define s0l n->arg[0]->str_con.len
-#define s1 n->arg[1]->str_con.s
-#define s1l n->arg[1]->str_con.len
-#define s2 n->arg[2]->str_con.s
-#define s2l n->arg[2]->str_con.len
-#define s(X) n->arg[X]->str_con.s
-#define sl(X) n->arg[X]->str_con.len
-
-static struct nonterm_node *
-optimize_tree (struct nonterm_node * n)
+static int
+eq_num_con (union any_node *node, double number)
{
- int i;
-
- errno = 0;
- if (n->type == OP_PLUS || n->type == OP_MUL)
- {
- /* Default constant value. */
- double def = n->type == OP_MUL ? 1.0 : 0.0;
-
- /* Total value of all the constants. */
- double cval = def;
-
- /* Number of nonconst arguments. */
- int nvar = 0;
-
- /* New node. */
- struct nonterm_node *m = NULL;
-
- /* Argument copying counter. */
- int c;
-
- /* 1=SYSMIS encountered */
- int sysmis = 0;
-
- for (i = 0; i < n->n; i++)
- if (n->arg[i]->type == OP_NUM_CON)
- {
- if (n->arg[i]->num_con.value != SYSMIS)
- {
- if (n->type == OP_MUL)
- cval *= n->arg[i]->num_con.value;
- else
- cval += n->arg[i]->num_con.value;
- }
- else
- sysmis++;
- }
- else
- nvar++;
-
- /* 0*SYSMIS=0, 0/SYSMIS=0; otherwise, SYSMIS and infinities
- produce SYSMIS. */
- if (cval == 0.0 && n->type == OP_MUL)
- nvar = 0;
- else if (sysmis || !finite (cval))
- {
- nvar = 0;
- cval = SYSMIS;
- }
-
- /* If no nonconstant terms, replace with a constant node. */
- if (nvar == 0)
- return force_repl_num_con (n, cval);
+ return node->type == OP_NUM_CON && node->num_con.value == number;
+}
- if (nvar == 1 && cval == def)
- {
- /* If there is exactly one nonconstant term and no constant
- terms, replace with the nonconstant term. */
- for (i = 0; i < n->n; i++)
- if (n->arg[i]->type != OP_NUM_CON)
- m = (struct nonterm_node *) n->arg[i];
- else
- free_node (n->arg[i]);
- }
- else
- {
- /* Otherwise consolidate all the nonconstant terms. */
- m = xmalloc (sizeof (struct nonterm_node)
- + ((nvar + (cval != def) - 1)
- * sizeof (union any_node *)));
- for (i = c = 0; i < n->n; i++)
- if (n->arg[i]->type != OP_NUM_CON)
- m->arg[c++] = n->arg[i];
- else
- free_node (n->arg[i]);
-
- if (cval != def)
- {
- m->arg[c] = xmalloc (sizeof (struct num_con_node));
- m->arg[c]->num_con.type = OP_NUM_CON;
- m->arg[c]->num_con.value = cval;
- c++;
- }
-
- m->type = n->type;
- m->n = c;
- }
- free (n);
- n = m;
- }
- else if (n->type == OP_POW)
+static void
+optimize_tree (union any_node **node)
+{
+ struct nonterm_node *n = &(*node)->nonterm;
+
+ /* x+0, x-0, 0+x => x. */
+ if ((n->type == OP_ADD || n->type == OP_SUB) && eq_num_con (n->arg[1], 0.))
+ collapse_node (node, 1);
+ else if (n->type == OP_ADD && eq_num_con (n->arg[0], 0.))
+ collapse_node (node, 0);
+
+ /* x*1, x/1, 1*x => x. */
+ else if ((n->type == OP_MUL || n->type == OP_DIV)
+ && eq_num_con (n->arg[1], 1.))
+ collapse_node (node, 0);
+ else if (n->type == OP_MUL && eq_num_con (n->arg[0], 1.))
+ collapse_node (node, 1);
+
+ /* 0*x, 0/x, x*0, MOD(0,x) => x. */
+ else if (((n->type == OP_MUL || n->type == OP_DIV || n->type == OP_MOD)
+ && eq_num_con (n->arg[0], 0.))
+ || (n->type == OP_MUL && eq_num_con (n->arg[1], 0.)))
+ set_number (node, 0.);
+
+ /* x**1 => x. */
+ else if (n->type == OP_POW && eq_num_con (n->arg[1], 1))
+ collapse_node (node, 0);
+
+ /* x**2 => SQUARE(x). */
+ else if (n->type == OP_POW && eq_num_con (n->arg[2], 2))
{
- if (n->arg[1]->type == OP_NUM_CON)
- {
- if (n1 == 1.0)
- {
- struct nonterm_node *m = (struct nonterm_node *) n->arg[0];
-
- free_node (n->arg[1]);
- free (n);
- return m;
- }
- else if (n1 == 2.0)
- {
- n = xrealloc (n, sizeof (struct nonterm_node));
- n->type = OP_SQUARE;
- n->n = 1;
- }
- }
+ n->type = OP_SQUARE;
+ n->n = 1;
}
- return n;
}
-#define rnc(D) \
- (n = repl_num_con (n, D))
-
-#define frnc(D) \
- (n = force_repl_num_con (n, D))
-
/* Finds the first NEEDLE of length NEEDLE_LEN in a HAYSTACK of length
HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */
-static inline int
-str_search (char *haystack, int haystack_len, char *needle, int needle_len)
+static int
+str_search (const char *haystack, int haystack_len,
+ const char *needle, int needle_len)
{
char *p = memmem (haystack, haystack_len, needle, needle_len);
return p ? p - haystack + 1 : 0;
/* Finds the last NEEDLE of length NEEDLE_LEN in a HAYSTACK of length
HAYSTACK_LEN. Returns a 1-based index, 0 on failure. */
-static inline int
-str_rsearch (char *haystack, int haystack_len, char *needle, int needle_len)
+static int
+str_rsearch (const char *haystack, int haystack_len,
+ const char *needle, int needle_len)
{
char *p = mm_find_reverse (haystack, haystack_len, needle, needle_len);
return p ? p - haystack + 1 : 0;
}
-static struct nonterm_node *
-evaluate_tree (struct nonterm_node * n)
+static void
+evaluate_tree_no_missing (union any_node **node)
{
- static char *strbuf;
- int add;
- int len;
+ struct nonterm_node *n = &(*node)->nonterm;
+ double num[3];
+ char *str[3];
+ size_t str_len[3];
int i;
- if (!strbuf)
- strbuf = xmalloc (256);
errno = 0;
+ for (i = 0; i < n->n && i < 3; i++)
+ {
+ union any_node *arg = n->arg[i];
+
+ if (arg->type == OP_NUM_CON)
+ num[i] = arg->num_con.value;
+ else if (arg->type == OP_STR_CON)
+ {
+ str[i] = arg->str_con.s;
+ str_len[i] = arg->str_con.len;
+ }
+ }
+
switch (n->type)
{
- case OP_PLUS:
+ case OP_ADD:
+ set_number (node, num[0] + num[1]);
+ break;
+
+ case OP_SUB:
+ set_number (node, num[0] - num[1]);
+ break;
+
case OP_MUL:
- return optimize_tree (n);
+ set_number (node, num[0] * num[1]);
+ break;
+
+ case OP_DIV:
+ if (num[1] != 0.)
+ set_number (node, num[0] / num[1]);
+ break;
case OP_POW:
- if (n0 == 0.0 && n1 == 0.0)
- frnc (SYSMIS);
- else if (n0 == SYSMIS && n1 == 0.0)
- frnc (1.0);
- else if (n0 == 0.0 && n1 == SYSMIS)
- frnc (0.0);
+ if (num[0] == 0. && num[1] == 0.)
+ set_number (node, SYSMIS);
else
- rnc (pow (n0, n1));
+ set_number_errno (node, pow (num[0], num[1]));
break;
case OP_AND:
- if (n0 == 0.0 || n1 == 0.0)
- frnc (0.0);
- else if (n0 == SYSMIS || n1 == SYSMIS)
- frnc (SYSMIS);
- else
- frnc (1.0);
+ set_number (node, num[0] && num[1]);
break;
+
case OP_OR:
- if (n0 == 1.0 || n1 == 1.0)
- frnc (1.0);
- else if (n0 == SYSMIS || n1 == SYSMIS)
- frnc (SYSMIS);
- else
- frnc (0.0);
+ set_number (node, num[0] || num[1]);
break;
+
case OP_NOT:
- rnc (n0 == 0.0 ? 1.0 : 0.0);
+ set_number (node, !num[0]);
break;
case OP_EQ:
- rnc (n0 == n1);
+ set_number (node, num[0] == num[1]);
break;
case OP_GE:
- rnc (n0 >= n1);
+ set_number (node, num[0] >= num[1]);
break;
case OP_GT:
- rnc (n0 > n1);
+ set_number (node, num[0] > num[1]);
break;
case OP_LE:
- rnc (n0 <= n1);
+ set_number (node, num[0] <= num[1]);
break;
case OP_LT:
- rnc (n0 < n1);
+ set_number (node, num[0] < num[1]);
break;
case OP_NE:
- rnc (n0 != n1);
+ set_number (node, num[0] != num[1]);
break;
/* String operators. */
- case OP_STRING_EQ:
- rnc (st_compare_pad (s0, s0l, s1, s1l) == 0);
+ case OP_EQ_STRING:
+ set_number (node, st_compare_pad (str[0], str_len[0],
+ str[1], str_len[1]) == 0);
break;
- case OP_STRING_GE:
- rnc (st_compare_pad (s0, s0l, s1, s1l) >= 0);
+ case OP_GE_STRING:
+ set_number (node, st_compare_pad (str[0], str_len[0],
+ str[1], str_len[1]) >= 0);
break;
- case OP_STRING_GT:
- rnc (st_compare_pad (s0, s0l, s1, s1l) > 0);
+ case OP_GT_STRING:
+ set_number (node, st_compare_pad (str[0], str_len[0],
+ str[1], str_len[1]) > 0);
break;
- case OP_STRING_LE:
- rnc (st_compare_pad (s0, s0l, s1, s1l) <= 0);
+ case OP_LE_STRING:
+ set_number (node, st_compare_pad (str[0], str_len[0],
+ str[1], str_len[1]) <= 0);
break;
- case OP_STRING_LT:
- rnc (st_compare_pad (s0, s0l, s1, s1l) < 0);
+ case OP_LT_STRING:
+ set_number (node, st_compare_pad (str[0], str_len[0],
+ str[1], str_len[1]) < 0);
break;
- case OP_STRING_NE:
- rnc (st_compare_pad (s0, s0l, s1, s1l) != 0);
+ case OP_NE_STRING:
+ set_number (node, st_compare_pad (str[0], str_len[0],
+ str[1], str_len[1]) != 0);
break;
/* Unary functions. */
case OP_NEG:
- rnc (-n0);
+ set_number (node, -num[0]);
break;
case OP_ABS:
- rnc (fabs (n0));
+ set_number (node, fabs (num[0]));
break;
case OP_ARCOS:
- rnc (acos (n0));
+ set_number_errno (node, acos (num[0]));
break;
case OP_ARSIN:
- rnc (asin (n0));
+ set_number_errno (node, asin (num[0]));
break;
case OP_ARTAN:
- rnc (atan (n0));
+ set_number_errno (node, atan (num[0]));
break;
case OP_COS:
- rnc (cos (n0));
+ set_number_errno (node, cos (num[0]));
break;
case OP_EXP:
- rnc (exp (n0));
+ set_number_errno (node, exp (num[0]));
break;
case OP_LG10:
- rnc (log10 (n0));
+ set_number_errno (node, log10 (num[0]));
break;
case OP_LN:
- rnc (log (n0));
+ set_number_errno (node, log (num[0]));
break;
case OP_MOD10:
- rnc (fmod (n0, 10));
+ set_number_errno (node, fmod (num[0], 10));
break;
case OP_RND:
- rnc (n0 >= 0.0 ? floor (n0 + 0.5) : -floor (-n0 + 0.5));
+ if (num[0] >= 0.0)
+ set_number_errno (node, floor (num[0] + 0.5));
+ else
+ set_number_errno (node, -floor (-num[0] + 0.5));
break;
case OP_SIN:
- rnc (sin (n0));
+ set_number_errno (node, sin (num[0]));
break;
case OP_SQRT:
- rnc (sqrt (n0));
+ set_number_errno (node, sqrt (num[0]));
break;
case OP_TAN:
- rnc (tan (n0));
+ set_number_errno (node, tan (num[0]));
break;
case OP_TRUNC:
- rnc (n0 >= 0.0 ? floor (n0) : -floor (-n0));
+ if (num[0] >= 0.0)
+ set_number_errno (node, floor (num[0]));
+ else
+ set_number_errno (node, -floor (-num[0]));
break;
/* N-ary numeric functions. */
case OP_ANY:
- if (n0 == SYSMIS)
- frnc (SYSMIS);
- else
- {
- int sysmis = 1;
- double ni;
-
- for (i = 1; i < n->n; i++)
- {
- ni = n->arg[i]->num_con.value;
- if (n0 == ni)
- {
- frnc (1.0);
- goto any_done;
- }
- if (ni != SYSMIS)
- sysmis = 0;
- }
- frnc (sysmis ? SYSMIS : 0.0);
- }
- any_done:
- break;
- case OP_ANY_STRING:
- for (i = 1; i < n->n; i++)
- if (!st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len,
- n->arg[i]->str_con.s, n->arg[i]->str_con.len))
- {
- frnc (1.0);
- goto any_string_done;
- }
- frnc (0.0);
- any_string_done:
+ {
+ double result = 0.0;
+ for (i = 1; i < n->n; i++)
+ if (num[0] == n->arg[i]->num_con.value)
+ {
+ result = 1.0;
+ break;
+ }
+ set_number (node, result);
+ }
+ break;
+ case OP_ANY_STRING:
+ {
+ double result = 0.0;
+ for (i = 1; i < n->n; i++)
+ if (!st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len,
+ n->arg[i]->str_con.s, n->arg[i]->str_con.len))
+ {
+ result = 1.0;
+ break;
+ }
+ set_number (node, result);
+ }
break;
case OP_CFVAR:
case OP_SD:
case OP_SUM:
case OP_VARIANCE:
+ /* FIXME */
+ break;
+
+ case OP_RANGE:
{
- double d[2] =
- {0.0, 0.0}; /* sum, sum of squares */
- double min = DBL_MAX; /* minimum value */
- double max = -DBL_MAX; /* maximum value */
- double ni; /* value of i'th argument */
- int nv = 0; /* number of valid arguments */
-
- for (i = 0; i < n->n; i++)
- {
- ni = n->arg[i]->num_con.value;
- if (ni != SYSMIS)
- {
- nv++;
- d[0] += ni;
- d[1] += ni * ni;
- if (ni < min)
- min = ni;
- if (ni > max)
- max = ni;
- }
- }
- if (n->type == OP_NMISS)
- frnc (i - nv);
- else if (n->type == OP_NVALID)
- frnc (nv);
- else if (nv >= (int) n->arg[i])
- {
- switch (n->type)
- {
- case OP_CFVAR:
- frnc (calc_cfvar (d, nv));
- break;
- case OP_MAX:
- frnc (max);
- break;
- case OP_MEAN:
- frnc (calc_mean (d, nv));
- break;
- case OP_MIN:
- frnc (min);
- break;
- case OP_SD:
- frnc (calc_stddev (calc_variance (d, nv)));
- break;
- case OP_SUM:
- frnc (d[0]);
- break;
- case OP_VARIANCE:
- frnc (calc_variance (d, nv));
- break;
- }
- }
- else
- frnc (SYSMIS);
+ double result = 0.0;
+
+ for (i = 1; i < n->n; i += 2)
+ {
+ double min = n->arg[i]->num_con.value;
+ double max = n->arg[i + 1]->num_con.value;
+ if (num[0] >= min && num[0] <= max)
+ {
+ result = 1.0;
+ break;
+ }
+ }
+ set_number (node, result);
}
break;
- case OP_RANGE:
- if (n0 == SYSMIS)
- frnc (SYSMIS);
- else
- {
- double min, max;
- int sysmis = 1;
-
- for (i = 1; i < n->n; i += 2)
- {
- min = n->arg[i]->num_con.value;
- max = n->arg[i + 1]->num_con.value;
- if (min == SYSMIS || max == SYSMIS)
- continue;
- sysmis = 0;
- if (n0 >= min && n0 <= max)
- {
- frnc (1.0);
- goto range_done;
- }
- }
- frnc (sysmis ? SYSMIS : 0.0);
- }
- range_done:
- break;
- case OP_RANGE_STRING:
- for (i = 1; i < n->n; i += 2)
- if (st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len,
- n->arg[i]->str_con.s, n->arg[i]->str_con.len) >= 0
- && st_compare_pad (n->arg[0]->str_con.s, n->arg[0]->str_con.len,
- n->arg[i + 1]->str_con.s,
- n->arg[i + 1]->str_con.len) <= 0)
- {
- frnc (1.0);
- goto range_str_done;
- }
- frnc (0.0);
- range_str_done:
- break;
- /* Time function. */
+ case OP_RANGE_STRING:
+ {
+ double result = 0.0;
+
+ for (i = 1; i < n->n; i += 2)
+ {
+ const char *min = n->arg[i]->str_con.s;
+ size_t min_len = n->arg[i]->str_con.len;
+ const char *max = n->arg[i + 1]->str_con.s;
+ size_t max_len = n->arg[i + 1]->str_con.len;
+
+ if (st_compare_pad (str[0], str_len[0], min, min_len) >= 0
+ && st_compare_pad (str[0], str_len[0], max, max_len) <= 0)
+ {
+ result = 1.0;
+ break;
+ }
+ }
+ set_number (node, result);
+ break;
+ }
+
+ /* Time functions. */
case OP_TIME_HMS:
- rnc (60. * (60. * n0 + n1) + n2);
+ {
+ double min, max;
+ min = min (num[0], min (num[1], num[2]));
+ max = max (num[0], max (num[1], num[2]));
+ if (min < 0. && max > 0.)
+ break;
+ set_number (node, 60. * (60. * num[0] + num[1]) + num[2]);
+ }
+ break;
+ case OP_CTIME_DAYS:
+ set_number (node, num[0] / (60. * 60. * 24.));
+ break;
+ case OP_CTIME_HOURS:
+ set_number (node, num[0] / (60. * 60.));
+ break;
+ case OP_CTIME_MINUTES:
+ set_number (node, num[0] / 60.);
+ break;
+ case OP_TIME_DAYS:
+ set_number (node, num[0] * (60. * 60. * 24.));
+ break;
+ case OP_CTIME_SECONDS:
+ set_number (node, num[0]);
break;
/* Date construction functions. */
case OP_DATE_DMY:
- rnc (60. * 60. * 24. * yrmoda (n2, n1, n0));
+ set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[1], num[0]));
break;
case OP_DATE_MDY:
- rnc (60. * 60. * 24. * yrmoda (n2, n0, n1));
+ set_number (node, 60. * 60. * 24. * yrmoda (num[2], num[0], num[1]));
break;
case OP_DATE_MOYR:
- rnc (60. * 60. * 24. * yrmoda (n1, n0, 1));
+ set_number (node, 60. * 60. * 24. * yrmoda (num[1], num[0], 1));
break;
case OP_DATE_QYR:
- rnc (60. * 60. * 24. * yrmoda (n1, 3 * (int) n0 - 2, 1));
+ set_number (node,
+ 60. * 60. * 24. * yrmoda (num[1], 3 * (int) num[0] - 2, 1));
break;
case OP_DATE_WKYR:
{
- double t = yrmoda (n1, 1, 1);
+ double t = yrmoda (num[1], 1, 1);
+ if (num[0] < 0. || num[0] > 53.)
+ break;
if (t != SYSMIS)
- t = 60. * 60. * 24. * (t + 7. * (n0 - 1));
- rnc (t);
+ t = 60. * 60. * 24. * (t + 7. * (num[0] - 1));
+ set_number (node, t);
}
break;
case OP_DATE_YRDAY:
{
- double t = yrmoda (n0, 1, 1);
+ double t = yrmoda (num[0], 1, 1);
if (t != SYSMIS)
- t = 60. * 60. * 24. * (t + n0 - 1);
- rnc (t);
+ t = 60. * 60. * 24. * (t + num[1] - 1);
+ set_number (node, t);
}
break;
case OP_YRMODA:
- rnc (yrmoda (n0, n1, n2));
+ set_number (node, yrmoda (num[0], num[1], num[2]));
break;
+
/* Date extraction functions. */
case OP_XDATE_DATE:
- rnc (floor (n0 / 60. / 60. / 24.) * 60. * 60. * 24.);
+ set_number_errno (node,
+ floor (num[0] / 60. / 60. / 24.) * 60. * 60. * 24.);
break;
case OP_XDATE_HOUR:
- rnc (fmod (floor (n0 / 60. / 60.), 24.));
+ set_number_errno (node, fmod (floor (num[0] / 60. / 60.), 24.));
break;
case OP_XDATE_JDAY:
- rnc (julian_to_jday (n0 / 86400.));
+ set_number (node, julian_to_jday (num[0] / 86400.));
break;
case OP_XDATE_MDAY:
{
int day;
- julian_to_calendar (n0 / 86400., NULL, NULL, &day);
- rnc (day);
+ julian_to_calendar (num[0] / 86400., NULL, NULL, &day);
+ set_number (node, day);
}
break;
case OP_XDATE_MINUTE:
- rnc (fmod (floor (n0 / 60.), 60.));
+ set_number_errno (node, fmod (floor (num[0] / 60.), 60.));
break;
case OP_XDATE_MONTH:
{
int month;
- julian_to_calendar (n0 / 86400., NULL, &month, NULL);
- rnc (month);
+ julian_to_calendar (num[0] / 86400., NULL, &month, NULL);
+ set_number (node, month);
}
break;
case OP_XDATE_QUARTER:
{
int month;
- julian_to_calendar (n0 / 86400., NULL, &month, NULL);
- rnc ((month - 1) / 3 + 1);
+ julian_to_calendar (num[0] / 86400., NULL, &month, NULL);
+ set_number (node, (month - 1) / 3 + 1);
}
break;
case OP_XDATE_SECOND:
- rnc (fmod (n0, 60.));
+ set_number_errno (node, fmod (num[0], 60.));
break;
case OP_XDATE_TDAY:
- rnc (floor (n0 / 60. / 60. / 24.));
+ set_number_errno (node, floor (num[0] / 60. / 60. / 24.));
break;
case OP_XDATE_TIME:
- rnc (n0 - floor (n0 / 60. / 60. / 24.) * 60. * 60. * 24.);
+ set_number_errno (node, num[0] - (floor (num[0] / 60. / 60. / 24.)
+ * 60. * 60. * 24.));
break;
case OP_XDATE_WEEK:
- rnc ((julian_to_jday (n0) - 1) / 7 + 1);
+ set_number (node, (julian_to_jday (num[0]) - 1) / 7 + 1);
break;
case OP_XDATE_WKDAY:
- rnc (julian_to_wday (n0));
+ set_number (node, julian_to_wday (num[0]));
break;
case OP_XDATE_YEAR:
{
int year;
- julian_to_calendar (n0 / 86400., &year, NULL, NULL);
- rnc (year);
+ julian_to_calendar (num[0] / 86400., &year, NULL, NULL);
+ set_number (node, year);
}
break;
/* String functions. */
case OP_CONCAT:
{
- len = s0l;
- memcpy (strbuf, s0, len);
+ char string[256];
+ int length = str_len[0];
+ memcpy (string, str[0], length);
for (i = 1; i < n->n; i++)
{
- add = sl (i);
- if (add + len > 255)
- add = 255 - len;
- memcpy (&strbuf[len], s (i), add);
- len += add;
+ int add = n->arg[i]->str_con.len;
+ if (add + length > 255)
+ add = 255 - length;
+ memcpy (&string[length], n->arg[i]->str_con.s, add);
+ length += add;
}
- n = repl_str_con (n, strbuf, len);
+ set_string (node, string, length);
}
break;
- case OP_INDEX:
- rnc (s1l ? str_search (s0, s0l, s1, s1l) : SYSMIS);
- break;
- case OP_INDEX_OPT:
- if (n2 == SYSMIS || (int) n2 <= 0 || s1l % (int) n2)
- {
- msg (SW, _("While optimizing a constant expression, there was "
- "a bad value for the third argument to INDEX."));
- frnc (SYSMIS);
- }
- else
- {
- int pos = 0;
- int c = s1l / (int) n2;
- int r;
-
- for (i = 0; i < c; i++)
- {
- r = str_search (s0, s0l, s (i), sl (i));
- if (r < pos || pos == 0)
- pos = r;
- }
- frnc (pos);
- }
- break;
- case OP_RINDEX:
- rnc (str_rsearch (s0, s0l, s1, s1l));
- break;
- case OP_RINDEX_OPT:
- if (n2 == SYSMIS || (int) n2 <= 0 || s1l % (int) n2)
- {
- msg (SE, _("While optimizing a constant expression, there was "
- "a bad value for the third argument to RINDEX."));
- frnc (SYSMIS);
- }
- else
- {
- int pos = 0;
- int c = s1l / n2;
- int r;
-
- for (i = 0; i < c; i++)
- {
- r = str_rsearch (s0, s0l, s (i), sl (i));
- if (r > pos)
- pos = r;
- }
- frnc (pos);
- }
+ case OP_INDEX_2:
+ case OP_INDEX_3:
+ case OP_RINDEX_2:
+ case OP_RINDEX_3:
+ {
+ int result, chunk_width, chunk_cnt;
+
+ if (n->type == OP_INDEX_2 || n->type == OP_RINDEX_2)
+ chunk_width = str_len[1];
+ else
+ chunk_width = num[2];
+ if (chunk_width <= 0 || chunk_width > str_len[1]
+ || str_len[1] % chunk_width != 0)
+ break;
+ chunk_cnt = str_len[1] / chunk_width;
+
+ result = 0;
+ for (i = 0; i < chunk_cnt; i++)
+ {
+ const char *chunk = str[1] + chunk_width * i;
+ int ofs;
+ if (n->type == OP_INDEX_2 || n->type == OP_INDEX_3)
+ {
+ ofs = str_search (str[0], str_len[0], chunk, chunk_width);
+ if (ofs < result || result == 0)
+ result = ofs;
+ }
+ else
+ {
+ ofs = str_rsearch (str[0], str_len[0], chunk, chunk_width);
+ if (ofs > result)
+ result = ofs;
+ }
+ }
+ set_number (node, result);
+ }
break;
case OP_LENGTH:
- frnc (s0l);
+ set_number (node, str_len[0]);
break;
case OP_LOWER:
{
char *cp;
- for (cp = &s0[s0l]; cp >= s0; cp--)
- *cp = tolower ((unsigned char) (*cp));
- n = repl_str_con (n, s0, s0l);
+ for (cp = str[0]; cp < str[0] + str_len[0]; cp++)
+ *cp = tolower ((unsigned char) *cp);
}
break;
case OP_UPPER:
{
char *cp;
- for (cp = &s0[s0[0] + 1]; cp > s0; cp--)
- *cp = toupper ((unsigned char) (*cp));
- n = repl_str_con (n, s0, s0l);
+ for (cp = str[0]; cp < str[0] + str_len[0]; cp++)
+ *cp = toupper ((unsigned char) *cp);
}
break;
case OP_LPAD:
- case OP_LPAD_OPT:
case OP_RPAD:
- case OP_RPAD_OPT:
{
- int c;
-
- if (n1 == SYSMIS)
- {
- n = repl_str_con (n, NULL, 0);
- break;
- }
- len = n1;
- len = range (len, 1, 255);
- add = max (n1 - s0l, 0);
-
- if (n->type == OP_LPAD_OPT || n->type == OP_RPAD_OPT)
- {
- if (s2l < 1)
- {
- c = n->type == OP_LPAD_OPT ? 'L' : 'R';
- msg (SE, _("Third argument to %cPAD() must be at least one "
- "character in length."), c);
- c = ' ';
- }
- else
- c = s2[0];
- }
- else
- c = ' ';
-
- if (n->type == OP_LPAD || n->type == OP_LPAD_OPT)
- memmove (&s0[add], s0, len);
- if (n->type == OP_LPAD || n->type == OP_LPAD_OPT)
- memset (s0, c, add);
- else
- memset (&s0[s0l], c, add);
-
- n = repl_str_con (n, s0, len);
+ char string[256];
+ int len, pad_len;
+ char pad_char;
+
+ /* Target length. */
+ len = num[1];
+ if (len < 1 || len > 255)
+ break;
+
+ /* Pad character. */
+ if (str_len[2] != 1)
+ break;
+ pad_char = str[2][0];
+
+ if (str_len[0] >= len)
+ len = str_len[0];
+ pad_len = len - str_len[0];
+ if (n->type == OP_LPAD)
+ {
+ memset (string, pad_char, pad_len);
+ memcpy (string + pad_len, str[0], str_len[0]);
+ }
+ else
+ {
+ memcpy (string, str[0], str_len[0]);
+ memset (string + str_len[0], pad_char, pad_len);
+ }
+
+ set_string (node, string, len);
}
break;
case OP_LTRIM:
- case OP_LTRIM_OPT:
case OP_RTRIM:
- case OP_RTRIM_OPT:
{
- int c = ' ';
- char *cp = s0;
-
- if (n->type == OP_LTRIM_OPT || n->type == OP_RTRIM_OPT)
- {
- if (s1l < 1)
- {
- c = n->type == OP_LTRIM_OPT ? 'L' : 'R';
- msg (SE, _("Second argument to %cTRIM() must be at least one "
- "character in length."), c);
- }
- else
- c = s1[0];
- }
- len = s0l;
- if (n->type == OP_LTRIM || n->type == OP_LTRIM_OPT)
- {
- while (*cp == c && cp < &s0[len])
- cp++;
- len -= cp - s0;
- }
+ char pad_char;
+ const char *cp = str[0];
+ int len = str_len[0];
+
+ /* Pad character. */
+ if (str_len[1] != 1)
+ break;
+ pad_char = str[1][0];
+
+ if (n->type == OP_LTRIM)
+ while (len > 0 && *cp == pad_char)
+ cp++, len--;
else
- while (len > 0 && s0[len - 1] == c)
+ while (len > 0 && str[0][len - 1] == pad_char)
len--;
- n = repl_str_con (n, cp, len);
- }
- break;
- case OP_NUMBER:
- case OP_NUMBER_OPT:
- {
- union value v;
- struct data_in di;
-
- di.s = s0;
- di.e = s0 + s0l;
- di.v = &v;
- di.flags = DI_IGNORE_ERROR;
- di.f1 = 1;
-
- if (n->type == OP_NUMBER_OPT)
- {
- di.format.type = (int) n->arg[1];
- di.format.w = (int) n->arg[2];
- di.format.d = (int) n->arg[3];
- }
- else
- {
- di.format.type = FMT_F;
- di.format.w = s0l;
- di.format.d = 0;
- }
-
- data_in (&di);
- frnc (v.f);
- }
- break;
- case OP_STRING:
- {
- union value v;
- struct fmt_spec f;
- f.type = (int) n->arg[1];
- f.w = (int) n->arg[2];
- f.d = (int) n->arg[3];
- v.f = n0;
-
- assert ((formats[f.type].cat & FCAT_STRING) == 0);
- data_out (strbuf, &f, &v);
- n = repl_str_con (n, strbuf, f.w);
+ set_string (node, cp, len);
}
break;
- case OP_SUBSTR:
- case OP_SUBSTR_OPT:
+ case OP_SUBSTR_2:
+ case OP_SUBSTR_3:
{
- int pos = (int) n1;
- if (pos > s0l || pos <= 0 || n1 == SYSMIS
- || (n->type == OP_SUBSTR_OPT && n2 == SYSMIS))
- n = repl_str_con (n, NULL, 0);
+ int pos = (int) num[1];
+ if (pos > str_len[0] || pos <= 0 || num[1] == SYSMIS
+ || (n->type == OP_SUBSTR_3 && num[2] == SYSMIS))
+ set_string (node, NULL, 0);
else
{
- if (n->type == OP_SUBSTR_OPT)
+ int len;
+ if (n->type == OP_SUBSTR_3)
{
- len = (int) n2;
- if (len + pos - 1 > s0l)
- len = s0l - pos + 1;
+ len = (int) num[2];
+ if (len + pos - 1 > str_len[0])
+ len = str_len[0] - pos + 1;
}
else
- len = s0l - pos + 1;
- n = repl_str_con (n, &s0[pos - 1], len);
+ len = str_len[0] - pos + 1;
+ set_string (node, &str[0][pos - 1], len);
}
}
break;
/* Weirdness. */
- case OP_INV:
- rnc (1.0 / n0);
- break;
case OP_MOD:
- if (n0 == 0.0 && n1 == SYSMIS)
- frnc (0.0);
+ if (num[0] == 0.0 && num[1] == SYSMIS)
+ set_number (node, 0.0);
else
- rnc (fmod (n0, n1));
+ set_number (node, fmod (num[0], num[1]));
break;
case OP_NUM_TO_BOOL:
- if (n0 == 0.0)
- n0 = 0.0;
- else if (n0 == 1.0)
- n0 = 1.0;
- else if (n0 != SYSMIS)
+ if (num[0] == 0.0)
+ num[0] = 0.0;
+ else if (num[0] == 1.0)
+ num[0] = 1.0;
+ else if (num[0] != SYSMIS)
{
msg (SE, _("When optimizing a constant expression, an integer "
"that was being used as an Boolean value was found "
"to have a constant value other than 0, 1, or SYSMIS."));
- n0 = 0.0;
+ num[0] = 0.0;
}
- rnc (n0);
+ set_number (node, num[0]);
break;
}
- return n;
}
-#undef n0
-#undef n1
-#undef n2
-
-#undef s0
-#undef s0l
-#undef s1
-#undef s1l
-#undef s2
-#undef s2l
-#undef s
-#undef sl
-
-#undef rnc
-#undef frnc
+static void
+evaluate_tree_with_missing (union any_node **node UNUSED, size_t count UNUSED)
+{
+ /* FIXME */
+}
-static struct nonterm_node *
-repl_num_con (struct nonterm_node * n, double d)
+static void
+collapse_node (union any_node **node, size_t child_idx)
{
- int i;
- if (!finite (d) || errno)
- d = SYSMIS;
- else
- for (i = 0; i < n->n; i++)
- if (n->arg[i]->type == OP_NUM_CON && n->arg[i]->num_con.value == SYSMIS)
- {
- d = SYSMIS;
- break;
- }
- return force_repl_num_con (n, d);
+ struct nonterm_node *nonterm = &(*node)->nonterm;
+ union any_node *child;
+
+ child = nonterm->arg[child_idx];
+ nonterm->arg[child_idx] = NULL;
+ free_node (*node);
+ *node = child;
}
-static struct nonterm_node *
-force_repl_num_con (struct nonterm_node * n, double d)
+
+static void
+set_number (union any_node **node, double value)
{
struct num_con_node *num;
+
+ free_node (*node);
- if (!finite (d) || errno)
- d = SYSMIS;
- free_node ((union any_node *) n);
- num = xmalloc (sizeof *num);
+ *node = xmalloc (sizeof *num);
+ num = &(*node)->num_con;
num->type = OP_NUM_CON;
- num->value = d;
- return (struct nonterm_node *) num;
+ num->value = finite (value) ? value : SYSMIS;
+}
+
+static void
+set_number_errno (union any_node **node, double value)
+{
+ if (errno == EDOM || errno == ERANGE)
+ value = SYSMIS;
+ set_number (node, value);
}
-static struct nonterm_node *
-repl_str_con (struct nonterm_node * n, char *s, int len)
+static void
+set_string (union any_node **node, const char *string, size_t length)
{
struct str_con_node *str;
/* The ordering here is important since the source string may be
part of a subnode of n. */
- str = xmalloc (sizeof *str + len - 1);
+ str = xmalloc (sizeof *str + length - 1);
str->type = OP_STR_CON;
- str->len = len;
- memcpy (str->s, s, len);
- free_node ((union any_node *) n);
- return (struct nonterm_node *) str;
+ str->len = length;
+ memcpy (str->s, string, length);
+ free_node (*node);
+ *node = (union any_node *) str;
}
/* Returns the number of days since 10 Oct 1582 for the date
month = floor (month + EPSILON);
day = floor (day + EPSILON);
- if (year >= 0. && year <= 199.)
+ if (year >= 0. && year <= 29.)
+ year += 2000.;
+ else if (year >= 30. && year <= 99.)
year += 1900.;
if ((year < 1582. || year > 19999.)
|| (year == 1582. && (month < 10. || (month == 10. && day < 15.)))
- || (month < -1 || month > 13)
- || (day < -1 || day > 32))
+ || (month < 0 || month > 13)
+ || (day < 0 || day > 31))
return SYSMIS;
return calendar_to_julian (year, month, day);
}
static void
dump_node (struct expr_dump_state *eds, union any_node * n)
{
- if (n->type == OP_AND || n->type == OP_OR)
- {
- int i;
-
- dump_node (eds, n->nonterm.arg[0]);
- for (i = 1; i < n->nonterm.n; i++)
- {
- dump_node (eds, n->nonterm.arg[i]);
- emit (eds, n->type);
- }
- return;
- }
- else if (n->type < OP_TERMINAL)
+ if (IS_NONTERMINAL (n->type))
{
int i;
for (i = 0; i < n->nonterm.n; i++)
emit (eds, (int) n->nonterm.arg[n->nonterm.n + 1]);
emit (eds, (int) n->nonterm.arg[n->nonterm.n + 2]);
}
- return;
}
-
- emit (eds, n->type);
- if (n->type == OP_NUM_CON)
- emit_num_con (eds, n->num_con.value);
- else if (n->type == OP_STR_CON)
- emit_str_con (eds, n->str_con.s, n->str_con.len);
- else if (n->type == OP_NUM_VAR || n->type == OP_STR_VAR
- || n->type == OP_STR_MIS)
- emit_var (eds, n->var.v);
- else if (n->type == OP_NUM_LAG || n->type == OP_STR_LAG)
+ else
{
- emit_var (eds, n->lag.v);
- emit (eds, n->lag.lag);
+ emit (eds, n->type);
+ if (n->type == OP_NUM_CON)
+ emit_num_con (eds, n->num_con.value);
+ else if (n->type == OP_STR_CON)
+ emit_str_con (eds, n->str_con.s, n->str_con.len);
+ else if (n->type == OP_NUM_VAR || n->type == OP_STR_VAR)
+ emit_var (eds, n->var.v);
+ else if (n->type == OP_NUM_LAG || n->type == OP_STR_LAG)
+ {
+ emit_var (eds, n->lag.v);
+ emit (eds, n->lag.lag);
+ }
+ else if (n->type == OP_NUM_SYS || n->type == OP_NUM_VAL)
+ emit (eds, n->var.v->fv);
+ else
+ assert (n->type == OP_CASENUM);
}
- else if (n->type == OP_NUM_SYS || n->type == OP_NUM_VAL)
- emit (eds, n->var.v->fv);
- else
- assert (n->type == OP_CASENUM);
}
static void