1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #if TIME_WITH_SYS_TIME
43 #include "julcal/julcal.h"
53 /* FIXME: This could be even more efficient if we caught SYSMIS when
54 it first reared its ugly head, then threw it into an entirely new
55 switch that handled SYSMIS aggressively like all the code does now.
56 But I've spent a couple of weeks on the expression code, and that's
57 enough to make anyone sick. For that matter, it could be more
58 efficient if I hand-coded it in assembly for a dozen processors,
59 but I'm not going to do that either. */
62 expr_evaluate (struct expression *e, struct ccase *c, union value *v)
64 unsigned char *op = e->op;
66 unsigned char *str = e->str;
67 struct variable **vars = e->var;
71 union value *sp = e->stack;
82 for (i = 1; i < *op; i++)
84 if (sp[i].f == SYSMIS)
97 for (i = 1; i < *op; i++)
99 if (sp[i].f == SYSMIS)
111 if (sp[0].f == SYSMIS)
113 if (approx_eq (sp[1].f, 0.0))
116 else if (sp[1].f == SYSMIS)
124 else if (approx_eq (sp[0].f, 0.0) && approx_eq (sp[1].f, 0.0))
127 sp->f = pow (sp[0].f, sp[1].f);
131 /* Note that the equality operator (==) may be used here
132 (instead of approx_eq) because booleans are always
133 *exactly* 0, 1, or SYSMIS.
135 Truth table (in order of detection):
147 1 and SYSMIS = SYSMIS
148 SYSMIS and SYSMIS = SYSMIS
152 SYSMIS and 1 = SYSMIS
156 if (sp[0].f == 0.0); /* 1 */
157 else if (sp[1].f == 0.0)
159 else if (sp[1].f == SYSMIS)
160 sp->f = SYSMIS; /* 3 */
163 /* Truth table (in order of detection):
176 SYSMIS or SYSMIS = SYSMIS
184 if (sp[0].f == 1.0); /* 1 */
185 else if (sp[1].f == 1.0)
187 else if (sp[1].f == SYSMIS)
188 sp->f = SYSMIS; /* 3 */
193 else if (sp[0].f == 1.0)
199 if (sp[0].f != SYSMIS)
201 if (sp[1].f == SYSMIS)
204 sp->f = approx_eq (sp[0].f, sp[1].f);
209 if (sp[0].f != SYSMIS)
211 if (sp[1].f == SYSMIS)
214 sp->f = approx_ge (sp[0].f, sp[1].f);
219 if (sp[0].f != SYSMIS)
221 if (sp[1].f == SYSMIS)
224 sp->f = approx_gt (sp[0].f, sp[1].f);
229 if (sp[0].f != SYSMIS)
231 if (sp[1].f == SYSMIS)
234 sp->f = approx_le (sp[0].f, sp[1].f);
239 if (sp[0].f != SYSMIS)
241 if (sp[1].f == SYSMIS)
244 sp->f = approx_lt (sp[0].f, sp[1].f);
249 if (sp[0].f != SYSMIS)
251 if (sp[1].f == SYSMIS)
254 sp->f = approx_ne (sp[0].f, sp[1].f);
258 /* String operators. */
261 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
262 &sp[1].c[1], sp[1].c[0]) == 0;
266 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
267 &sp[1].c[1], sp[1].c[0]) >= 0;
271 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
272 &sp[1].c[1], sp[1].c[0]) > 0;
276 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
277 &sp[1].c[1], sp[1].c[0]) <= 0;
281 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
282 &sp[1].c[1], sp[1].c[0]) < 0;
286 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
287 &sp[1].c[1], sp[1].c[0]) != 0;
290 /* Unary functions. */
297 sp->f = fabs (sp->f);
303 sp->f = acos (sp->f);
312 sp->f = asin (sp->f);
319 sp->f = atan (sp->f);
338 sp->f = log10 (sp->f);
347 sp->f = log10 (sp->f);
354 sp->f = fmod (sp->f, 10);
360 sp->f = floor (sp->f + 0.5);
362 sp->f = -floor (-sp->f + 0.5);
373 sp->f = sqrt (sp->f);
391 sp->f = floor (sp->f);
393 sp->f = -floor (-sp->f);
397 /* N-ary numeric functions. */
406 for (i = 1; i <= n_args; i++)
407 if (approx_eq (sp[0].f, sp[i].f))
412 else if (sp[i].f != SYSMIS)
414 sp->f = sysmis ? SYSMIS : 0.0;
422 for (i = 1; i <= n_args; i++)
423 if (!st_compare_pad (&sp[0].c[1], sp[0].c[0],
424 &sp[i].c[1], sp[i].c[0]))
440 for (i = 0; i < n_args; i++)
441 if (sp[i].f != SYSMIS)
445 sum[1] += sp[i].f * sp[i].f;
450 sp->f = calc_cfvar (sum, nv);
457 double max = -DBL_MAX;
460 for (i = 0; i < n_args; i++)
461 if (sp[i].f != SYSMIS)
481 for (i = 0; i < n_args; i++)
482 if (sp[i].f != SYSMIS)
490 sp->f = calc_mean (sum, nv);
497 double min = DBL_MAX;
500 for (i = 0; i < n_args; i++)
501 if (sp[i].f != SYSMIS)
519 for (i = 0; i < n_args; i++)
520 if (sp[i].f == SYSMIS)
531 for (i = 0; i < n_args; i++)
532 if (sp[i].f != SYSMIS)
545 for (i = 1; i <= n_args; i += 2)
546 if (sp[i].f == SYSMIS || sp[i + 1].f == SYSMIS)
548 else if (approx_ge (sp[0].f, sp[i].f)
549 && approx_le (sp[0].f, sp[i + 1].f))
556 sp->f = sysmis ? SYSMIS : 0.0;
559 case OP_RANGE_STRING:
564 for (i = 1; i <= n_args; i += 2)
565 if (st_compare_pad (&sp[0].c[1], sp[0].c[0],
566 &sp[i].c[1], sp[i].c[0]) >= 0
567 && st_compare_pad (&sp[0].c[1], sp[0].c[0],
568 &sp[i + 1].c[1], sp[i + 1].c[0]) <= 0)
582 sum[0] = sum[1] = 0.0;
585 for (i = 0; i < n_args; i++)
586 if (sp[i].f != SYSMIS)
590 sum[1] += sp[i].f * sp[i].f;
595 sp->f = calc_stddev (calc_variance (sum, nv));
605 for (i = 0; i < n_args; i++)
606 if (sp[i].f != SYSMIS)
623 sum[0] = sum[1] = 0.0;
626 for (i = 0; i < n_args; i++)
627 if (sp[i].f != SYSMIS)
631 sum[1] += sp[i].f * sp[i].f;
636 sp->f = calc_variance (sum, nv);
640 /* Time construction function. */
643 if (sp[0].f == SYSMIS || sp[1].f == SYSMIS || sp[2].f == SYSMIS)
646 sp->f = 60. * (60. * sp[0].f + sp[1].f) + sp[2].f;
649 /* Date construction functions. */
652 sp->f = yrmoda (sp[2].f, sp[1].f, sp[0].f);
654 sp->f *= 60. * 60. * 24.;
658 sp->f = yrmoda (sp[2].f, sp[0].f, sp[1].f);
660 sp->f *= 60. * 60. * 24.;
664 sp->f = yrmoda (sp[1].f, sp[0].f, 1);
666 sp->f *= 60. * 60. * 24.;
670 if (sp[0].f == SYSMIS)
674 sp->f = yrmoda (sp[1].f, sp[0].f * 3 - 2, 1);
676 sp->f *= 60. * 60. * 24.;
681 if (sp[0].f == SYSMIS)
685 sp[1].f = yrmoda (sp[1].f, 1, 1);
687 sp[1].f = 60. * 60. * 24. * (sp[1].f + 7. * (floor (sp[0].f) - 1.));
693 if (sp[1].f == SYSMIS)
697 sp->f = yrmoda (sp[0].f, 1, 1);
699 sp->f = 60. * 60. * 24. * (sp->f + floor (sp[1].f) - 1);
704 sp->f = yrmoda (sp[0].f, sp[1].f, sp[2].f);
707 /* Date extraction functions. */
710 sp->f = floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
714 sp->f = fmod (floor (sp->f / 60. / 60.), 24.);
718 sp->f = 86400. * julian_to_jday (sp->f / 86400.);
724 julian_to_calendar (sp->f / 86400., NULL, NULL, &day);
728 case OP_XDATE_MINUTE:
730 sp->f = fmod (floor (sp->f / 60.), 60.);
736 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
740 case OP_XDATE_QUARTER:
744 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
745 sp->f = (month - 1) / 3 + 1;
748 case OP_XDATE_SECOND:
750 sp->f = fmod (sp->f, 60.);
754 sp->f = floor (sp->f / 60. / 60. / 24.);
758 sp->f -= floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
762 sp->f = (julian_to_jday (sp->f / 86400.) - 1) / 7 + 1;
766 sp->f = julian_to_wday (sp->f / 86400.);
772 julian_to_calendar (sp->f / 86400., &year, NULL, NULL);
777 /* String functions. */
783 dest = pool_alloc (e->pool, 256);
787 for (i = 0; i < n_args; i++)
790 if (sp[i].c[0] + dest[0] < 255)
792 memcpy (&dest[dest[0] + 1], &sp[i].c[1], sp[i].c[0]);
793 dest[0] += sp[i].c[0];
797 memcpy (&dest[dest[0] + 1], &sp[i].c[1], 255 - dest[0]);
811 int last = sp[0].c[0] - sp[1].c[0];
812 for (i = 0; i <= last; i++)
813 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
823 /* Length of each search string. */
824 int part_len = sp[2].f;
827 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
828 || sp[1].c[0] % part_len != 0)
832 /* Last possible index. */
833 int last = sp[0].c[0] - part_len;
835 for (i = 0; i <= last; i++)
836 for (j = 0; j < sp[1].c[0]; j += part_len)
837 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
852 for (i = sp[0].c[0] - sp[1].c[0]; i >= 0; i--)
853 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
863 /* Length of each search string. */
864 int part_len = sp[2].f;
867 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
868 || sp[1].c[0] % part_len != 0)
872 for (i = sp[0].c[0] - part_len; i >= 0; i--)
873 for (j = 0; j < sp[1].c[0]; j += part_len)
874 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
887 for (i = sp[0].c[0]; i >= 1; i--)
888 sp[0].c[i] = tolower ((unsigned char) (sp[0].c[i]));
891 for (i = sp[0].c[0]; i >= 1; i--)
892 sp[0].c[i] = toupper ((unsigned char) (sp[0].c[i]));
899 if (sp[1].f == SYSMIS || len < 0 || len > 255)
901 else if (len > sp[0].c[0])
905 dest = pool_alloc (e->pool, len + 1);
907 memset (&dest[1], ' ', len - sp->c[0]);
908 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
918 if (sp[1].f == SYSMIS || len < 0 || len > 255 || sp[2].c[0] != 1)
920 else if (len > sp[0].c[0])
924 dest = pool_alloc (e->pool, len + 1);
926 memset (&dest[1], sp[2].c[1], len - sp->c[0]);
927 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
937 if (sp[1].f == SYSMIS || len < 0 || len > 255)
939 else if (len > sp[0].c[0])
943 dest = pool_alloc (e->pool, len + 1);
945 memcpy (&dest[1], &sp->c[1], sp->c[0]);
946 memset (&dest[sp->c[0] + 1], ' ', len - sp->c[0]);
956 if (len < 0 || len > 255 || sp[2].c[0] != 1)
958 else if (len > sp[0].c[0])
962 dest = pool_alloc (e->pool, len + 1);
964 memcpy (&dest[1], &sp->c[1], sp->c[0]);
965 memset (&dest[sp->c[0] + 1], sp[2].c[1], len - sp->c[0]);
972 int len = sp[0].c[0];
975 while (i <= len && sp[0].c[i] == ' ')
979 sp[0].c[i] = sp[0].c[0] - i;
991 int len = sp[0].c[0];
992 int cmp = sp[1].c[1];
995 while (i <= len && sp[0].c[i] == cmp)
999 sp[0].c[i] = sp[0].c[0] - i;
1000 sp->c = &sp[0].c[i];
1007 while (sp[0].c[sp[0].c[0]] == ' ')
1012 if (sp[1].c[0] != 1)
1016 /* Note that NULs are not allowed in strings. This code
1017 needs to change if this decision is changed. */
1018 int cmp = sp[1].c[1];
1019 while (sp[0].c[sp[0].c[0]] == cmp)
1028 di.e = &sp->c[1] + sp->c[0];
1030 di.flags = DI_IGNORE_ERROR;
1032 di.format.type = FMT_F;
1033 di.format.w = sp->c[0];
1042 di.e = &sp->c[1] + sp->c[0];
1044 di.flags = DI_IGNORE_ERROR;
1046 di.format.type = *op++;
1047 di.format.w = *op++;
1048 di.format.d = *op++;
1055 unsigned char *dest;
1061 dest = pool_alloc (e->pool, f.w + 1);
1064 data_out (&dest[1], &f, sp);
1074 if (index < 1 || index > sp[0].c[0])
1079 sp->c[index] = sp->c[0] - index;
1092 if (sp[1].f == SYSMIS || sp[2].f == SYSMIS || index < 1
1093 || index > sp[0].c[0] || n < 1)
1100 sp->c[index] = sp->c[0] - index;
1111 if (sp->f != SYSMIS)
1115 if (sp->f != SYSMIS)
1118 case OP_NUM_TO_BOOL:
1119 if (approx_eq (sp->f, 0.0))
1121 else if (approx_eq (sp->f, 1.0))
1123 else if (sp->f != SYSMIS)
1125 msg (SE, _("A number being treated as a Boolean in an "
1126 "expression was found to have a value other than "
1127 "0 (false), 1 (true), or the system-missing value. "
1128 "The result was forced to 0."));
1136 if (sp[0].f != SYSMIS)
1138 if (sp[1].f == SYSMIS)
1140 if (approx_ne (sp[0].f, 0.0))
1144 sp->f = fmod (sp[0].f, sp[1].f);
1148 if (sp->f != SYSMIS)
1149 sp->f *= rng_get_double_normal (pspp_rng ());
1152 if (sp->f != SYSMIS)
1153 sp->f *= rng_get_double (pspp_rng ());
1156 if (sp[0].f == SYSMIS || !finite (sp[0].f))
1161 case OP_VEC_ELEM_NUM:
1163 int rindx = sp[0].f + EPSILON;
1164 const struct vector *v = dict_get_vector (default_dict, *op++);
1166 if (sp[0].f == SYSMIS || rindx < 1 || rindx > v->cnt)
1168 if (sp[0].f == SYSMIS)
1169 msg (SE, _("SYSMIS is not a valid index value for vector "
1170 "%s. The result will be set to SYSMIS."),
1173 msg (SE, _("%g is not a valid index value for vector %s. "
1174 "The result will be set to SYSMIS."),
1179 sp->f = c->data[v->var[rindx - 1]->fv].f;
1182 case OP_VEC_ELEM_STR:
1184 int rindx = sp[0].f + EPSILON;
1185 const struct vector *vect = dict_get_vector (default_dict, *op++);
1188 if (sp[0].f == SYSMIS || rindx < 1 || rindx > vect->cnt)
1190 if (sp[0].f == SYSMIS)
1191 msg (SE, _("SYSMIS is not a valid index value for vector "
1192 "%s. The result will be set to the empty "
1196 msg (SE, _("%g is not a valid index value for vector %s. "
1197 "The result will be set to the empty string."),
1198 sp[0].f, vect->name);
1199 sp->c = pool_alloc (e->pool, 1);
1204 v = vect->var[rindx - 1];
1205 sp->c = pool_alloc (e->pool, v->width + 1);
1206 sp->c[0] = v->width;
1207 memcpy (&sp->c[1], c->data[v->fv].s, v->width);
1218 sp->c = pool_alloc (e->pool, *str + 1);
1219 memcpy (sp->c, str, *str + 1);
1224 sp->f = c->data[(*vars)->fv].f;
1225 if (is_num_user_missing (sp->f, *vars))
1231 int width = (*vars)->width;
1234 sp->c = pool_alloc (e->pool, width + 1);
1236 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1242 struct ccase *c = lagged_case (*op++);
1249 sp->f = c->data[(*vars)->fv].f;
1250 if (is_num_user_missing (sp->f, *vars))
1258 struct ccase *c = lagged_case (*op++);
1259 int width = (*vars)->width;
1262 sp->c = pool_alloc (e->pool, width + 1);
1266 memset (sp->c, ' ', width);
1268 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1275 sp->f = c->data[*op++].f == SYSMIS;
1279 sp->f = is_str_user_missing (c->data[(*vars)->fv].s, *vars);
1284 sp->f = c->data[*op++].f;
1288 sp->f = vfm_sink_info.ncases + 1;
1295 #if GLOBAL_DEBUGGING
1296 printf (_("evaluate_expression(): not implemented: %s\n"),
1299 printf (_("evaluate_expression(): not implemented: %d\n"), op[-1]);
1307 if (e->type != EX_STRING)
1309 double value = sp->f;
1310 if (!finite (value))