1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
38 #if TIME_WITH_SYS_TIME
59 #include "julcal/julcal.h"
68 /* FIXME: This could be even more efficient if we caught SYSMIS when
69 it first reared its ugly head, then threw it into an entirely new
70 switch that handled SYSMIS aggressively like all the code does now.
71 But I've spent a couple of weeks on the expression code, and that's
72 enough to make anyone sick. For that matter, it could be more
73 efficient if I hand-coded it in assembly for a dozen processors,
74 but I'm not going to do that either. */
76 /* These macros are defined differently depending on the way that
77 the stack is managed. (i.e., I have to adapt the code to inferior
80 void CHECK_STRING_SPACE(int x): Assure that at least X+1 bytes of
81 space are available in the string evaluation stack.
83 unsigned char *ALLOC_STRING_SPACE(int x): Return a pointer to X+1
84 bytes of space. CHECK_STRING_SPACE must have previously been
85 called with an argument of at least X. */
88 #define CHECK_STRING_SPACE(X) /* nothing to do! */
89 #define ALLOC_STRING_SPACE(X) \
91 #else /* !PAGED_STACK */
92 #define CHECK_STRING_SPACE(X) \
95 if (str_stk + X >= str_end) \
97 e->str_size += 1024; \
98 e->str_stk = xrealloc (e->str_stk, e->str_size); \
99 str_end = e->str_stk + e->str_size - 1; \
104 #define ALLOC_STRING_SPACE(X) \
105 (str_stk += X + 1, str_stk - X - 1)
106 #endif /* !PAGED_STACK */
109 expr_evaluate (struct expression *e, struct ccase *c, union value *v)
111 unsigned char *op = e->op;
112 double *dbl = e->num;
113 unsigned char *str = e->str;
115 unsigned char *str_stk = e->str_stk;
116 unsigned char *str_end = e->str_stk + e->str_size - 1;
118 struct variable **vars = e->var;
122 union value *sp = e->stack;
131 for (i = 1; i < *op; i++)
133 if (sp[i].f == SYSMIS)
146 for (i = 1; i < *op; i++)
148 if (sp[i].f == SYSMIS)
160 if (sp[0].f == SYSMIS)
162 if (approx_eq (sp[1].f, 0.0))
165 else if (sp[1].f == SYSMIS)
173 else if (approx_eq (sp[0].f, 0.0) && approx_eq (sp[1].f, 0.0))
176 sp->f = pow (sp[0].f, sp[1].f);
180 /* Note that the equality operator (==) may be used here
181 (instead of approx_eq) because booleans are always
182 *exactly* 0, 1, or SYSMIS.
184 Truth table (in order of detection):
196 1 and SYSMIS = SYSMIS
197 SYSMIS and SYSMIS = SYSMIS
201 SYSMIS and 1 = SYSMIS
205 if (sp[0].f == 0.0); /* 1 */
206 else if (sp[1].f == 0.0)
208 else if (sp[1].f == SYSMIS)
209 sp->f = SYSMIS; /* 3 */
212 /* Truth table (in order of detection):
225 SYSMIS or SYSMIS = SYSMIS
233 if (sp[0].f == 1.0); /* 1 */
234 else if (sp[1].f == 1.0)
236 else if (sp[1].f == SYSMIS)
237 sp->f = SYSMIS; /* 3 */
242 else if (sp[0].f == 1.0)
248 if (sp[0].f != SYSMIS)
250 if (sp[1].f == SYSMIS)
253 sp->f = approx_eq (sp[0].f, sp[1].f);
258 if (sp[0].f != SYSMIS)
260 if (sp[1].f == SYSMIS)
263 sp->f = approx_ge (sp[0].f, sp[1].f);
268 if (sp[0].f != SYSMIS)
270 if (sp[1].f == SYSMIS)
273 sp->f = approx_gt (sp[0].f, sp[1].f);
278 if (sp[0].f != SYSMIS)
280 if (sp[1].f == SYSMIS)
283 sp->f = approx_le (sp[0].f, sp[1].f);
288 if (sp[0].f != SYSMIS)
290 if (sp[1].f == SYSMIS)
293 sp->f = approx_lt (sp[0].f, sp[1].f);
298 if (sp[0].f != SYSMIS)
300 if (sp[1].f == SYSMIS)
303 sp->f = approx_ne (sp[0].f, sp[1].f);
307 /* String operators. */
310 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
311 &sp[1].c[1], sp[1].c[0]) == 0;
315 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
316 &sp[1].c[1], sp[1].c[0]) >= 0;
320 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
321 &sp[1].c[1], sp[1].c[0]) > 0;
325 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
326 &sp[1].c[1], sp[1].c[0]) <= 0;
330 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
331 &sp[1].c[1], sp[1].c[0]) < 0;
335 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
336 &sp[1].c[1], sp[1].c[0]) != 0;
339 /* Unary functions. */
346 sp->f = fabs (sp->f);
352 sp->f = acos (sp->f);
361 sp->f = asin (sp->f);
368 sp->f = atan (sp->f);
387 sp->f = log10 (sp->f);
396 sp->f = log10 (sp->f);
403 sp->f = fmod (sp->f, 10);
409 sp->f = floor (sp->f + 0.5);
411 sp->f = -floor (-sp->f + 0.5);
422 sp->f = sqrt (sp->f);
440 sp->f = floor (sp->f);
442 sp->f = -floor (-sp->f);
446 /* N-ary numeric functions. */
455 for (i = 1; i <= n_args; i++)
456 if (approx_eq (sp[0].f, sp[i].f))
461 else if (sp[i].f != SYSMIS)
463 sp->f = sysmis ? SYSMIS : 0.0;
471 for (i = 1; i <= n_args; i++)
472 if (!st_compare_pad (&sp[0].c[1], sp[0].c[0],
473 &sp[i].c[1], sp[i].c[0]))
489 for (i = 0; i < n_args; i++)
490 if (sp[i].f != SYSMIS)
494 sum[1] += sp[i].f * sp[i].f;
499 sp->f = calc_cfvar (sum, nv);
506 double max = -DBL_MAX;
509 for (i = 0; i < n_args; i++)
510 if (sp[i].f != SYSMIS)
530 for (i = 0; i < n_args; i++)
531 if (sp[i].f != SYSMIS)
539 sp->f = calc_mean (sum, nv);
546 double min = DBL_MAX;
549 for (i = 0; i < n_args; i++)
550 if (sp[i].f != SYSMIS)
568 for (i = 0; i < n_args; i++)
569 if (sp[i].f == SYSMIS)
580 for (i = 0; i < n_args; i++)
581 if (sp[i].f != SYSMIS)
594 for (i = 1; i <= n_args; i += 2)
595 if (sp[i].f == SYSMIS || sp[i + 1].f == SYSMIS)
597 else if (approx_ge (sp[0].f, sp[i].f)
598 && approx_le (sp[0].f, sp[i + 1].f))
605 sp->f = sysmis ? SYSMIS : 0.0;
608 case OP_RANGE_STRING:
613 for (i = 1; i <= n_args; i += 2)
614 if (st_compare_pad (&sp[0].c[1], sp[0].c[0],
615 &sp[i].c[1], sp[i].c[0]) >= 0
616 && st_compare_pad (&sp[0].c[1], sp[0].c[0],
617 &sp[i + 1].c[1], sp[i + 1].c[0]) <= 0)
631 sum[0] = sum[1] = 0.0;
634 for (i = 0; i < n_args; i++)
635 if (sp[i].f != SYSMIS)
639 sum[1] += sp[i].f * sp[i].f;
644 sp->f = calc_stddev (calc_variance (sum, nv));
654 for (i = 0; i < n_args; i++)
655 if (sp[i].f != SYSMIS)
672 sum[0] = sum[1] = 0.0;
675 for (i = 0; i < n_args; i++)
676 if (sp[i].f != SYSMIS)
680 sum[1] += sp[i].f * sp[i].f;
685 sp->f = calc_variance (sum, nv);
689 /* Time construction function. */
692 if (sp[0].f == SYSMIS || sp[1].f == SYSMIS || sp[2].f == SYSMIS)
695 sp->f = 60. * (60. * sp[0].f + sp[1].f) + sp[2].f;
698 /* Date construction functions. */
701 sp->f = yrmoda (sp[2].f, sp[1].f, sp[0].f);
703 sp->f *= 60. * 60. * 24.;
707 sp->f = yrmoda (sp[2].f, sp[0].f, sp[1].f);
709 sp->f *= 60. * 60. * 24.;
713 sp->f = yrmoda (sp[1].f, sp[0].f, 1);
715 sp->f *= 60. * 60. * 24.;
719 if (sp[0].f == SYSMIS)
723 sp->f = yrmoda (sp[1].f, sp[0].f * 3 - 2, 1);
725 sp->f *= 60. * 60. * 24.;
730 if (sp[0].f == SYSMIS)
734 sp[1].f = yrmoda (sp[1].f, 1, 1);
736 sp[1].f = 60. * 60. * 24. * (sp[1].f + 7. * (floor (sp[0].f) - 1.));
742 if (sp[1].f == SYSMIS)
746 sp->f = yrmoda (sp[0].f, 1, 1);
748 sp->f = 60. * 60. * 24. * (sp->f + floor (sp[1].f) - 1);
753 sp->f = yrmoda (sp[0].f, sp[1].f, sp[2].f);
756 /* Date extraction functions. */
759 sp->f = floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
763 sp->f = fmod (floor (sp->f / 60. / 60.), 24.);
767 sp->f = 86400. * julian_to_jday (sp->f / 86400.);
773 julian_to_calendar (sp->f / 86400., NULL, NULL, &day);
777 case OP_XDATE_MINUTE:
779 sp->f = fmod (floor (sp->f / 60.), 60.);
785 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
789 case OP_XDATE_QUARTER:
793 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
794 sp->f = (month - 1) / 3 + 1;
797 case OP_XDATE_SECOND:
799 sp->f = fmod (sp->f, 60.);
803 sp->f = floor (sp->f / 60. / 60. / 24.);
807 sp->f -= floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
811 sp->f = (julian_to_jday (sp->f / 86400.) - 1) / 7 + 1;
815 sp->f = julian_to_wday (sp->f / 86400.);
821 julian_to_calendar (sp->f / 86400., &year, NULL, NULL);
826 /* String functions. */
832 CHECK_STRING_SPACE (255);
833 dest = ALLOC_STRING_SPACE (255);
837 for (i = 0; i < n_args; i++)
840 if (sp[i].c[0] + dest[0] < 255)
842 memcpy (&dest[dest[0] + 1], &sp[i].c[1], sp[i].c[0]);
843 dest[0] += sp[i].c[0];
847 memcpy (&dest[dest[0] + 1], &sp[i].c[1], 255 - dest[0]);
861 int last = sp[0].c[0] - sp[1].c[0];
862 for (i = 0; i <= last; i++)
863 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
873 /* Length of each search string. */
874 int part_len = sp[2].f;
877 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
878 || sp[1].c[0] % part_len != 0)
882 /* Last possible index. */
883 int last = sp[0].c[0] - part_len;
885 for (i = 0; i <= last; i++)
886 for (j = 0; j < sp[1].c[0]; j += part_len)
887 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
902 for (i = sp[0].c[0] - sp[1].c[0]; i >= 0; i--)
903 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
913 /* Length of each search string. */
914 int part_len = sp[2].f;
917 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
918 || sp[1].c[0] % part_len != 0)
922 for (i = sp[0].c[0] - part_len; i >= 0; i--)
923 for (j = 0; j < sp[1].c[0]; j += part_len)
924 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
937 for (i = sp[0].c[0]; i >= 1; i--)
938 sp[0].c[i] = tolower ((unsigned char) (sp[0].c[i]));
941 for (i = sp[0].c[0]; i >= 1; i--)
942 sp[0].c[i] = toupper ((unsigned char) (sp[0].c[i]));
949 if (sp[1].f == SYSMIS || len < 0 || len > 255)
951 else if (len > sp[0].c[0])
955 CHECK_STRING_SPACE (len);
956 dest = ALLOC_STRING_SPACE (len);
958 memset (&dest[1], ' ', len - sp->c[0]);
959 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
969 if (sp[1].f == SYSMIS || len < 0 || len > 255 || sp[2].c[0] != 1)
971 else if (len > sp[0].c[0])
975 CHECK_STRING_SPACE (len);
976 dest = ALLOC_STRING_SPACE (len);
978 memset (&dest[1], sp[2].c[1], len - sp->c[0]);
979 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
989 if (sp[1].f == SYSMIS || len < 0 || len > 255)
991 else if (len > sp[0].c[0])
995 CHECK_STRING_SPACE (len);
996 dest = ALLOC_STRING_SPACE (len);
998 memcpy (&dest[1], &sp->c[1], sp->c[0]);
999 memset (&dest[sp->c[0] + 1], ' ', len - sp->c[0]);
1009 if (len < 0 || len > 255 || sp[2].c[0] != 1)
1011 else if (len > sp[0].c[0])
1013 unsigned char *dest;
1015 CHECK_STRING_SPACE (len);
1016 dest = ALLOC_STRING_SPACE (len);
1018 memcpy (&dest[1], &sp->c[1], sp->c[0]);
1019 memset (&dest[sp->c[0] + 1], sp[2].c[1], len - sp->c[0]);
1026 int len = sp[0].c[0];
1029 while (i <= len && sp[0].c[i] == ' ')
1033 sp[0].c[i] = sp[0].c[0] - i;
1034 sp->c = &sp[0].c[i];
1041 if (sp[1].c[0] != 1)
1045 int len = sp[0].c[0];
1046 int cmp = sp[1].c[1];
1049 while (i <= len && sp[0].c[i] == cmp)
1053 sp[0].c[i] = sp[0].c[0] - i;
1054 sp->c = &sp[0].c[i];
1061 while (sp[0].c[sp[0].c[0]] == ' ')
1066 if (sp[1].c[0] != 1)
1070 /* Note that NULs are not allowed in strings. This code
1071 needs to change if this decision is changed. */
1072 int cmp = sp[1].c[1];
1073 while (sp[0].c[sp[0].c[0]] == cmp)
1082 di.e = &sp->c[1] + sp->c[0];
1084 di.flags = DI_IGNORE_ERROR;
1086 di.format.type = FMT_F;
1087 di.format.w = sp->c[0];
1096 di.e = &sp->c[1] + sp->c[0];
1098 di.flags = DI_IGNORE_ERROR;
1100 di.format.type = *op++;
1101 di.format.w = *op++;
1102 di.format.d = *op++;
1109 unsigned char *dest;
1115 CHECK_STRING_SPACE (f.w);
1116 dest = ALLOC_STRING_SPACE (f.w);
1119 data_out (&dest[1], &f, sp);
1129 if (index < 1 || index > sp[0].c[0])
1134 sp->c[index] = sp->c[0] - index;
1147 if (sp[1].f == SYSMIS || sp[2].f == SYSMIS || index < 1
1148 || index > sp[0].c[0] || n < 1)
1155 sp->c[index] = sp->c[0] - index;
1166 if (sp->f != SYSMIS)
1170 if (sp->f != SYSMIS)
1173 case OP_NUM_TO_BOOL:
1174 if (approx_eq (sp->f, 0.0))
1176 else if (approx_eq (sp->f, 1.0))
1178 else if (sp->f != SYSMIS)
1180 msg (SE, _("A number being treated as a Boolean in an "
1181 "expression was found to have a value other than "
1182 "0 (false), 1 (true), or the system-missing value. "
1183 "The result was forced to 0."));
1191 if (sp[0].f != SYSMIS)
1193 if (sp[1].f == SYSMIS)
1195 if (approx_ne (sp[0].f, 0.0))
1199 sp->f = fmod (sp[0].f, sp[1].f);
1203 if (sp->f != SYSMIS)
1204 sp->f *= rng_get_double_normal (pspp_rng ());
1207 if (sp->f != SYSMIS)
1208 sp->f *= rng_get_double (pspp_rng ());
1211 if (sp[0].f == SYSMIS || !finite (sp[0].f))
1216 case OP_VEC_ELEM_NUM:
1218 int rindx = sp[0].f + EPSILON;
1219 const struct vector *v = dict_get_vector (default_dict, *op++);
1221 if (sp[0].f == SYSMIS || rindx < 1 || rindx > v->cnt)
1223 if (sp[0].f == SYSMIS)
1224 msg (SE, _("SYSMIS is not a valid index value for vector "
1225 "%s. The result will be set to SYSMIS."),
1228 msg (SE, _("%g is not a valid index value for vector %s. "
1229 "The result will be set to SYSMIS."),
1234 sp->f = c->data[v->var[rindx - 1]->fv].f;
1237 case OP_VEC_ELEM_STR:
1239 int rindx = sp[0].f + EPSILON;
1240 const struct vector *vect = dict_get_vector (default_dict, *op++);
1243 if (sp[0].f == SYSMIS || rindx < 1 || rindx > vect->cnt)
1245 if (sp[0].f == SYSMIS)
1246 msg (SE, _("SYSMIS is not a valid index value for vector "
1247 "%s. The result will be set to the empty "
1251 msg (SE, _("%g is not a valid index value for vector %s. "
1252 "The result will be set to the empty string."),
1253 sp[0].f, vect->name);
1254 CHECK_STRING_SPACE (0);
1255 sp->c = ALLOC_STRING_SPACE (0);
1260 v = vect->var[rindx - 1];
1261 CHECK_STRING_SPACE (v->width);
1262 sp->c = ALLOC_STRING_SPACE (v->width);
1263 sp->c[0] = v->width;
1264 memcpy (&sp->c[1], c->data[v->fv].s, v->width);
1275 CHECK_STRING_SPACE (*str);
1276 sp->c = ALLOC_STRING_SPACE (*str);
1277 memcpy (sp->c, str, *str + 1);
1282 sp->f = c->data[(*vars)->fv].f;
1283 if (is_num_user_missing (sp->f, *vars))
1289 int width = (*vars)->width;
1292 CHECK_STRING_SPACE (width);
1293 sp->c = ALLOC_STRING_SPACE (width);
1295 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1301 struct ccase *c = lagged_case (*op++);
1308 sp->f = c->data[(*vars)->fv].f;
1309 if (is_num_user_missing (sp->f, *vars))
1317 struct ccase *c = lagged_case (*op++);
1318 int width = (*vars)->width;
1321 CHECK_STRING_SPACE (width);
1322 sp->c = ALLOC_STRING_SPACE (width);
1326 memset (sp->c, ' ', width);
1328 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1335 sp->f = c->data[*op++].f == SYSMIS;
1339 sp->f = is_str_user_missing (c->data[(*vars)->fv].s, *vars);
1344 sp->f = c->data[*op++].f;
1348 sp->f = vfm_sink_info.ncases + 1;
1355 #if GLOBAL_DEBUGGING
1356 printf (_("evaluate_expression(): not implemented: %s\n"),
1359 printf (_("evaluate_expression(): not implemented: %d\n"), op[-1]);
1367 if (e->type != EX_STRING)
1369 double value = sp->f;
1370 if (!finite (value))
1381 memcpy (e->str_stack, sp->c, sp->c[0] + 1);
1382 v->c = e->str_stack;