1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
38 #if TIME_WITH_SYS_TIME
59 #include "julcal/julcal.h"
69 /* FIXME: This could be even more efficient if we caught SYSMIS when
70 it first reared its ugly head, then threw it into an entirely new
71 switch that handled SYSMIS aggressively like all the code does now.
72 But I've spent a couple of weeks on the expression code, and that's
73 enough to make anyone sick. For that matter, it could be more
74 efficient if I hand-coded it in assembly for a dozen processors,
75 but I'm not going to do that either. */
77 /* These macros are defined differently depending on the way that
78 the stack is managed. (i.e., I have to adapt the code to inferior
81 void CHECK_STRING_SPACE(int x): Assure that at least X+1 bytes of
82 space are available in the string evaluation stack.
84 unsigned char *ALLOC_STRING_SPACE(int x): Return a pointer to X+1
85 bytes of space. CHECK_STRING_SPACE must have previously been
86 called with an argument of at least X. */
89 #define CHECK_STRING_SPACE(X) /* nothing to do! */
90 #define ALLOC_STRING_SPACE(X) \
92 #else /* !PAGED_STACK */
93 #define CHECK_STRING_SPACE(X) \
96 if (str_stk + X >= str_end) \
98 e->str_size += 1024; \
99 e->str_stk = xrealloc (e->str_stk, e->str_size); \
100 str_end = e->str_stk + e->str_size - 1; \
105 #define ALLOC_STRING_SPACE(X) \
106 (str_stk += X + 1, str_stk - X - 1)
107 #endif /* !PAGED_STACK */
110 expr_evaluate (struct expression *e, struct ccase *c, union value *v)
112 unsigned char *op = e->op;
113 double *dbl = e->num;
114 unsigned char *str = e->str;
116 unsigned char *str_stk = e->str_stk;
117 unsigned char *str_end = e->str_stk + e->str_size - 1;
119 struct variable **vars = e->var;
123 union value *sp = e->stack;
132 for (i = 1; i < *op; i++)
134 if (sp[i].f == SYSMIS)
147 for (i = 1; i < *op; i++)
149 if (sp[i].f == SYSMIS)
161 if (sp[0].f == SYSMIS)
163 if (approx_eq (sp[1].f, 0.0))
166 else if (sp[1].f == SYSMIS)
174 else if (approx_eq (sp[0].f, 0.0) && approx_eq (sp[1].f, 0.0))
177 sp->f = pow (sp[0].f, sp[1].f);
181 /* Note that the equality operator (==) may be used here
182 (instead of approx_eq) because booleans are always
183 *exactly* 0, 1, or SYSMIS.
185 Truth table (in order of detection):
197 1 and SYSMIS = SYSMIS
198 SYSMIS and SYSMIS = SYSMIS
202 SYSMIS and 1 = SYSMIS
206 if (sp[0].f == 0.0); /* 1 */
207 else if (sp[1].f == 0.0)
209 else if (sp[1].f == SYSMIS)
210 sp->f = SYSMIS; /* 3 */
213 /* Truth table (in order of detection):
226 SYSMIS or SYSMIS = SYSMIS
234 if (sp[0].f == 1.0); /* 1 */
235 else if (sp[1].f == 1.0)
237 else if (sp[1].f == SYSMIS)
238 sp->f = SYSMIS; /* 3 */
243 else if (sp[0].f == 1.0)
249 if (sp[0].f != SYSMIS)
251 if (sp[1].f == SYSMIS)
254 sp->f = approx_eq (sp[0].f, sp[1].f);
259 if (sp[0].f != SYSMIS)
261 if (sp[1].f == SYSMIS)
264 sp->f = approx_ge (sp[0].f, sp[1].f);
269 if (sp[0].f != SYSMIS)
271 if (sp[1].f == SYSMIS)
274 sp->f = approx_gt (sp[0].f, sp[1].f);
279 if (sp[0].f != SYSMIS)
281 if (sp[1].f == SYSMIS)
284 sp->f = approx_le (sp[0].f, sp[1].f);
289 if (sp[0].f != SYSMIS)
291 if (sp[1].f == SYSMIS)
294 sp->f = approx_lt (sp[0].f, sp[1].f);
299 if (sp[0].f != SYSMIS)
301 if (sp[1].f == SYSMIS)
304 sp->f = approx_ne (sp[0].f, sp[1].f);
308 /* String operators. */
311 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
312 &sp[1].c[1], sp[1].c[0]) == 0;
316 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
317 &sp[1].c[1], sp[1].c[0]) >= 0;
321 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
322 &sp[1].c[1], sp[1].c[0]) > 0;
326 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
327 &sp[1].c[1], sp[1].c[0]) <= 0;
331 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
332 &sp[1].c[1], sp[1].c[0]) < 0;
336 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
337 &sp[1].c[1], sp[1].c[0]) != 0;
340 /* Unary functions. */
347 sp->f = fabs (sp->f);
353 sp->f = acos (sp->f);
362 sp->f = asin (sp->f);
369 sp->f = atan (sp->f);
388 sp->f = log10 (sp->f);
397 sp->f = log10 (sp->f);
404 sp->f = fmod (sp->f, 10);
410 sp->f = floor (sp->f + 0.5);
412 sp->f = -floor (-sp->f + 0.5);
423 sp->f = sqrt (sp->f);
441 sp->f = floor (sp->f);
443 sp->f = -floor (-sp->f);
447 /* N-ary numeric functions. */
456 for (i = 1; i <= n_args; i++)
457 if (approx_eq (sp[0].f, sp[i].f))
462 else if (sp[i].f != SYSMIS)
464 sp->f = sysmis ? SYSMIS : 0.0;
472 for (i = 1; i <= n_args; i++)
473 if (!st_compare_pad (&sp[0].c[1], sp[0].c[0],
474 &sp[i].c[1], sp[i].c[0]))
490 for (i = 0; i < n_args; i++)
491 if (sp[i].f != SYSMIS)
495 sum[1] += sp[i].f * sp[i].f;
500 sp->f = calc_cfvar (sum, nv);
507 double max = -DBL_MAX;
510 for (i = 0; i < n_args; i++)
511 if (sp[i].f != SYSMIS)
531 for (i = 0; i < n_args; i++)
532 if (sp[i].f != SYSMIS)
540 sp->f = calc_mean (sum, nv);
547 double min = DBL_MAX;
550 for (i = 0; i < n_args; i++)
551 if (sp[i].f != SYSMIS)
569 for (i = 0; i < n_args; i++)
570 if (sp[i].f == SYSMIS)
581 for (i = 0; i < n_args; i++)
582 if (sp[i].f != SYSMIS)
595 for (i = 1; i <= n_args; i += 2)
596 if (sp[i].f == SYSMIS || sp[i + 1].f == SYSMIS)
598 else if (approx_ge (sp[0].f, sp[i].f)
599 && approx_le (sp[0].f, sp[i + 1].f))
606 sp->f = sysmis ? SYSMIS : 0.0;
609 case OP_RANGE_STRING:
614 for (i = 1; i <= n_args; i += 2)
615 if (st_compare_pad (&sp[0].c[1], sp[0].c[0],
616 &sp[i].c[1], sp[i].c[0]) >= 0
617 && st_compare_pad (&sp[0].c[1], sp[0].c[0],
618 &sp[i + 1].c[1], sp[i + 1].c[0]) <= 0)
632 sum[0] = sum[1] = 0.0;
635 for (i = 0; i < n_args; i++)
636 if (sp[i].f != SYSMIS)
640 sum[1] += sp[i].f * sp[i].f;
645 sp->f = calc_stddev (calc_variance (sum, nv));
655 for (i = 0; i < n_args; i++)
656 if (sp[i].f != SYSMIS)
673 sum[0] = sum[1] = 0.0;
676 for (i = 0; i < n_args; i++)
677 if (sp[i].f != SYSMIS)
681 sum[1] += sp[i].f * sp[i].f;
686 sp->f = calc_variance (sum, nv);
690 /* Time construction function. */
693 if (sp[0].f == SYSMIS || sp[1].f == SYSMIS || sp[2].f == SYSMIS)
696 sp->f = 60. * (60. * sp[0].f + sp[1].f) + sp[2].f;
699 /* Date construction functions. */
702 sp->f = yrmoda (sp[2].f, sp[1].f, sp[0].f);
704 sp->f *= 60. * 60. * 24.;
708 sp->f = yrmoda (sp[2].f, sp[0].f, sp[1].f);
710 sp->f *= 60. * 60. * 24.;
713 (--sp)->f = yrmoda (sp[1].f, sp[0].f, 1);
715 sp->f *= 60. * 60. * 24.;
719 if (sp[0].f == SYSMIS)
723 sp->f = yrmoda (sp[1].f, sp[0].f * 3 - 2, 1);
725 sp->f *= 60. * 60. * 24.;
730 if (sp[0].f == SYSMIS)
734 sp[1].f = yrmoda (sp[1].f, 1, 1);
736 sp[1].f = 60. * 60. * 24. * (sp[1].f + 7. * (floor (sp[0].f) - 1.));
742 if (sp[1].f == SYSMIS)
746 sp->f = yrmoda (sp[0].f, 1, 1);
748 sp->f = 60. * 60. * 24. * (sp->f + floor (sp[1].f) - 1);
753 sp->f = yrmoda (sp[0].f, sp[1].f, sp[2].f);
756 /* Date extraction functions. */
759 sp->f = floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
763 sp->f = fmod (floor (sp->f / 60. / 60.), 24.);
767 sp->f = 86400. * julian_to_jday (sp->f / 86400.);
773 julian_to_calendar (sp->f / 86400., NULL, NULL, &day);
777 case OP_XDATE_MINUTE:
779 sp->f = fmod (floor (sp->f / 60.), 60.);
785 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
789 case OP_XDATE_QUARTER:
793 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
794 sp->f = (month - 1) / 3 + 1;
797 case OP_XDATE_SECOND:
799 sp->f = fmod (sp->f, 60.);
803 sp->f = floor (sp->f / 60. / 60. / 24.);
807 sp->f -= floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
811 sp->f = (julian_to_jday (sp->f / 86400.) - 1) / 7 + 1;
815 sp->f = julian_to_wday (sp->f / 86400.);
821 julian_to_calendar (sp->f / 86400., &year, NULL, NULL);
826 /* String functions. */
832 CHECK_STRING_SPACE (255);
833 dest = ALLOC_STRING_SPACE (255);
837 for (i = 0; i < n_args; i++)
840 if (sp[i].c[0] + dest[0] < 255)
842 memcpy (&dest[dest[0] + 1], &sp[i].c[1], sp[i].c[0]);
843 dest[0] += sp[i].c[0];
847 memcpy (&dest[dest[0] + 1], &sp[i].c[1], 255 - dest[0]);
861 int last = sp[0].c[0] - sp[1].c[0];
862 for (i = 0; i <= last; i++)
863 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
873 /* Length of each search string. */
874 int part_len = sp[2].f;
877 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
878 || sp[1].c[0] % part_len != 0)
882 /* Last possible index. */
883 int last = sp[0].c[0] - part_len;
885 for (i = 0; i <= last; i++)
886 for (j = 0; j < sp[1].c[0]; j += part_len)
887 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
902 for (i = sp[0].c[0] - sp[1].c[0]; i >= 0; i--)
903 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
913 /* Length of each search string. */
914 int part_len = sp[2].f;
917 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
918 || sp[1].c[0] % part_len != 0)
922 for (i = sp[0].c[0] - part_len; i >= 0; i--)
923 for (j = 0; j < sp[1].c[0]; j += part_len)
924 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
937 for (i = sp[0].c[0]; i >= 1; i--)
938 sp[0].c[i] = tolower ((unsigned char) (sp[0].c[i]));
941 for (i = sp[0].c[0]; i >= 1; i--)
942 sp[0].c[i] = toupper ((unsigned char) (sp[0].c[i]));
949 if (sp[1].f == SYSMIS || len < 0 || len > 255)
951 else if (len > sp[0].c[0])
955 CHECK_STRING_SPACE (len);
956 dest = ALLOC_STRING_SPACE (len);
958 memset (&dest[1], ' ', len - sp->c[0]);
959 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
969 if (sp[1].f == SYSMIS || len < 0 || len > 255 || sp[2].c[0] != 1)
971 else if (len > sp[0].c[0])
975 CHECK_STRING_SPACE (len);
976 dest = ALLOC_STRING_SPACE (len);
978 memset (&dest[1], sp[2].c[1], len - sp->c[0]);
979 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
989 if (sp[1].f == SYSMIS || len < 0 || len > 255)
991 else if (len > sp[0].c[0])
995 CHECK_STRING_SPACE (len);
996 dest = ALLOC_STRING_SPACE (len);
998 memcpy (&dest[1], &sp->c[1], sp->c[0]);
999 memset (&dest[sp->c[0] + 1], ' ', len - sp->c[0]);
1009 if (len < 0 || len > 255 || sp[2].c[0] != 1)
1011 else if (len > sp[0].c[0])
1013 unsigned char *dest;
1015 CHECK_STRING_SPACE (len);
1016 dest = ALLOC_STRING_SPACE (len);
1018 memcpy (&dest[1], &sp->c[1], sp->c[0]);
1019 memset (&dest[sp->c[0] + 1], sp[2].c[1], len - sp->c[0]);
1026 int len = sp[0].c[0];
1029 while (i <= len && sp[0].c[i] == ' ')
1033 sp[0].c[i] = sp[0].c[0] - i;
1034 sp->c = &sp[0].c[i];
1041 if (sp[1].c[0] != 1)
1045 int len = sp[0].c[0];
1046 int cmp = sp[1].c[1];
1049 while (i <= len && sp[0].c[i] == cmp)
1053 sp[0].c[i] = sp[0].c[0] - i;
1054 sp->c = &sp[0].c[i];
1061 while (sp[0].c[sp[0].c[0]] == ' ')
1066 if (sp[1].c[0] != 1)
1070 /* Note that NULs are not allowed in strings. This code
1071 needs to change if this decision is changed. */
1072 int cmp = sp[1].c[1];
1073 while (sp[0].c[sp[0].c[0]] == cmp)
1082 di.e = &sp->c[1] + sp->c[0];
1084 di.flags = DI_IGNORE_ERROR;
1086 di.format.type = FMT_F;
1087 di.format.w = sp->c[0];
1096 di.e = &sp->c[1] + sp->c[0];
1098 di.flags = DI_IGNORE_ERROR;
1100 di.format.type = *op++;
1101 di.format.w = *op++;
1102 di.format.d = *op++;
1109 unsigned char *dest;
1115 CHECK_STRING_SPACE (f.w);
1116 dest = ALLOC_STRING_SPACE (f.w);
1119 data_out (&dest[1], &f, sp);
1129 if (index < 1 || index > sp[0].c[0])
1134 sp->c[index] = sp->c[0] - index;
1147 if (sp[1].f == SYSMIS || sp[2].f == SYSMIS || index < 1
1148 || index > sp[0].c[0] || n < 1)
1155 sp->c[index] = sp->c[0] - index;
1166 if (sp->f != SYSMIS)
1170 if (sp->f != SYSMIS)
1173 case OP_NUM_TO_BOOL:
1174 if (approx_eq (sp->f, 0.0))
1176 else if (approx_eq (sp->f, 1.0))
1178 else if (sp->f != SYSMIS)
1180 msg (SE, _("A number being treated as a Boolean in an "
1181 "expression was found to have a value other than "
1182 "0 (false), 1 (true), or the system-missing value. "
1183 "The result was forced to 0."));
1191 if (sp[0].f != SYSMIS)
1193 if (sp[1].f == SYSMIS)
1195 if (approx_ne (sp[0].f, 0.0))
1199 sp->f = fmod (sp[0].f, sp[1].f);
1203 if (sp->f != SYSMIS)
1204 sp->f = rand_normal (sp->f);
1207 if (sp->f != SYSMIS)
1208 sp->f = rand_uniform (sp->f);
1211 if (sp[0].f == SYSMIS || !finite (sp[0].f))
1216 case OP_VEC_ELEM_NUM:
1218 int rindx = sp[0].f + EPSILON;
1219 struct vector *v = &vec[*op++];
1221 if (sp[0].f == SYSMIS || rindx < 1 || rindx > v->nv)
1223 if (sp[0].f == SYSMIS)
1224 msg (SE, _("SYSMIS is not a valid index value for vector "
1225 "%s. The result will be set to SYSMIS."),
1228 msg (SE, _("%g is not a valid index value for vector %s. "
1229 "The result will be set to SYSMIS."),
1234 sp->f = c->data[v->v[rindx - 1]->fv].f;
1237 case OP_VEC_ELEM_STR:
1239 int rindx = sp[0].f + EPSILON;
1240 struct vector *vect = &vec[*op++];
1243 if (sp[0].f == SYSMIS || rindx < 1 || rindx > vect->nv)
1245 if (sp[0].f == SYSMIS)
1246 msg (SE, _("SYSMIS is not a valid index value for vector "
1247 "%s. The result will be set to the empty "
1251 msg (SE, _("%g is not a valid index value for vector %s. "
1252 "The result will be set to the empty string."),
1253 sp[0].f, vect->name);
1254 CHECK_STRING_SPACE (0);
1255 sp->c = ALLOC_STRING_SPACE (0);
1260 v = vect->v[rindx - 1];
1261 CHECK_STRING_SPACE (v->width);
1262 sp->c = ALLOC_STRING_SPACE (v->width);
1263 sp->c[0] = v->width;
1264 memcpy (&sp->c[1], c->data[v->fv].s, v->width);
1275 CHECK_STRING_SPACE (*str);
1276 sp->c = ALLOC_STRING_SPACE (*str);
1277 memcpy (sp->c, str, *str + 1);
1282 sp->f = c->data[(*vars)->fv].f;
1283 if (is_num_user_missing (sp->f, *vars))
1289 int width = (*vars)->width;
1292 CHECK_STRING_SPACE (width);
1293 sp->c = ALLOC_STRING_SPACE (width);
1295 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1301 struct ccase *c = lagged_case (*op++);
1308 sp->f = c->data[(*vars)->fv].f;
1309 if (is_num_user_missing (sp->f, *vars))
1317 struct ccase *c = lagged_case (*op++);
1318 int width = (*vars)->width;
1321 CHECK_STRING_SPACE (width);
1322 sp->c = ALLOC_STRING_SPACE (width);
1326 memset (sp->c, ' ', width);
1328 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1335 sp->f = c->data[*op++].f == SYSMIS;
1339 sp->f = is_str_user_missing (c->data[(*vars)->fv].s, *vars);
1344 sp->f = c->data[*op++].f;
1348 sp->f = vfm_sink_info.ncases + 1;
1355 /* This case prevents Checker from choking. */
1361 #if GLOBAL_DEBUGGING
1362 printf (_("evaluate_expression(): not implemented: %s\n"),
1365 printf (_("evaluate_expression(): not implemented: %d\n"), op[-1]);
1373 if (e->type != EX_STRING)
1375 double value = sp->f;
1376 if (!finite (value))
1387 memcpy (e->str_stack, sp->c, sp->c[0] + 1);
1388 v->c = e->str_stack;