1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
38 #if TIME_WITH_SYS_TIME
59 #include "julcal/julcal.h"
69 /* FIXME: This could be even more efficient if we caught SYSMIS when
70 it first reared its ugly head, then threw it into an entirely new
71 switch that handled SYSMIS aggressively like all the code does now.
72 But I've spent a couple of weeks on the expression code, and that's
73 enough to make anyone sick. For that matter, it could be more
74 efficient if I hand-coded it in assembly for a dozen processors,
75 but I'm not going to do that either. */
77 /* These macros are defined differently depending on the way that
78 the stack is managed. (i.e., I have to adapt the code to inferior
81 void CHECK_STRING_SPACE(int x): Assure that at least X+1 bytes of
82 space are available in the string evaluation stack.
84 unsigned char *ALLOC_STRING_SPACE(int x): Return a pointer to X+1
85 bytes of space. CHECK_STRING_SPACE must have previously been
86 called with an argument of at least X. */
89 #define CHECK_STRING_SPACE(X) /* nothing to do! */
90 #define ALLOC_STRING_SPACE(X) \
92 #else /* !PAGED_STACK */
93 #define CHECK_STRING_SPACE(X) \
96 if (str_stk + X >= str_end) \
98 e->str_size += 1024; \
99 e->str_stk = xrealloc (e->str_stk, e->str_size); \
100 str_end = e->str_stk + e->str_size - 1; \
105 #define ALLOC_STRING_SPACE(X) \
106 (str_stk += X + 1, str_stk - X - 1)
107 #endif /* !PAGED_STACK */
110 expr_evaluate (struct expression *e, struct ccase *c, union value *v)
112 unsigned char *op = e->op;
113 double *dbl = e->num;
114 unsigned char *str = e->str;
116 unsigned char *str_stk = e->str_stk;
117 unsigned char *str_end = e->str_stk + e->str_size - 1;
119 struct variable **vars = e->var;
123 union value *sp = e->stack;
132 for (i = 1; i < *op; i++)
134 if (sp[i].f == SYSMIS)
147 for (i = 1; i < *op; i++)
149 if (sp[i].f == SYSMIS)
161 if (sp[0].f == SYSMIS)
163 if (approx_eq (sp[1].f, 0.0))
166 else if (sp[1].f == SYSMIS)
174 else if (approx_eq (sp[0].f, 0.0) && approx_eq (sp[1].f, 0.0))
177 sp->f = pow (sp[0].f, sp[1].f);
181 /* Note that the equality operator (==) may be used here
182 (instead of approx_eq) because booleans are always
183 *exactly* 0, 1, or SYSMIS.
185 Truth table (in order of detection):
197 1 and SYSMIS = SYSMIS
198 SYSMIS and SYSMIS = SYSMIS
202 SYSMIS and 1 = SYSMIS
206 if (sp[0].f == 0.0); /* 1 */
207 else if (sp[1].f == 0.0)
209 else if (sp[1].f == SYSMIS)
210 sp->f = SYSMIS; /* 3 */
213 /* Truth table (in order of detection):
226 SYSMIS or SYSMIS = SYSMIS
234 if (sp[0].f == 1.0); /* 1 */
235 else if (sp[1].f == 1.0)
237 else if (sp[1].f == SYSMIS)
238 sp->f = SYSMIS; /* 3 */
243 else if (sp[0].f == 1.0)
249 if (sp[0].f != SYSMIS)
251 if (sp[1].f == SYSMIS)
254 sp->f = approx_eq (sp[0].f, sp[1].f);
259 if (sp[0].f != SYSMIS)
261 if (sp[1].f == SYSMIS)
264 sp->f = approx_ge (sp[0].f, sp[1].f);
269 if (sp[0].f != SYSMIS)
271 if (sp[1].f == SYSMIS)
274 sp->f = approx_gt (sp[0].f, sp[1].f);
279 if (sp[0].f != SYSMIS)
281 if (sp[1].f == SYSMIS)
284 sp->f = approx_le (sp[0].f, sp[1].f);
289 if (sp[0].f != SYSMIS)
291 if (sp[1].f == SYSMIS)
294 sp->f = approx_lt (sp[0].f, sp[1].f);
299 if (sp[0].f != SYSMIS)
301 if (sp[1].f == SYSMIS)
304 sp->f = approx_ne (sp[0].f, sp[1].f);
308 /* String operators. */
311 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
312 &sp[1].c[1], sp[1].c[0]) == 0;
316 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
317 &sp[1].c[1], sp[1].c[0]) >= 0;
321 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
322 &sp[1].c[1], sp[1].c[0]) > 0;
326 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
327 &sp[1].c[1], sp[1].c[0]) <= 0;
331 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
332 &sp[1].c[1], sp[1].c[0]) < 0;
336 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
337 &sp[1].c[1], sp[1].c[0]) != 0;
340 /* Unary functions. */
347 sp->f = fabs (sp->f);
353 sp->f = acos (sp->f);
362 sp->f = asin (sp->f);
369 sp->f = atan (sp->f);
388 sp->f = log10 (sp->f);
397 sp->f = log10 (sp->f);
404 sp->f = fmod (sp->f, 10);
410 sp->f = floor (sp->f + 0.5);
412 sp->f = -floor (-sp->f + 0.5);
423 sp->f = sqrt (sp->f);
441 sp->f = floor (sp->f);
443 sp->f = -floor (-sp->f);
447 /* N-ary numeric functions. */
456 for (i = 1; i <= n_args; i++)
457 if (approx_eq (sp[0].f, sp[i].f))
462 else if (sp[i].f != SYSMIS)
464 sp->f = sysmis ? SYSMIS : 0.0;
472 for (i = 1; i <= n_args; i++)
473 if (!st_compare_pad (&sp[0].c[1], sp[0].c[0],
474 &sp[i].c[1], sp[i].c[0]))
490 for (i = 0; i < n_args; i++)
491 if (sp[i].f != SYSMIS)
495 sum[1] += sp[i].f * sp[i].f;
500 sp->f = calc_cfvar (sum, nv);
507 double max = -DBL_MAX;
510 for (i = 0; i < n_args; i++)
511 if (sp[i].f != SYSMIS)
531 for (i = 0; i < n_args; i++)
532 if (sp[i].f != SYSMIS)
540 sp->f = calc_mean (sum, nv);
547 double min = DBL_MAX;
550 for (i = 0; i < n_args; i++)
551 if (sp[i].f != SYSMIS)
569 for (i = 0; i < n_args; i++)
570 if (sp[i].f == SYSMIS)
581 for (i = 0; i < n_args; i++)
582 if (sp[i].f != SYSMIS)
595 for (i = 1; i <= n_args; i += 2)
596 if (sp[i].f == SYSMIS || sp[i + 1].f == SYSMIS)
598 else if (approx_ge (sp[0].f, sp[i].f)
599 && approx_le (sp[0].f, sp[i + 1].f))
606 sp->f = sysmis ? SYSMIS : 0.0;
609 case OP_RANGE_STRING:
614 for (i = 1; i <= n_args; i += 2)
615 if (st_compare_pad (&sp[0].c[1], sp[0].c[0],
616 &sp[i].c[1], sp[i].c[0]) >= 0
617 && st_compare_pad (&sp[0].c[1], sp[0].c[0],
618 &sp[i + 1].c[1], sp[i + 1].c[0]) <= 0)
632 sum[0] = sum[1] = 0.0;
635 for (i = 0; i < n_args; i++)
636 if (sp[i].f != SYSMIS)
640 sum[1] += sp[i].f * sp[i].f;
645 sp->f = calc_stddev (calc_variance (sum, nv));
655 for (i = 0; i < n_args; i++)
656 if (sp[i].f != SYSMIS)
673 sum[0] = sum[1] = 0.0;
676 for (i = 0; i < n_args; i++)
677 if (sp[i].f != SYSMIS)
681 sum[1] += sp[i].f * sp[i].f;
686 sp->f = calc_variance (sum, nv);
690 /* Time construction function. */
693 if (sp[0].f == SYSMIS || sp[1].f == SYSMIS || sp[2].f == SYSMIS)
696 sp->f = 60. * (60. * sp[0].f + sp[1].f) + sp[2].f;
699 /* Date construction functions. */
702 sp->f = yrmoda (sp[2].f, sp[1].f, sp[0].f);
704 sp->f *= 60. * 60. * 24.;
708 sp->f = yrmoda (sp[2].f, sp[0].f, sp[1].f);
710 sp->f *= 60. * 60. * 24.;
714 sp->f = yrmoda (sp[1].f, sp[0].f, 1);
716 sp->f *= 60. * 60. * 24.;
720 if (sp[0].f == SYSMIS)
724 sp->f = yrmoda (sp[1].f, sp[0].f * 3 - 2, 1);
726 sp->f *= 60. * 60. * 24.;
731 if (sp[0].f == SYSMIS)
735 sp[1].f = yrmoda (sp[1].f, 1, 1);
737 sp[1].f = 60. * 60. * 24. * (sp[1].f + 7. * (floor (sp[0].f) - 1.));
743 if (sp[1].f == SYSMIS)
747 sp->f = yrmoda (sp[0].f, 1, 1);
749 sp->f = 60. * 60. * 24. * (sp->f + floor (sp[1].f) - 1);
754 sp->f = yrmoda (sp[0].f, sp[1].f, sp[2].f);
757 /* Date extraction functions. */
760 sp->f = floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
764 sp->f = fmod (floor (sp->f / 60. / 60.), 24.);
768 sp->f = 86400. * julian_to_jday (sp->f / 86400.);
774 julian_to_calendar (sp->f / 86400., NULL, NULL, &day);
778 case OP_XDATE_MINUTE:
780 sp->f = fmod (floor (sp->f / 60.), 60.);
786 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
790 case OP_XDATE_QUARTER:
794 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
795 sp->f = (month - 1) / 3 + 1;
798 case OP_XDATE_SECOND:
800 sp->f = fmod (sp->f, 60.);
804 sp->f = floor (sp->f / 60. / 60. / 24.);
808 sp->f -= floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
812 sp->f = (julian_to_jday (sp->f / 86400.) - 1) / 7 + 1;
816 sp->f = julian_to_wday (sp->f / 86400.);
822 julian_to_calendar (sp->f / 86400., &year, NULL, NULL);
827 /* String functions. */
833 CHECK_STRING_SPACE (255);
834 dest = ALLOC_STRING_SPACE (255);
838 for (i = 0; i < n_args; i++)
841 if (sp[i].c[0] + dest[0] < 255)
843 memcpy (&dest[dest[0] + 1], &sp[i].c[1], sp[i].c[0]);
844 dest[0] += sp[i].c[0];
848 memcpy (&dest[dest[0] + 1], &sp[i].c[1], 255 - dest[0]);
862 int last = sp[0].c[0] - sp[1].c[0];
863 for (i = 0; i <= last; i++)
864 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
874 /* Length of each search string. */
875 int part_len = sp[2].f;
878 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
879 || sp[1].c[0] % part_len != 0)
883 /* Last possible index. */
884 int last = sp[0].c[0] - part_len;
886 for (i = 0; i <= last; i++)
887 for (j = 0; j < sp[1].c[0]; j += part_len)
888 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
903 for (i = sp[0].c[0] - sp[1].c[0]; i >= 0; i--)
904 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
914 /* Length of each search string. */
915 int part_len = sp[2].f;
918 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
919 || sp[1].c[0] % part_len != 0)
923 for (i = sp[0].c[0] - part_len; i >= 0; i--)
924 for (j = 0; j < sp[1].c[0]; j += part_len)
925 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
938 for (i = sp[0].c[0]; i >= 1; i--)
939 sp[0].c[i] = tolower ((unsigned char) (sp[0].c[i]));
942 for (i = sp[0].c[0]; i >= 1; i--)
943 sp[0].c[i] = toupper ((unsigned char) (sp[0].c[i]));
950 if (sp[1].f == SYSMIS || len < 0 || len > 255)
952 else if (len > sp[0].c[0])
956 CHECK_STRING_SPACE (len);
957 dest = ALLOC_STRING_SPACE (len);
959 memset (&dest[1], ' ', len - sp->c[0]);
960 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
970 if (sp[1].f == SYSMIS || len < 0 || len > 255 || sp[2].c[0] != 1)
972 else if (len > sp[0].c[0])
976 CHECK_STRING_SPACE (len);
977 dest = ALLOC_STRING_SPACE (len);
979 memset (&dest[1], sp[2].c[1], len - sp->c[0]);
980 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
990 if (sp[1].f == SYSMIS || len < 0 || len > 255)
992 else if (len > sp[0].c[0])
996 CHECK_STRING_SPACE (len);
997 dest = ALLOC_STRING_SPACE (len);
999 memcpy (&dest[1], &sp->c[1], sp->c[0]);
1000 memset (&dest[sp->c[0] + 1], ' ', len - sp->c[0]);
1010 if (len < 0 || len > 255 || sp[2].c[0] != 1)
1012 else if (len > sp[0].c[0])
1014 unsigned char *dest;
1016 CHECK_STRING_SPACE (len);
1017 dest = ALLOC_STRING_SPACE (len);
1019 memcpy (&dest[1], &sp->c[1], sp->c[0]);
1020 memset (&dest[sp->c[0] + 1], sp[2].c[1], len - sp->c[0]);
1027 int len = sp[0].c[0];
1030 while (i <= len && sp[0].c[i] == ' ')
1034 sp[0].c[i] = sp[0].c[0] - i;
1035 sp->c = &sp[0].c[i];
1042 if (sp[1].c[0] != 1)
1046 int len = sp[0].c[0];
1047 int cmp = sp[1].c[1];
1050 while (i <= len && sp[0].c[i] == cmp)
1054 sp[0].c[i] = sp[0].c[0] - i;
1055 sp->c = &sp[0].c[i];
1062 while (sp[0].c[sp[0].c[0]] == ' ')
1067 if (sp[1].c[0] != 1)
1071 /* Note that NULs are not allowed in strings. This code
1072 needs to change if this decision is changed. */
1073 int cmp = sp[1].c[1];
1074 while (sp[0].c[sp[0].c[0]] == cmp)
1083 di.e = &sp->c[1] + sp->c[0];
1085 di.flags = DI_IGNORE_ERROR;
1087 di.format.type = FMT_F;
1088 di.format.w = sp->c[0];
1097 di.e = &sp->c[1] + sp->c[0];
1099 di.flags = DI_IGNORE_ERROR;
1101 di.format.type = *op++;
1102 di.format.w = *op++;
1103 di.format.d = *op++;
1110 unsigned char *dest;
1116 CHECK_STRING_SPACE (f.w);
1117 dest = ALLOC_STRING_SPACE (f.w);
1120 data_out (&dest[1], &f, sp);
1130 if (index < 1 || index > sp[0].c[0])
1135 sp->c[index] = sp->c[0] - index;
1148 if (sp[1].f == SYSMIS || sp[2].f == SYSMIS || index < 1
1149 || index > sp[0].c[0] || n < 1)
1156 sp->c[index] = sp->c[0] - index;
1167 if (sp->f != SYSMIS)
1171 if (sp->f != SYSMIS)
1174 case OP_NUM_TO_BOOL:
1175 if (approx_eq (sp->f, 0.0))
1177 else if (approx_eq (sp->f, 1.0))
1179 else if (sp->f != SYSMIS)
1181 msg (SE, _("A number being treated as a Boolean in an "
1182 "expression was found to have a value other than "
1183 "0 (false), 1 (true), or the system-missing value. "
1184 "The result was forced to 0."));
1192 if (sp[0].f != SYSMIS)
1194 if (sp[1].f == SYSMIS)
1196 if (approx_ne (sp[0].f, 0.0))
1200 sp->f = fmod (sp[0].f, sp[1].f);
1204 if (sp->f != SYSMIS)
1205 sp->f = rand_normal (sp->f);
1208 if (sp->f != SYSMIS)
1209 sp->f = rand_uniform (sp->f);
1212 if (sp[0].f == SYSMIS || !finite (sp[0].f))
1217 case OP_VEC_ELEM_NUM:
1219 int rindx = sp[0].f + EPSILON;
1220 struct vector *v = &vec[*op++];
1222 if (sp[0].f == SYSMIS || rindx < 1 || rindx > v->nv)
1224 if (sp[0].f == SYSMIS)
1225 msg (SE, _("SYSMIS is not a valid index value for vector "
1226 "%s. The result will be set to SYSMIS."),
1229 msg (SE, _("%g is not a valid index value for vector %s. "
1230 "The result will be set to SYSMIS."),
1235 sp->f = c->data[v->v[rindx - 1]->fv].f;
1238 case OP_VEC_ELEM_STR:
1240 int rindx = sp[0].f + EPSILON;
1241 struct vector *vect = &vec[*op++];
1244 if (sp[0].f == SYSMIS || rindx < 1 || rindx > vect->nv)
1246 if (sp[0].f == SYSMIS)
1247 msg (SE, _("SYSMIS is not a valid index value for vector "
1248 "%s. The result will be set to the empty "
1252 msg (SE, _("%g is not a valid index value for vector %s. "
1253 "The result will be set to the empty string."),
1254 sp[0].f, vect->name);
1255 CHECK_STRING_SPACE (0);
1256 sp->c = ALLOC_STRING_SPACE (0);
1261 v = vect->v[rindx - 1];
1262 CHECK_STRING_SPACE (v->width);
1263 sp->c = ALLOC_STRING_SPACE (v->width);
1264 sp->c[0] = v->width;
1265 memcpy (&sp->c[1], c->data[v->fv].s, v->width);
1276 CHECK_STRING_SPACE (*str);
1277 sp->c = ALLOC_STRING_SPACE (*str);
1278 memcpy (sp->c, str, *str + 1);
1283 sp->f = c->data[(*vars)->fv].f;
1284 if (is_num_user_missing (sp->f, *vars))
1290 int width = (*vars)->width;
1293 CHECK_STRING_SPACE (width);
1294 sp->c = ALLOC_STRING_SPACE (width);
1296 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1302 struct ccase *c = lagged_case (*op++);
1309 sp->f = c->data[(*vars)->fv].f;
1310 if (is_num_user_missing (sp->f, *vars))
1318 struct ccase *c = lagged_case (*op++);
1319 int width = (*vars)->width;
1322 CHECK_STRING_SPACE (width);
1323 sp->c = ALLOC_STRING_SPACE (width);
1327 memset (sp->c, ' ', width);
1329 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1336 sp->f = c->data[*op++].f == SYSMIS;
1340 sp->f = is_str_user_missing (c->data[(*vars)->fv].s, *vars);
1345 sp->f = c->data[*op++].f;
1349 sp->f = vfm_sink_info.ncases + 1;
1356 #if GLOBAL_DEBUGGING
1357 printf (_("evaluate_expression(): not implemented: %s\n"),
1360 printf (_("evaluate_expression(): not implemented: %d\n"), op[-1]);
1368 if (e->type != EX_STRING)
1370 double value = sp->f;
1371 if (!finite (value))
1382 memcpy (e->str_stack, sp->c, sp->c[0] + 1);
1383 v->c = e->str_stack;