1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #if TIME_WITH_SYS_TIME
43 #include "julcal/julcal.h"
52 /* FIXME: This could be even more efficient if we caught SYSMIS when
53 it first reared its ugly head, then threw it into an entirely new
54 switch that handled SYSMIS aggressively like all the code does now.
55 But I've spent a couple of weeks on the expression code, and that's
56 enough to make anyone sick. For that matter, it could be more
57 efficient if I hand-coded it in assembly for a dozen processors,
58 but I'm not going to do that either. */
60 /* These macros are defined differently depending on the way that
61 the stack is managed. (i.e., I have to adapt the code to inferior
64 void CHECK_STRING_SPACE(int x): Assure that at least X+1 bytes of
65 space are available in the string evaluation stack.
67 unsigned char *ALLOC_STRING_SPACE(int x): Return a pointer to X+1
68 bytes of space. CHECK_STRING_SPACE must have previously been
69 called with an argument of at least X. */
72 #define CHECK_STRING_SPACE(X) /* nothing to do! */
73 #define ALLOC_STRING_SPACE(X) \
75 #else /* !PAGED_STACK */
76 #define CHECK_STRING_SPACE(X) \
79 if (str_stk + X >= str_end) \
81 e->str_size += 1024; \
82 e->str_stk = xrealloc (e->str_stk, e->str_size); \
83 str_end = e->str_stk + e->str_size - 1; \
88 #define ALLOC_STRING_SPACE(X) \
89 (str_stk += X + 1, str_stk - X - 1)
90 #endif /* !PAGED_STACK */
93 expr_evaluate (struct expression *e, struct ccase *c, union value *v)
95 unsigned char *op = e->op;
97 unsigned char *str = e->str;
99 unsigned char *str_stk = e->str_stk;
100 unsigned char *str_end = e->str_stk + e->str_size - 1;
102 struct variable **vars = e->var;
106 union value *sp = e->stack;
115 for (i = 1; i < *op; i++)
117 if (sp[i].f == SYSMIS)
130 for (i = 1; i < *op; i++)
132 if (sp[i].f == SYSMIS)
144 if (sp[0].f == SYSMIS)
146 if (approx_eq (sp[1].f, 0.0))
149 else if (sp[1].f == SYSMIS)
157 else if (approx_eq (sp[0].f, 0.0) && approx_eq (sp[1].f, 0.0))
160 sp->f = pow (sp[0].f, sp[1].f);
164 /* Note that the equality operator (==) may be used here
165 (instead of approx_eq) because booleans are always
166 *exactly* 0, 1, or SYSMIS.
168 Truth table (in order of detection):
180 1 and SYSMIS = SYSMIS
181 SYSMIS and SYSMIS = SYSMIS
185 SYSMIS and 1 = SYSMIS
189 if (sp[0].f == 0.0); /* 1 */
190 else if (sp[1].f == 0.0)
192 else if (sp[1].f == SYSMIS)
193 sp->f = SYSMIS; /* 3 */
196 /* Truth table (in order of detection):
209 SYSMIS or SYSMIS = SYSMIS
217 if (sp[0].f == 1.0); /* 1 */
218 else if (sp[1].f == 1.0)
220 else if (sp[1].f == SYSMIS)
221 sp->f = SYSMIS; /* 3 */
226 else if (sp[0].f == 1.0)
232 if (sp[0].f != SYSMIS)
234 if (sp[1].f == SYSMIS)
237 sp->f = approx_eq (sp[0].f, sp[1].f);
242 if (sp[0].f != SYSMIS)
244 if (sp[1].f == SYSMIS)
247 sp->f = approx_ge (sp[0].f, sp[1].f);
252 if (sp[0].f != SYSMIS)
254 if (sp[1].f == SYSMIS)
257 sp->f = approx_gt (sp[0].f, sp[1].f);
262 if (sp[0].f != SYSMIS)
264 if (sp[1].f == SYSMIS)
267 sp->f = approx_le (sp[0].f, sp[1].f);
272 if (sp[0].f != SYSMIS)
274 if (sp[1].f == SYSMIS)
277 sp->f = approx_lt (sp[0].f, sp[1].f);
282 if (sp[0].f != SYSMIS)
284 if (sp[1].f == SYSMIS)
287 sp->f = approx_ne (sp[0].f, sp[1].f);
291 /* String operators. */
294 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
295 &sp[1].c[1], sp[1].c[0]) == 0;
299 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
300 &sp[1].c[1], sp[1].c[0]) >= 0;
304 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
305 &sp[1].c[1], sp[1].c[0]) > 0;
309 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
310 &sp[1].c[1], sp[1].c[0]) <= 0;
314 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
315 &sp[1].c[1], sp[1].c[0]) < 0;
319 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
320 &sp[1].c[1], sp[1].c[0]) != 0;
323 /* Unary functions. */
330 sp->f = fabs (sp->f);
336 sp->f = acos (sp->f);
345 sp->f = asin (sp->f);
352 sp->f = atan (sp->f);
371 sp->f = log10 (sp->f);
380 sp->f = log10 (sp->f);
387 sp->f = fmod (sp->f, 10);
393 sp->f = floor (sp->f + 0.5);
395 sp->f = -floor (-sp->f + 0.5);
406 sp->f = sqrt (sp->f);
424 sp->f = floor (sp->f);
426 sp->f = -floor (-sp->f);
430 /* N-ary numeric functions. */
439 for (i = 1; i <= n_args; i++)
440 if (approx_eq (sp[0].f, sp[i].f))
445 else if (sp[i].f != SYSMIS)
447 sp->f = sysmis ? SYSMIS : 0.0;
455 for (i = 1; i <= n_args; i++)
456 if (!st_compare_pad (&sp[0].c[1], sp[0].c[0],
457 &sp[i].c[1], sp[i].c[0]))
473 for (i = 0; i < n_args; i++)
474 if (sp[i].f != SYSMIS)
478 sum[1] += sp[i].f * sp[i].f;
483 sp->f = calc_cfvar (sum, nv);
490 double max = -DBL_MAX;
493 for (i = 0; i < n_args; i++)
494 if (sp[i].f != SYSMIS)
514 for (i = 0; i < n_args; i++)
515 if (sp[i].f != SYSMIS)
523 sp->f = calc_mean (sum, nv);
530 double min = DBL_MAX;
533 for (i = 0; i < n_args; i++)
534 if (sp[i].f != SYSMIS)
552 for (i = 0; i < n_args; i++)
553 if (sp[i].f == SYSMIS)
564 for (i = 0; i < n_args; i++)
565 if (sp[i].f != SYSMIS)
578 for (i = 1; i <= n_args; i += 2)
579 if (sp[i].f == SYSMIS || sp[i + 1].f == SYSMIS)
581 else if (approx_ge (sp[0].f, sp[i].f)
582 && approx_le (sp[0].f, sp[i + 1].f))
589 sp->f = sysmis ? SYSMIS : 0.0;
592 case OP_RANGE_STRING:
597 for (i = 1; i <= n_args; i += 2)
598 if (st_compare_pad (&sp[0].c[1], sp[0].c[0],
599 &sp[i].c[1], sp[i].c[0]) >= 0
600 && st_compare_pad (&sp[0].c[1], sp[0].c[0],
601 &sp[i + 1].c[1], sp[i + 1].c[0]) <= 0)
615 sum[0] = sum[1] = 0.0;
618 for (i = 0; i < n_args; i++)
619 if (sp[i].f != SYSMIS)
623 sum[1] += sp[i].f * sp[i].f;
628 sp->f = calc_stddev (calc_variance (sum, nv));
638 for (i = 0; i < n_args; i++)
639 if (sp[i].f != SYSMIS)
656 sum[0] = sum[1] = 0.0;
659 for (i = 0; i < n_args; i++)
660 if (sp[i].f != SYSMIS)
664 sum[1] += sp[i].f * sp[i].f;
669 sp->f = calc_variance (sum, nv);
673 /* Time construction function. */
676 if (sp[0].f == SYSMIS || sp[1].f == SYSMIS || sp[2].f == SYSMIS)
679 sp->f = 60. * (60. * sp[0].f + sp[1].f) + sp[2].f;
682 /* Date construction functions. */
685 sp->f = yrmoda (sp[2].f, sp[1].f, sp[0].f);
687 sp->f *= 60. * 60. * 24.;
691 sp->f = yrmoda (sp[2].f, sp[0].f, sp[1].f);
693 sp->f *= 60. * 60. * 24.;
697 sp->f = yrmoda (sp[1].f, sp[0].f, 1);
699 sp->f *= 60. * 60. * 24.;
703 if (sp[0].f == SYSMIS)
707 sp->f = yrmoda (sp[1].f, sp[0].f * 3 - 2, 1);
709 sp->f *= 60. * 60. * 24.;
714 if (sp[0].f == SYSMIS)
718 sp[1].f = yrmoda (sp[1].f, 1, 1);
720 sp[1].f = 60. * 60. * 24. * (sp[1].f + 7. * (floor (sp[0].f) - 1.));
726 if (sp[1].f == SYSMIS)
730 sp->f = yrmoda (sp[0].f, 1, 1);
732 sp->f = 60. * 60. * 24. * (sp->f + floor (sp[1].f) - 1);
737 sp->f = yrmoda (sp[0].f, sp[1].f, sp[2].f);
740 /* Date extraction functions. */
743 sp->f = floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
747 sp->f = fmod (floor (sp->f / 60. / 60.), 24.);
751 sp->f = 86400. * julian_to_jday (sp->f / 86400.);
757 julian_to_calendar (sp->f / 86400., NULL, NULL, &day);
761 case OP_XDATE_MINUTE:
763 sp->f = fmod (floor (sp->f / 60.), 60.);
769 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
773 case OP_XDATE_QUARTER:
777 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
778 sp->f = (month - 1) / 3 + 1;
781 case OP_XDATE_SECOND:
783 sp->f = fmod (sp->f, 60.);
787 sp->f = floor (sp->f / 60. / 60. / 24.);
791 sp->f -= floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
795 sp->f = (julian_to_jday (sp->f / 86400.) - 1) / 7 + 1;
799 sp->f = julian_to_wday (sp->f / 86400.);
805 julian_to_calendar (sp->f / 86400., &year, NULL, NULL);
810 /* String functions. */
816 CHECK_STRING_SPACE (255);
817 dest = ALLOC_STRING_SPACE (255);
821 for (i = 0; i < n_args; i++)
824 if (sp[i].c[0] + dest[0] < 255)
826 memcpy (&dest[dest[0] + 1], &sp[i].c[1], sp[i].c[0]);
827 dest[0] += sp[i].c[0];
831 memcpy (&dest[dest[0] + 1], &sp[i].c[1], 255 - dest[0]);
845 int last = sp[0].c[0] - sp[1].c[0];
846 for (i = 0; i <= last; i++)
847 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
857 /* Length of each search string. */
858 int part_len = sp[2].f;
861 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
862 || sp[1].c[0] % part_len != 0)
866 /* Last possible index. */
867 int last = sp[0].c[0] - part_len;
869 for (i = 0; i <= last; i++)
870 for (j = 0; j < sp[1].c[0]; j += part_len)
871 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
886 for (i = sp[0].c[0] - sp[1].c[0]; i >= 0; i--)
887 if (!memcmp (&sp[0].c[i + 1], &sp[0].c[1], sp[0].c[0]))
897 /* Length of each search string. */
898 int part_len = sp[2].f;
901 if (sp[1].c[0] == 0 || part_len <= 0 || sp[2].f == SYSMIS
902 || sp[1].c[0] % part_len != 0)
906 for (i = sp[0].c[0] - part_len; i >= 0; i--)
907 for (j = 0; j < sp[1].c[0]; j += part_len)
908 if (!memcmp (&sp[0].c[i], &sp[1].c[j], part_len))
921 for (i = sp[0].c[0]; i >= 1; i--)
922 sp[0].c[i] = tolower ((unsigned char) (sp[0].c[i]));
925 for (i = sp[0].c[0]; i >= 1; i--)
926 sp[0].c[i] = toupper ((unsigned char) (sp[0].c[i]));
933 if (sp[1].f == SYSMIS || len < 0 || len > 255)
935 else if (len > sp[0].c[0])
939 CHECK_STRING_SPACE (len);
940 dest = ALLOC_STRING_SPACE (len);
942 memset (&dest[1], ' ', len - sp->c[0]);
943 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
953 if (sp[1].f == SYSMIS || len < 0 || len > 255 || sp[2].c[0] != 1)
955 else if (len > sp[0].c[0])
959 CHECK_STRING_SPACE (len);
960 dest = ALLOC_STRING_SPACE (len);
962 memset (&dest[1], sp[2].c[1], len - sp->c[0]);
963 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
973 if (sp[1].f == SYSMIS || len < 0 || len > 255)
975 else if (len > sp[0].c[0])
979 CHECK_STRING_SPACE (len);
980 dest = ALLOC_STRING_SPACE (len);
982 memcpy (&dest[1], &sp->c[1], sp->c[0]);
983 memset (&dest[sp->c[0] + 1], ' ', len - sp->c[0]);
993 if (len < 0 || len > 255 || sp[2].c[0] != 1)
995 else if (len > sp[0].c[0])
999 CHECK_STRING_SPACE (len);
1000 dest = ALLOC_STRING_SPACE (len);
1002 memcpy (&dest[1], &sp->c[1], sp->c[0]);
1003 memset (&dest[sp->c[0] + 1], sp[2].c[1], len - sp->c[0]);
1010 int len = sp[0].c[0];
1013 while (i <= len && sp[0].c[i] == ' ')
1017 sp[0].c[i] = sp[0].c[0] - i;
1018 sp->c = &sp[0].c[i];
1025 if (sp[1].c[0] != 1)
1029 int len = sp[0].c[0];
1030 int cmp = sp[1].c[1];
1033 while (i <= len && sp[0].c[i] == cmp)
1037 sp[0].c[i] = sp[0].c[0] - i;
1038 sp->c = &sp[0].c[i];
1045 while (sp[0].c[sp[0].c[0]] == ' ')
1050 if (sp[1].c[0] != 1)
1054 /* Note that NULs are not allowed in strings. This code
1055 needs to change if this decision is changed. */
1056 int cmp = sp[1].c[1];
1057 while (sp[0].c[sp[0].c[0]] == cmp)
1066 di.e = &sp->c[1] + sp->c[0];
1068 di.flags = DI_IGNORE_ERROR;
1070 di.format.type = FMT_F;
1071 di.format.w = sp->c[0];
1080 di.e = &sp->c[1] + sp->c[0];
1082 di.flags = DI_IGNORE_ERROR;
1084 di.format.type = *op++;
1085 di.format.w = *op++;
1086 di.format.d = *op++;
1093 unsigned char *dest;
1099 CHECK_STRING_SPACE (f.w);
1100 dest = ALLOC_STRING_SPACE (f.w);
1103 data_out (&dest[1], &f, sp);
1113 if (index < 1 || index > sp[0].c[0])
1118 sp->c[index] = sp->c[0] - index;
1131 if (sp[1].f == SYSMIS || sp[2].f == SYSMIS || index < 1
1132 || index > sp[0].c[0] || n < 1)
1139 sp->c[index] = sp->c[0] - index;
1150 if (sp->f != SYSMIS)
1154 if (sp->f != SYSMIS)
1157 case OP_NUM_TO_BOOL:
1158 if (approx_eq (sp->f, 0.0))
1160 else if (approx_eq (sp->f, 1.0))
1162 else if (sp->f != SYSMIS)
1164 msg (SE, _("A number being treated as a Boolean in an "
1165 "expression was found to have a value other than "
1166 "0 (false), 1 (true), or the system-missing value. "
1167 "The result was forced to 0."));
1175 if (sp[0].f != SYSMIS)
1177 if (sp[1].f == SYSMIS)
1179 if (approx_ne (sp[0].f, 0.0))
1183 sp->f = fmod (sp[0].f, sp[1].f);
1187 if (sp->f != SYSMIS)
1188 sp->f *= rng_get_double_normal (pspp_rng ());
1191 if (sp->f != SYSMIS)
1192 sp->f *= rng_get_double (pspp_rng ());
1195 if (sp[0].f == SYSMIS || !finite (sp[0].f))
1200 case OP_VEC_ELEM_NUM:
1202 int rindx = sp[0].f + EPSILON;
1203 const struct vector *v = dict_get_vector (default_dict, *op++);
1205 if (sp[0].f == SYSMIS || rindx < 1 || rindx > v->cnt)
1207 if (sp[0].f == SYSMIS)
1208 msg (SE, _("SYSMIS is not a valid index value for vector "
1209 "%s. The result will be set to SYSMIS."),
1212 msg (SE, _("%g is not a valid index value for vector %s. "
1213 "The result will be set to SYSMIS."),
1218 sp->f = c->data[v->var[rindx - 1]->fv].f;
1221 case OP_VEC_ELEM_STR:
1223 int rindx = sp[0].f + EPSILON;
1224 const struct vector *vect = dict_get_vector (default_dict, *op++);
1227 if (sp[0].f == SYSMIS || rindx < 1 || rindx > vect->cnt)
1229 if (sp[0].f == SYSMIS)
1230 msg (SE, _("SYSMIS is not a valid index value for vector "
1231 "%s. The result will be set to the empty "
1235 msg (SE, _("%g is not a valid index value for vector %s. "
1236 "The result will be set to the empty string."),
1237 sp[0].f, vect->name);
1238 CHECK_STRING_SPACE (0);
1239 sp->c = ALLOC_STRING_SPACE (0);
1244 v = vect->var[rindx - 1];
1245 CHECK_STRING_SPACE (v->width);
1246 sp->c = ALLOC_STRING_SPACE (v->width);
1247 sp->c[0] = v->width;
1248 memcpy (&sp->c[1], c->data[v->fv].s, v->width);
1259 CHECK_STRING_SPACE (*str);
1260 sp->c = ALLOC_STRING_SPACE (*str);
1261 memcpy (sp->c, str, *str + 1);
1266 sp->f = c->data[(*vars)->fv].f;
1267 if (is_num_user_missing (sp->f, *vars))
1273 int width = (*vars)->width;
1276 CHECK_STRING_SPACE (width);
1277 sp->c = ALLOC_STRING_SPACE (width);
1279 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1285 struct ccase *c = lagged_case (*op++);
1292 sp->f = c->data[(*vars)->fv].f;
1293 if (is_num_user_missing (sp->f, *vars))
1301 struct ccase *c = lagged_case (*op++);
1302 int width = (*vars)->width;
1305 CHECK_STRING_SPACE (width);
1306 sp->c = ALLOC_STRING_SPACE (width);
1310 memset (sp->c, ' ', width);
1312 memcpy (&sp->c[1], &c->data[(*vars)->fv], width);
1319 sp->f = c->data[*op++].f == SYSMIS;
1323 sp->f = is_str_user_missing (c->data[(*vars)->fv].s, *vars);
1328 sp->f = c->data[*op++].f;
1332 sp->f = vfm_sink_info.ncases + 1;
1339 #if GLOBAL_DEBUGGING
1340 printf (_("evaluate_expression(): not implemented: %s\n"),
1343 printf (_("evaluate_expression(): not implemented: %d\n"), op[-1]);
1351 if (e->type != EX_STRING)
1353 double value = sp->f;
1354 if (!finite (value))
1365 memcpy (e->str_stack, sp->c, sp->c[0] + 1);
1366 v->c = e->str_stack;