1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #if TIME_WITH_SYS_TIME
37 #include <gsl/gsl_randist.h>
43 #include "dictionary.h"
45 #include "julcal/julcal.h"
57 expr_evaluate (const struct expression *e, const struct ccase *c, int case_idx,
60 unsigned char *op = e->op;
62 unsigned char *str = e->str;
63 struct variable **vars = e->var;
67 union value *sp = e->stack;
77 if (sp[1].f == SYSMIS)
79 else if (sp[0].f != SYSMIS)
84 if (sp[1].f == SYSMIS)
86 else if (sp[0].f != SYSMIS)
91 if (sp[1].f == SYSMIS)
93 else if (sp[0].f != SYSMIS)
98 if (sp[1].f == SYSMIS || sp[1].f == 0.)
100 else if (sp[0].f != SYSMIS)
105 if (sp[0].f == SYSMIS)
110 else if (sp[1].f == SYSMIS)
117 else if (sp[0].f == 0.0 && sp[1].f <= 0.0)
120 sp->f = pow (sp[0].f, sp[1].f);
124 /* Note that booleans are always one of 0, 1, or SYSMIS.
126 Truth table (in order of detection):
138 1 and SYSMIS = SYSMIS
139 SYSMIS and SYSMIS = SYSMIS
143 SYSMIS and 1 = SYSMIS
147 if (sp[0].f == 0.0); /* 1 */
148 else if (sp[1].f == 0.0)
150 else if (sp[1].f == SYSMIS)
151 sp->f = SYSMIS; /* 3 */
154 /* Truth table (in order of detection):
167 SYSMIS or SYSMIS = SYSMIS
175 if (sp[0].f == 1.0); /* 1 */
176 else if (sp[1].f == 1.0)
178 else if (sp[1].f == SYSMIS)
179 sp->f = SYSMIS; /* 3 */
184 else if (sp[0].f == 1.0)
190 if (sp[0].f != SYSMIS)
192 if (sp[1].f == SYSMIS)
195 sp->f = sp[0].f == sp[1].f;
200 if (sp[0].f != SYSMIS)
202 if (sp[1].f == SYSMIS)
205 sp->f = sp[0].f >= sp[1].f;
210 if (sp[0].f != SYSMIS)
212 if (sp[1].f == SYSMIS)
215 sp->f = sp[0].f > sp[1].f;
220 if (sp[0].f != SYSMIS)
222 if (sp[1].f == SYSMIS)
225 sp->f = sp[0].f <= sp[1].f;
230 if (sp[0].f != SYSMIS)
232 if (sp[1].f == SYSMIS)
235 sp->f = sp[0].f < sp[1].f;
240 if (sp[0].f != SYSMIS)
242 if (sp[1].f == SYSMIS)
245 sp->f = sp[0].f != sp[1].f;
249 /* String operators. */
252 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
253 &sp[1].c[1], sp[1].c[0]) == 0;
257 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
258 &sp[1].c[1], sp[1].c[0]) >= 0;
262 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
263 &sp[1].c[1], sp[1].c[0]) > 0;
267 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
268 &sp[1].c[1], sp[1].c[0]) <= 0;
272 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
273 &sp[1].c[1], sp[1].c[0]) < 0;
277 sp[0].f = st_compare_pad (&sp[0].c[1], sp[0].c[0],
278 &sp[1].c[1], sp[1].c[0]) != 0;
281 /* Unary functions. */
288 sp->f = fabs (sp->f);
294 sp->f = acos (sp->f);
303 sp->f = asin (sp->f);
310 sp->f = atan (sp->f);
329 sp->f = log10 (sp->f);
345 sp->f = fmod (sp->f, 10);
351 sp->f = floor (sp->f + 0.5);
353 sp->f = -floor (-sp->f + 0.5);
364 sp->f = sqrt (sp->f);
382 sp->f = floor (sp->f);
384 sp->f = -floor (-sp->f);
388 /* N-ary numeric functions. */
397 for (i = 1; i < n_args; i++)
398 if (sp[0].f == sp[i].f)
403 else if (sp[i].f != SYSMIS)
405 sp->f = sysmis ? SYSMIS : 0.0;
414 for (i = 1; i < n_args; i++)
415 if (!st_compare_pad (&sp[0].c[1], sp[0].c[0],
416 &sp[i].c[1], sp[i].c[0]))
427 double weight, mean, variance;
431 moments_of_values (sp, n_args,
432 &weight, &mean, &variance, NULL, NULL);
434 if (weight < *op++ || mean == SYSMIS
435 || mean == 0 || variance == SYSMIS)
438 sp->f = sqrt (variance) / mean;
445 double max = -DBL_MAX;
448 for (i = 0; i < n_args; i++)
449 if (sp[i].f != SYSMIS)
467 for (i = 1; i < n_args; i++)
468 if (st_compare_pad (&sp[i].c[1], sp[i].c[0],
469 &sp[max_idx].c[1], sp[max_idx].c[0]) > 0)
471 sp[0].c = sp[max_idx].c;
481 moments_of_values (sp, n_args,
482 &weight, &mean, NULL, NULL, NULL);
483 sp->f = weight < *op++ ? SYSMIS : mean;
490 double min = DBL_MAX;
493 for (i = 0; i < n_args; i++)
494 if (sp[i].f != SYSMIS)
512 for (i = 1; i < n_args; i++)
513 if (st_compare_pad (&sp[i].c[1], sp[i].c[0],
514 &sp[min_idx].c[1], sp[min_idx].c[0]) < 0)
516 sp[0].c = sp[min_idx].c;
525 for (i = 0; i < n_args; i++)
526 if (sp[i].f == SYSMIS)
537 for (i = 0; i < n_args; i++)
538 if (sp[i].f != SYSMIS)
551 for (i = 1; i < n_args; i += 2)
552 if (sp[i].f == SYSMIS || sp[i + 1].f == SYSMIS)
554 else if (sp[0].f >= sp[i].f && sp[0].f <= sp[i + 1].f)
561 sp->f = sysmis ? SYSMIS : 0.0;
564 case OP_RANGE_STRING:
569 for (i = 1; i < n_args; i += 2)
570 if (st_compare_pad (&sp[0].c[1], sp[0].c[0],
571 &sp[i].c[1], sp[i].c[0]) >= 0
572 && st_compare_pad (&sp[0].c[1], sp[0].c[0],
573 &sp[i + 1].c[1], sp[i + 1].c[0]) <= 0)
584 double weight, variance;
587 moments_of_values (sp, n_args,
588 &weight, NULL, &variance, NULL, NULL);
589 sp->f = weight < *op++ ? SYSMIS : sqrt (variance);
599 for (i = 0; i < n_args; i++)
600 if (sp[i].f != SYSMIS)
614 double weight, variance;
617 moments_of_values (sp, n_args,
618 &weight, NULL, &variance, NULL, NULL);
619 sp->f = weight < *op++ ? SYSMIS : variance;
623 /* Time construction function. */
626 if (sp[0].f == SYSMIS || sp[1].f == SYSMIS || sp[2].f == SYSMIS)
631 min = min (sp[0].f, min (sp[1].f, sp[2].f));
632 max = max (sp[0].f, max (sp[1].f, sp[2].f));
633 if (min < 0. && max > 0.)
635 msg (SW, _("TIME.HMS cannot mix positive and negative "
636 "in its arguments."));
640 sp->f = 60. * (60. * sp[0].f + sp[1].f) + sp[2].f;
645 sp->f /= 60. * 60. * 24.;
651 case OP_CTIME_MINUTES:
657 sp->f *= 60. * 60. * 24.;
659 case OP_CTIME_SECONDS:
663 /* Date construction functions. */
666 sp->f = yrmoda (sp[2].f, sp[1].f, sp[0].f);
668 sp->f *= 60. * 60. * 24.;
672 sp->f = yrmoda (sp[2].f, sp[0].f, sp[1].f);
674 sp->f *= 60. * 60. * 24.;
678 sp->f = yrmoda (sp[1].f, sp[0].f, 1);
680 sp->f *= 60. * 60. * 24.;
684 if (sp[0].f == SYSMIS)
688 sp->f = yrmoda (sp[1].f, sp[0].f * 3 - 2, 1);
690 sp->f *= 60. * 60. * 24.;
695 if (sp[0].f == SYSMIS || sp[1].f == SYSMIS)
697 else if (sp[0].f < 0. || sp[0].f > 53.)
699 msg (SW, _("Week argument to WKYR must be in range 0 to 53."));
704 double result = yrmoda (sp[1].f, 1, 1);
705 if (result != SYSMIS)
706 result = 60. * 60. * 24. * (result + 7. * (sp->f - 1.));
712 if (sp[1].f == SYSMIS)
716 sp->f = yrmoda (sp[0].f, 1, 1);
718 sp->f = 60. * 60. * 24. * (sp->f + floor (sp[1].f) - 1);
723 sp->f = yrmoda (sp[0].f, sp[1].f, sp[2].f);
726 /* Date extraction functions. */
729 sp->f = floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
733 sp->f = fmod (floor (sp->f / 60. / 60.), 24.);
737 sp->f = julian_to_jday (sp->f / 86400.);
743 julian_to_calendar (sp->f / 86400., NULL, NULL, &day);
747 case OP_XDATE_MINUTE:
749 sp->f = fmod (floor (sp->f / 60.), 60.);
755 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
759 case OP_XDATE_QUARTER:
763 julian_to_calendar (sp->f / 86400., NULL, &month, NULL);
764 sp->f = (month - 1) / 3 + 1;
767 case OP_XDATE_SECOND:
769 sp->f = fmod (sp->f, 60.);
773 sp->f = floor (sp->f / 60. / 60. / 24.);
777 sp->f -= floor (sp->f / 60. / 60. / 24.) * 60. * 60. * 24.;
781 sp->f = (julian_to_jday (sp->f / 86400.) - 1) / 7 + 1;
785 sp->f = julian_to_wday (sp->f / 86400.);
791 julian_to_calendar (sp->f / 86400., &year, NULL, NULL);
796 /* String functions. */
802 dest = pool_alloc (e->pool, 256);
806 for (i = 0; i < n_args; i++)
809 if (sp[i].c[0] + dest[0] < 255)
811 memcpy (&dest[dest[0] + 1], &sp[i].c[1], sp[i].c[0]);
812 dest[0] += sp[i].c[0];
816 memcpy (&dest[dest[0] + 1], &sp[i].c[1], 255 - dest[0]);
830 int last = sp[0].c[0] - sp[1].c[0];
832 for (i = 0; i <= last; i++)
833 if (!memcmp (&sp[0].c[i + 1], &sp[1].c[1], sp[1].c[0]))
848 else if (sp[2].f == SYSMIS)
850 msg (SW, _("Argument 3 of RINDEX may not be system-missing."));
855 int part_len = sp[2].f;
857 if (part_len < 0 || part_len > sp[1].c[0]
858 || sp[1].c[0] % part_len != 0)
860 msg (SW, _("Argument 3 of RINDEX must be between 1 and "
861 "the length of argument 2, and it must "
862 "evenly divide the length of argument 2."));
868 int last = sp[0].c[0] - part_len;
869 for (i = 0; i <= last; i++)
870 for (j = 0; j < sp[1].c[0]; j += part_len)
871 if (!memcmp (&sp[0].c[i + 1], &sp[1].c[j + 1], part_len))
888 for (i = sp[0].c[0] - sp[1].c[0]; i >= 0; i--)
889 if (!memcmp (&sp[0].c[i + 1], &sp[1].c[1], sp[1].c[0]))
904 else if (sp[2].f == SYSMIS)
906 msg (SW, _("Argument 3 of RINDEX may not be system-missing."));
911 int part_len = sp[2].f;
913 if (part_len < 0 || part_len > sp[1].c[0]
914 || sp[1].c[0] % part_len != 0)
916 msg (SW, _("Argument 3 of RINDEX must be between 1 and "
917 "the length of argument 2, and it must "
918 "evenly divide the length of argument 2."));
923 for (i = sp[0].c[0] - part_len; i >= 0; i--)
924 for (j = 0; j < sp[1].c[0]; j += part_len)
925 if (!memcmp (&sp[0].c[i + 1], &sp[1].c[j + 1], part_len))
939 for (i = sp[0].c[0]; i >= 1; i--)
940 sp[0].c[i] = tolower ((unsigned char) (sp[0].c[i]));
943 for (i = sp[0].c[0]; i >= 1; i--)
944 sp[0].c[i] = toupper ((unsigned char) (sp[0].c[i]));
951 if (sp[1].f == SYSMIS || len < 0 || len > 255 || sp[2].c[0] != 1)
953 else if (len > sp[0].c[0])
957 dest = pool_alloc (e->pool, len + 1);
959 memset (&dest[1], sp[2].c[1], len - sp->c[0]);
960 memcpy (&dest[len - sp->c[0] + 1], &sp->c[1], sp->c[0]);
970 if (len < 0 || len > 255 || sp[2].c[0] != 1)
972 else if (len > sp[0].c[0])
976 dest = pool_alloc (e->pool, len + 1);
978 memcpy (&dest[1], &sp->c[1], sp->c[0]);
979 memset (&dest[sp->c[0] + 1], sp[2].c[1], len - sp->c[0]);
991 int len = sp[0].c[0];
992 int cmp = sp[1].c[1];
995 while (i <= len && sp[0].c[i] == cmp)
999 sp[0].c[i] = sp[0].c[0] - i;
1000 sp->c = &sp[0].c[i];
1007 if (sp[1].c[0] != 1)
1011 int cmp = sp[1].c[1];
1012 while (sp[0].c[0] > 0 && sp[0].c[sp[0].c[0]] == cmp)
1024 di.format.type = *op++;
1025 di.format.w = *op++;
1026 di.format.d = *op++;
1027 di.e = &sp->c[1] + min (sp->c[0], di.format.w);
1035 unsigned char *dest;
1041 dest = pool_alloc (e->pool, f.w + 1);
1044 assert ((formats[f.type].cat & FCAT_STRING) == 0);
1045 data_out (&dest[1], &f, sp);
1055 if (index < 1 || index > sp[0].c[0])
1060 sp->c[index] = sp->c[0] - index;
1073 if (sp[1].f == SYSMIS || sp[2].f == SYSMIS || index < 1
1074 || index > sp[0].c[0] || n < 1)
1081 sp->c[index] = sp->c[0] - index;
1092 if (sp->f != SYSMIS)
1095 case OP_NUM_TO_BOOL:
1098 else if (sp->f == 1.0)
1100 else if (sp->f != SYSMIS)
1102 msg (SE, _("A number being treated as a Boolean in an "
1103 "expression was found to have a value other than "
1104 "0 (false), 1 (true), or the system-missing value. "
1105 "The result was forced to 0."));
1113 if (sp[0].f != SYSMIS)
1115 if (sp[1].f == SYSMIS)
1121 sp->f = fmod (sp[0].f, sp[1].f);
1125 if (sp->f != SYSMIS)
1126 sp->f = gsl_ran_gaussian (get_rng (), sp->f);
1129 if (sp->f != SYSMIS)
1130 sp->f *= gsl_rng_uniform (get_rng ());
1133 sp->f = sp->f == SYSMIS || !finite (sp->f);
1135 case OP_VEC_ELEM_NUM:
1137 int rindx = sp[0].f + EPSILON;
1138 const struct vector *v = dict_get_vector (default_dict, *op++);
1140 if (sp[0].f == SYSMIS || rindx < 1 || rindx > v->cnt)
1142 if (sp[0].f == SYSMIS)
1143 msg (SE, _("SYSMIS is not a valid index value for vector "
1144 "%s. The result will be set to SYSMIS."),
1147 msg (SE, _("%g is not a valid index value for vector %s. "
1148 "The result will be set to SYSMIS."),
1154 sp->f = case_num (c, v->var[rindx - 1]->fv);
1157 case OP_VEC_ELEM_STR:
1159 int rindx = sp[0].f + EPSILON;
1160 const struct vector *vect = dict_get_vector (default_dict, *op++);
1163 if (sp[0].f == SYSMIS || rindx < 1 || rindx > vect->cnt)
1165 if (sp[0].f == SYSMIS)
1166 msg (SE, _("SYSMIS is not a valid index value for vector "
1167 "%s. The result will be set to the empty "
1171 msg (SE, _("%g is not a valid index value for vector %s. "
1172 "The result will be set to the empty string."),
1173 sp[0].f, vect->name);
1174 sp->c = pool_alloc (e->pool, 1);
1179 v = vect->var[rindx - 1];
1180 sp->c = pool_alloc (e->pool, v->width + 1);
1181 sp->c[0] = v->width;
1183 memcpy (&sp->c[1], case_str (c, v->fv), v->width);
1194 sp->c = pool_alloc (e->pool, *str + 1);
1195 memcpy (sp->c, str, *str + 1);
1201 sp->f = case_num (c, (*vars)->fv);
1202 if (is_num_user_missing (sp->f, *vars))
1208 int width = (*vars)->width;
1211 sp->c = pool_alloc (e->pool, width + 1);
1214 memcpy (&sp->c[1], case_str (c, (*vars)->fv), width);
1220 struct ccase *c = lagged_case (*op++);
1227 sp->f = case_num (c, (*vars)->fv);
1228 if (is_num_user_missing (sp->f, *vars))
1236 struct ccase *c = lagged_case (*op++);
1237 int width = (*vars)->width;
1240 sp->c = pool_alloc (e->pool, width + 1);
1244 memset (sp->c, ' ', width);
1246 memcpy (&sp->c[1], case_str (c, (*vars)->fv), width);
1254 sp->f = case_num (c, *op++) == SYSMIS;
1259 sp->f = case_num (c, *op++);
1276 if (e->type != EXPR_STRING)
1278 double value = sp->f;
1279 if (!finite (value))