1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
40 #include "debug-print.h"
43 /* Specialized error routine. */
45 static void dls_error (const struct data_in *, const char *format, ...)
49 vdls_error (const struct data_in *i, const char *format, va_list args)
54 if (i->flags & DI_IGNORE_ERROR)
58 if (!getl_reading_script)
59 ds_puts (&title, _("data-file error: "));
61 ds_printf (&title, _("(column %d"), i->f1);
63 ds_printf (&title, _("(columns %d-%d"), i->f1, i->f2);
64 ds_printf (&title, _(", field type %s) "), fmt_to_string (&i->format));
67 err_location (&e.where);
68 e.title = ds_c_str (&title);
70 err_vmsg (&e, format, args);
76 dls_error (const struct data_in *i, const char *format, ...)
80 va_start (args, format);
81 vdls_error (i, format, args);
85 /* Parsing utility functions. */
87 /* Excludes leading and trailing whitespace from I by adjusting
90 trim_whitespace (struct data_in *i)
92 while (i->s < i->e && isspace (i->s[0]))
95 while (i->s < i->e && isspace (i->e[-1]))
99 /* Returns nonzero if we're not at the end of the string being
102 have_char (struct data_in *i)
107 /* If implied decimal places are enabled, apply them to
110 apply_implied_decimals (struct data_in *i)
112 if ((i->flags & DI_IMPLIED_DECIMALS) && i->format.d > 0)
113 i->v->f /= pow (10., i->format.d);
116 /* Format parsers. */
118 static int parse_int (struct data_in *i, long *result);
120 /* This function is based on strtod() from the GNU C library. */
122 parse_numeric (struct data_in *i)
124 int sign; /* +1 or -1. */
125 double num; /* The number so far. */
127 int got_dot; /* Found a decimal point. */
128 int got_digit; /* Count of digits. */
130 int decimal; /* Decimal point character. */
131 int grouping; /* Grouping character. */
133 long int exponent; /* Number's exponent. */
134 int type; /* Usually same as i->format.type. */
138 type = i->format.type;
139 if (type == FMT_DOLLAR && have_char (i) && *i->s == '$')
148 sign = *i->s == '-' ? -1 : 1;
149 if (*i->s == '-' || *i->s == '+')
157 decimal = get_decimal();
158 grouping = get_grouping();
162 decimal = get_grouping();
163 grouping = get_decimal();
171 for (; have_char (i); i->s++)
177 /* Make sure that multiplication by 10 will not overflow. */
178 if (num > DBL_MAX * 0.1)
179 /* The value of the digit doesn't matter, since we have already
180 gotten as many digits as can be represented in a `double'.
181 This doesn't necessarily mean the result will overflow.
182 The exponent may reduce it to within range.
184 We just need to record that there was another
185 digit so that we can multiply by 10 later. */
188 num = (num * 10.0) + (*i->s - '0');
190 /* Keep track of the number of digits after the decimal point.
191 If we just divided by 10 here, we would lose precision. */
195 else if (!got_dot && *i->s == decimal)
196 /* Record that we have found the decimal point. */
198 else if ((type != FMT_COMMA && type != FMT_DOT) || *i->s != grouping)
199 /* Any other character terminates the number. */
210 dls_error (i, _("Field does not form a valid floating-point constant."));
216 && (tolower (*i->s) == 'e' || tolower (*i->s) == 'd'
217 || (type == FMT_E && (*i->s == '+' || *i->s == '-'))))
219 /* Get the exponent specified after the `e' or `E'. */
224 if (!parse_int (i, &exp))
232 else if (!got_dot && (i->flags & DI_IMPLIED_DECIMALS))
233 exponent -= i->format.d;
235 if (type == FMT_PCT && have_char (i) && *i->s == '%')
239 dls_error (i, _("Field contents followed by garbage."));
250 /* Multiply NUM by 10 to the EXPONENT power, checking for overflow
254 if (-exponent + got_digit > -(DBL_MIN_10_EXP) + 5
255 || num < DBL_MIN * pow (10.0, (double) -exponent))
257 dls_error (i, _("Underflow in floating-point constant."));
262 num *= pow (10.0, (double) exponent);
264 else if (exponent > 0)
266 if (num > DBL_MAX * pow (10.0, (double) -exponent))
268 dls_error (i, _("Overflow in floating-point constant."));
273 num *= pow (10.0, (double) exponent);
276 i->v->f = sign > 0 ? num : -num;
280 /* Returns the integer value of hex digit C. */
284 const char s[] = "0123456789abcdef";
285 const char *cp = strchr (s, tolower ((unsigned char) c));
292 parse_N (struct data_in *i)
294 const unsigned char *cp;
297 for (cp = i->s; cp < i->e; cp++)
301 dls_error (i, _("All characters in field must be digits."));
305 i->v->f = i->v->f * 10.0 + *cp - '0';
308 apply_implied_decimals (i);
313 parse_PIBHEX (struct data_in *i)
316 const unsigned char *cp;
321 for (cp = i->s; cp < i->e; cp++)
325 dls_error (i, _("Unrecognized character in field."));
329 n = n * 16.0 + hexit_value (*cp);
337 parse_RBHEX (struct data_in *i)
339 /* Validate input. */
341 if ((i->e - i->s) % 2)
343 dls_error (i, _("Field must have even length."));
348 const unsigned char *cp;
350 for (cp = i->s; cp < i->e; cp++)
353 dls_error (i, _("Field must contain only hex digits."));
363 unsigned char c[sizeof (double)];
369 memset (u.c, 0, sizeof u.c);
370 for (j = 0; j < min ((i->e - i->s) / 2, sizeof u.d); j++)
371 u.c[j] = 16 * hexit_value (i->s[j * 2]) + hexit_value (i->s[j * 2 + 1]);
380 parse_Z (struct data_in *i)
383 bool got_dot = false;
385 /* Warn user that we suck. */
392 _("Quality of zoned decimal (Z) input format code is "
393 "suspect. Check your results three times. Report bugs "
394 "to %s."),PACKAGE_BUGREPORT);
399 /* Validate input. */
404 dls_error (i, _("Zoned decimal field contains fewer than 2 "
409 /* Copy sign into buf[0]. */
410 if ((i->e[-1] & 0xc0) != 0xc0)
412 dls_error (i, _("Bad sign byte in zoned decimal number."));
415 buf[0] = (i->e[-1] ^ (i->e[-1] >> 1)) & 0x10 ? '-' : '+';
417 /* Copy digits into buf[1 ... len - 1] and terminate string. */
419 const unsigned char *sp;
422 for (sp = i->s, dp = buf + 1; sp < i->e - 1; sp++, dp++)
428 else if ((*sp & 0xf0) == 0xf0 && (*sp & 0xf) < 10)
429 *dp = (*sp & 0xf) + '0';
432 dls_error (i, _("Format error in zoned decimal number."));
439 /* Parse as number. */
443 i->v->f = strtod ((char *) buf, (char **) &tail);
444 if ((unsigned char *) tail != i->e)
446 dls_error (i, _("Error in syntax of zoned decimal number."));
452 apply_implied_decimals (i);
458 parse_IB (struct data_in *i)
465 /* We want the data to be in big-endian format. If this is a
466 little-endian machine, reverse the byte order. */
467 #ifdef WORDS_BIGENDIAN
470 memcpy (buf, i->s, i->e - i->s);
471 mm_reverse (buf, i->e - i->s);
475 /* If the value is negative, we need to logical-NOT each value
486 for (j = 0; j < i->e - i->s; j++)
487 i->v->f = i->v->f * 256.0 + (p[j] ^ xor);
490 /* If the value is negative, add 1 and set the sign, to complete a
491 two's-complement negation. */
493 i->v->f = -(i->v->f + 1.0);
495 apply_implied_decimals (i);
501 parse_PIB (struct data_in *i)
507 for (j = 0; j < i->e - i->s; j++)
508 i->v->f = i->v->f * 256.0 + i->s[j];
510 for (j = i->e - i->s - 1; j >= 0; j--)
511 i->v->f = i->v->f * 256.0 + i->s[j];
514 apply_implied_decimals (i);
520 parse_P (struct data_in *i)
522 const unsigned char *cp;
525 for (cp = i->s; cp < i->e - 1; cp++)
527 i->v->f = i->v->f * 10 + (*cp >> 4);
528 i->v->f = i->v->f * 10 + (*cp & 15);
530 i->v->f = i->v->f * 10 + (*cp >> 4);
531 if ((*cp ^ (*cp >> 1)) & 0x10)
534 apply_implied_decimals (i);
540 parse_PK (struct data_in *i)
542 const unsigned char *cp;
545 for (cp = i->s; cp < i->e; cp++)
547 i->v->f = i->v->f * 10 + (*cp >> 4);
548 i->v->f = i->v->f * 10 + (*cp & 15);
551 apply_implied_decimals (i);
557 parse_RB (struct data_in *i)
562 unsigned char c[sizeof (double)];
566 memset (u.c, 0, sizeof u.c);
567 memcpy (u.c, i->s, min ((int) sizeof (u.c), i->e - i->s));
574 parse_A (struct data_in *i)
576 ptrdiff_t len = i->e - i->s;
578 if (len >= i->format.w)
579 memcpy (i->v->s, i->s, i->format.w);
582 memcpy (i->v->s, i->s, len);
583 memset (i->v->s + len, ' ', i->format.w - len);
590 parse_AHEX (struct data_in *i)
592 /* Validate input. */
594 if ((i->e - i->s) % 2)
596 dls_error (i, _("Field must have even length."));
601 const unsigned char *cp;
603 for (cp = i->s; cp < i->e; cp++)
606 dls_error (i, _("Field must contain only hex digits."));
615 for (j = 0; j < min (i->e - i->s, i->format.w); j += 2)
616 i->v->s[j / 2] = hexit_value (i->s[j]) * 16 + hexit_value (i->s[j + 1]);
617 memset (i->v->s + (i->e - i->s) / 2, ' ', (i->format.w - (i->e - i->s)) / 2);
623 /* Date & time format components. */
625 /* Advances *CP past any whitespace characters. */
627 skip_whitespace (struct data_in *i)
629 while (isspace ((unsigned char) *i->s))
634 parse_leader (struct data_in *i)
641 force_have_char (struct data_in *i)
646 dls_error (i, _("Unexpected end of field."));
651 parse_int (struct data_in *i, long *result)
655 if (!force_have_char (i))
663 else if (*i->s == '-')
670 if (!isdigit (*i->s))
672 dls_error (i, _("Digit expected in field."));
679 *result = *result * 10 + *i->s++ - '0';
680 if (!have_char (i) || !isdigit (*i->s))
690 parse_day (struct data_in *i, long *day)
692 if (!parse_int (i, day))
694 if (*day >= 1 && *day <= 31)
697 dls_error (i, _("Day (%ld) must be between 1 and 31."), *day);
702 parse_day_count (struct data_in *i, long *day_count)
704 return parse_int (i, day_count);
708 parse_date_delimiter (struct data_in *i)
713 && (*i->s == '-' || *i->s == '/' || isspace (*i->s)
714 || *i->s == '.' || *i->s == ','))
722 dls_error (i, _("Delimiter expected between fields in date."));
726 /* Association between a name and a value. */
729 const char *name; /* Name. */
730 bool can_abbreviate; /* True if name may be abbreviated. */
731 int value; /* Value associated with name. */
734 /* Reads a name from I and sets *OUTPUT to the value associated
735 with that name. Returns true if successful, false otherwise. */
737 parse_enum (struct data_in *i, const char *what,
738 const struct enum_name *enum_names,
743 const struct enum_name *ep;
745 /* Consume alphabetic characters. */
748 while (have_char (i) && isalpha (*i->s))
755 dls_error (i, _("Parse error at `%c' expecting %s."), *i->s, what);
759 for (ep = enum_names; ep->name != NULL; ep++)
760 if ((ep->can_abbreviate
761 && lex_id_match_len (ep->name, strlen (ep->name), name, length))
762 || (!ep->can_abbreviate && length == strlen (ep->name)
763 && !memcmp (name, ep->name, length)))
769 dls_error (i, _("Unknown %s `%.*s'."), what, (int) length, name);
774 parse_month (struct data_in *i, long *month)
776 static const struct enum_name month_names[] =
778 {"january", true, 1},
779 {"february", true, 2},
786 {"september", true, 9},
787 {"october", true, 10},
788 {"november", true, 11},
789 {"december", true, 12},
809 if (!force_have_char (i))
814 if (!parse_int (i, month))
816 if (*month >= 1 && *month <= 12)
819 dls_error (i, _("Month (%ld) must be between 1 and 12."), *month);
823 return parse_enum (i, _("month"), month_names, month);
827 parse_year (struct data_in *i, long *year)
829 if (!parse_int (i, year))
832 if (*year >= 0 && *year <= 199)
834 if (*year >= 1582 || *year <= 19999)
837 dls_error (i, _("Year (%ld) must be between 1582 and 19999."), *year);
842 parse_trailer (struct data_in *i)
848 dls_error (i, _("Trailing garbage \"%s\" following date."), i->s);
853 parse_julian (struct data_in *i, long *julian)
855 if (!parse_int (i, julian))
859 int day = *julian % 1000;
861 if (day < 1 || day > 366)
863 dls_error (i, _("Julian day (%d) must be between 1 and 366."), day);
869 int year = *julian / 1000;
871 if (year >= 0 && year <= 199)
873 else if (year < 1582 || year > 19999)
875 dls_error (i, _("Year (%d) must be between 1582 and 19999."), year);
884 parse_quarter (struct data_in *i, long *quarter)
886 if (!parse_int (i, quarter))
888 if (*quarter >= 1 && *quarter <= 4)
891 dls_error (i, _("Quarter (%ld) must be between 1 and 4."), *quarter);
896 parse_q_delimiter (struct data_in *i)
899 if (!have_char (i) || tolower (*i->s) != 'q')
901 dls_error (i, _("`Q' expected between quarter and year."));
910 parse_week (struct data_in *i, long *week)
912 if (!parse_int (i, week))
914 if (*week >= 1 && *week <= 53)
917 dls_error (i, _("Week (%ld) must be between 1 and 53."), *week);
922 parse_wk_delimiter (struct data_in *i)
926 || tolower (i->s[0]) != 'w' || tolower (i->s[1]) != 'k')
928 dls_error (i, _("`WK' expected between week and year."));
937 parse_time_delimiter (struct data_in *i)
942 && (*i->s == ':' || *i->s == '.' || isspace (*i->s)))
951 dls_error (i, _("Delimiter expected between fields in time."));
956 parse_hour (struct data_in *i, long *hour)
958 if (!parse_int (i, hour))
963 dls_error (i, _("Hour (%ld) must be positive."), *hour);
968 parse_minute (struct data_in *i, long *minute)
970 if (!parse_int (i, minute))
972 if (*minute >= 0 && *minute <= 59)
975 dls_error (i, _("Minute (%ld) must be between 0 and 59."), *minute);
980 parse_opt_second (struct data_in *i, double *second)
988 && (*i->s == ':' || *i->s == '.' || isspace (*i->s)))
994 if (!delim || !isdigit (*i->s))
1001 while (have_char (i) && isdigit (*i->s))
1003 if (have_char (i) && *i->s == '.')
1005 while (have_char (i) && isdigit (*i->s))
1009 *second = strtod (buf, NULL);
1015 parse_hour24 (struct data_in *i, long *hour24)
1017 if (!parse_int (i, hour24))
1019 if (*hour24 >= 0 && *hour24 <= 23)
1022 dls_error (i, _("Hour (%ld) must be between 0 and 23."), *hour24);
1028 parse_weekday (struct data_in *i, long *weekday)
1030 static const struct enum_name weekday_names[] =
1032 {"sunday", true, 1},
1034 {"monday", true, 2},
1036 {"tuesday", true, 3},
1038 {"wednesday", true, 4},
1040 {"thursday", true, 5},
1042 {"friday", true, 6},
1044 {"saturday", true, 7},
1050 return parse_enum (i, _("weekday"), weekday_names, weekday);
1054 parse_spaces (struct data_in *i)
1056 skip_whitespace (i);
1061 parse_sign (struct data_in *i, int *sign)
1063 if (!force_have_char (i))
1085 /* Date & time formats. */
1088 calendar_error (void *i_, const char *format, ...)
1090 struct data_in *i = i_;
1093 va_start (args, format);
1094 vdls_error (i, format, args);
1099 ymd_to_ofs (struct data_in *i, int year, int month, int day, double *ofs)
1101 *ofs = calendar_gregorian_to_offset (year, month, day, calendar_error, i);
1102 return *ofs != SYSMIS;
1106 ymd_to_date (struct data_in *i, int year, int month, int day, double *date)
1108 if (ymd_to_ofs (i, year, month, day, date))
1110 *date *= 60. * 60. * 24.;
1118 parse_DATE (struct data_in *i)
1120 long day, month, year;
1122 return (parse_leader (i)
1123 && parse_day (i, &day)
1124 && parse_date_delimiter (i)
1125 && parse_month (i, &month)
1126 && parse_date_delimiter (i)
1127 && parse_year (i, &year)
1128 && parse_trailer (i)
1129 && ymd_to_date (i, year, month, day, &i->v->f));
1133 parse_ADATE (struct data_in *i)
1135 long month, day, year;
1137 return (parse_leader (i)
1138 && parse_month (i, &month)
1139 && parse_date_delimiter (i)
1140 && parse_day (i, &day)
1141 && parse_date_delimiter (i)
1142 && parse_year (i, &year)
1143 && parse_trailer (i)
1144 && ymd_to_date (i, year, month, day, &i->v->f));
1148 parse_EDATE (struct data_in *i)
1150 long month, day, year;
1152 return (parse_leader (i)
1153 && parse_day (i, &day)
1154 && parse_date_delimiter (i)
1155 && parse_month (i, &month)
1156 && parse_date_delimiter (i)
1157 && parse_year (i, &year)
1158 && parse_trailer (i)
1159 && ymd_to_date (i, year, month, day, &i->v->f));
1163 parse_SDATE (struct data_in *i)
1165 long month, day, year;
1167 return (parse_leader (i)
1168 && parse_year (i, &year)
1169 && parse_date_delimiter (i)
1170 && parse_month (i, &month)
1171 && parse_date_delimiter (i)
1172 && parse_day (i, &day)
1173 && parse_trailer (i)
1174 && ymd_to_date (i, year, month, day, &i->v->f));
1178 parse_JDATE (struct data_in *i)
1183 if (!parse_leader (i)
1184 || !parse_julian (i, &julian)
1185 || !parse_trailer (i)
1186 || !ymd_to_ofs (i, julian / 1000, 1, 1, &ofs))
1189 i->v->f = (ofs + julian % 1000 - 1) * 60. * 60. * 24.;
1194 parse_QYR (struct data_in *i)
1198 return (parse_leader (i)
1199 && parse_quarter (i, &quarter)
1200 && parse_q_delimiter (i)
1201 && parse_year (i, &year)
1202 && parse_trailer (i)
1203 && ymd_to_date (i, year, (quarter - 1) * 3 + 1, 1, &i->v->f));
1207 parse_MOYR (struct data_in *i)
1211 return (parse_leader (i)
1212 && parse_month (i, &month)
1213 && parse_date_delimiter (i)
1214 && parse_year (i, &year)
1215 && parse_trailer (i)
1216 && ymd_to_date (i, year, month, 1, &i->v->f));
1220 parse_WKYR (struct data_in *i)
1225 if (!parse_leader (i)
1226 || !parse_week (i, &week)
1227 || !parse_wk_delimiter (i)
1228 || !parse_year (i, &year)
1229 || !parse_trailer (i))
1234 if (!ymd_to_ofs (i, year, 1, 1, &ofs))
1239 if (ymd_to_ofs (i, 1583, 1, 1, &ofs))
1244 i->v->f = (ofs + (week - 1) * 7) * 60. * 60. * 24.;
1249 parse_TIME (struct data_in *i)
1255 if (!parse_leader (i)
1256 || !parse_sign (i, &sign)
1257 || !parse_spaces (i)
1258 || !parse_hour (i, &hour)
1259 || !parse_time_delimiter (i)
1260 || !parse_minute (i, &minute)
1261 || !parse_opt_second (i, &second))
1264 i->v->f = (hour * 60. * 60. + minute * 60. + second) * sign;
1269 parse_DTIME (struct data_in *i)
1272 long day_count, hour;
1276 if (!parse_leader (i)
1277 || !parse_sign (i, &sign)
1278 || !parse_spaces (i)
1279 || !parse_day_count (i, &day_count)
1280 || !parse_time_delimiter (i)
1281 || !parse_hour (i, &hour)
1282 || !parse_time_delimiter (i)
1283 || !parse_minute (i, &minute)
1284 || !parse_opt_second (i, &second))
1287 i->v->f = (day_count * 60. * 60. * 24.
1295 parse_DATETIME (struct data_in *i)
1297 long day, month, year;
1302 if (!parse_leader (i)
1303 || !parse_day (i, &day)
1304 || !parse_date_delimiter (i)
1305 || !parse_month (i, &month)
1306 || !parse_date_delimiter (i)
1307 || !parse_year (i, &year)
1308 || !parse_time_delimiter (i)
1309 || !parse_hour24 (i, &hour24)
1310 || !parse_time_delimiter (i)
1311 || !parse_minute (i, &minute)
1312 || !parse_opt_second (i, &second)
1313 || !ymd_to_date (i, year, month, day, &i->v->f))
1316 i->v->f += hour24 * 60. * 60. + minute * 60. + second;
1321 parse_WKDAY (struct data_in *i)
1325 if (!parse_leader (i)
1326 || !parse_weekday (i, &weekday)
1327 || !parse_trailer (i))
1335 parse_MONTH (struct data_in *i)
1339 if (!parse_leader (i)
1340 || !parse_month (i, &month)
1341 || !parse_trailer (i))
1348 /* Main dispatcher. */
1351 default_result (struct data_in *i)
1353 const struct fmt_desc *const fmt = &formats[i->format.type];
1355 /* Default to SYSMIS or blanks. */
1356 if (fmt->cat & FCAT_STRING)
1357 memset (i->v->s, ' ', i->format.w);
1359 i->v->f = get_blanks();
1363 data_in (struct data_in *i)
1365 const struct fmt_desc *const fmt = &formats[i->format.type];
1367 /* Check that we've got a string to work with. */
1368 if (i->e == i->s || i->format.w <= 0)
1374 i->f2 = i->f1 + (i->e - i->s) - 1;
1376 /* Make sure that the string isn't too long. */
1377 if (i->format.w > fmt->Imax_w)
1379 dls_error (i, _("Field too long (%d characters). Truncated after "
1381 i->format.w, fmt->Imax_w);
1382 i->format.w = fmt->Imax_w;
1385 if (fmt->cat & FCAT_BLANKS_SYSMIS)
1387 const unsigned char *cp;
1397 i->v->f = get_blanks();
1404 static int (*const handlers[FMT_NUMBER_OF_FORMATS])(struct data_in *) =
1406 parse_numeric, parse_N, parse_numeric, parse_numeric,
1407 parse_numeric, parse_numeric, parse_numeric,
1408 parse_Z, parse_A, parse_AHEX, parse_IB, parse_P, parse_PIB,
1409 parse_PIBHEX, parse_PK, parse_RB, parse_RBHEX,
1410 NULL, NULL, NULL, NULL, NULL,
1411 parse_DATE, parse_EDATE, parse_SDATE, parse_ADATE, parse_JDATE,
1412 parse_QYR, parse_MOYR, parse_WKYR,
1413 parse_DATETIME, parse_TIME, parse_DTIME,
1414 parse_WKDAY, parse_MONTH,
1417 int (*handler)(struct data_in *);
1420 handler = handlers[i->format.type];
1421 assert (handler != NULL);
1423 success = handler (i);
1431 /* Utility function. */
1433 /* Sets DI->{s,e} appropriately given that LINE has length LEN and the
1434 field starts at one-based column FC and ends at one-based column
1437 data_in_finite_line (struct data_in *di, const char *line, size_t len,
1440 di->s = line + ((size_t) fc <= len ? fc - 1 : len);
1441 di->e = line + ((size_t) lc <= len ? lc : len);