1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
4 Code for parsing floating-point numbers adapted from GNU C
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
35 #include "dictionary.h"
36 #include "file-handle.h"
44 #include "value-labels.h"
47 #include "debug-print.h"
49 /* Portable file reader: the state reached through the
   `struct pfm_reader *' argument (R) of the routines in this file.
   NOTE(review): the lines that open and close the structure
   definition are not visible in this excerpt. */
52 struct pool *pool; /* All the portable file state. */
54 jmp_buf bail_out; /* longjmp() target for error handling; error() jumps here. */
56 struct file_handle *fh; /* File handle. */
57 FILE *file; /* File stream, opened with pool_fopen() by pfm_open_reader(). */
58 char cc; /* Current character. */
59 unsigned char *trans; /* 256-byte character set translation table; advance() tests it against NULL. */
61 int var_cnt; /* Number of variables. */
62 int weight_index; /* 0-based index of weight variable, or -1. */
63 int *widths; /* Variable widths, 0 for numeric; allocated with VAR_CNT elements. */
64 int value_cnt; /* Number of `value's per case. */
/* NOTE(review): the next line looks like the tail of a forward
   declaration of error(); its return type and terminator are not
   visible in this excerpt. */
68 error (struct pfm_reader *r, const char *msg,...)
71 /* Displays MSG as an error message and aborts reading the
72 portable file via longjmp().

   MSG is a printf-style format; it is handed to err_vmsg() together
   with the variadic arguments.  The message title,
   "portable file FILENAME corrupt at offset N: ", is built from the
   handle's file name and ftell() on R->file, in a buffer allocated
   from R->pool.  This function does not return to its caller: it
   finishes with longjmp() to R->bail_out. */
74 error (struct pfm_reader *r, const char *msg, ...)
82 getl_location (&e.where.filename, &e.where.line_number);
83 filename = handle_get_filename (r->fh);
/* Room for the file name plus the fixed title text and the formatted
   offset; assumes the (possibly translated) title text fits in the
   extra 80 bytes, since sprintf() below is unbounded. */
84 e.title = title = pool_alloc (r->pool, strlen (filename) + 80);
85 sprintf (title, _("portable file %s corrupt at offset %ld: "),
86 filename, ftell (r->file));
89 err_vmsg (&e, msg, args);
/* Unwind to whichever setjmp() on R->bail_out is currently active. */
92 longjmp (r->bail_out, 1);
95 /* Closes portable file reader R, after we're done with it.

   The only visible action is destroying R->pool.  pfm_open_reader()
   allocates R itself from that pool and opens the stream with
   pool_fopen(), so presumably both are released along with the pool
   and R must not be used afterward (confirm against the pool
   implementation). */
97 pfm_close_reader (struct pfm_reader *r)
100 pool_destroy (r->pool);
103 /* Reads the next character from R->file, skipping carriage returns
   and line feeds.  "unexpected end of file" is reported through
   error(), which does not return.

   NOTE(review): the original comment said the character is read
   "into cur_char"; the reader has no member of that name, so this
   presumably means R->cc, translated through R->trans when that table
   is non-null.  The lines that test for end of file and store the
   character are not visible in this excerpt. */
105 advance (struct pfm_reader *r)
109 while ((c = getc (r->file)) == '\r' || c == '\n')
112 error (r, _("unexpected end of file"));
114 if (r->trans != NULL)
/* NOTE(review): the comment and body of match() are cut short in this
   excerpt.  Visible callers pass the character C to look for and use
   the result as a truth value, e.g. `while (match (r, 'D'))'. */
119 /* Skip a single character if present, and return whether it was
122 match (struct pfm_reader *r, int c)
/* Forward declarations.  read_version_data(), read_variables() and
   read_value_label() are called by pfm_open_reader() ahead of their
   definitions.  dump_dictionary() is the only one not declared static;
   no definition or call of it is visible in this excerpt. */
133 static void read_header (struct pfm_reader *);
134 static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
135 static void read_variables (struct pfm_reader *, struct dictionary *);
136 static void read_value_label (struct pfm_reader *, struct dictionary *);
137 void dump_dictionary (struct dictionary *);
139 /* Reads the dictionary from the portable file with handle FH, and
   returns it in a newly created dictionary structure stored in *DICT.
   This dictionary may be modified in order to rename, reorder, and
   delete variables, etc.  INFO is handed to read_version_data(), which
   stores the file's creation date, creation time, product and
   subproduct in it.

   Errors raised through error() while reading come back to the
   setjmp() in this function.  NOTE(review): the return statements and
   the label of the cleanup code are not visible in this excerpt; the
   visible cleanup closes the reader and destroys *DICT. */
143 pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
144 struct pfm_read_info *info)
146 struct pool *volatile pool = NULL;
147 struct pfm_reader *volatile r = NULL;
149 *dict = dict_create ();
150 if (!fh_open (fh, "portable file", "rs"))
153 /* Create and initialize reader. */
154 pool = pool_create ();
155 r = pool_alloc (pool, sizeof *r);
/* error() and the signature check in read_header() longjmp() back to
   this point when reading fails. */
157 if (setjmp (r->bail_out))
160 r->file = pool_fopen (r->pool, handle_get_filename (r->fh), "rb");
161 r->weight_index = -1;
167 /* Check that file open succeeded, prime reading. */
170 msg (ME, _("An error occurred while opening \"%s\" for reading "
171 "as a portable file: %s."),
172 handle_get_filename (r->fh), strerror (errno));
177 /* Read header, version, date info, product id, variables. */
179 read_version_data (r, info);
180 read_variables (r, *dict);
182 /* Read value labels.  Each value label record starts with `D'. */
183 while (match (r, 'D'))
184 read_value_label (r, *dict);
186 /* Check that we've made it to the data. */
188 error (r, _("Data record expected."));
/* Cleanup: release the reader and the dictionary created above. */
193 pfm_close_reader (r);
194 dict_destroy (*dict);
/* Returns the value of base-30 digit C,
   or -1 if C is not a base-30 digit.

   The digits are `0'...`9' followed by `A'...`T' (upper case only).
   The null character is rejected explicitly: strchr() treats the
   terminating null of the digit string as part of the string, so
   without this check a C of 0 would be reported as digit 30. */
static int
base_30_value (unsigned char c)
{
  static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST";
  const char *p;

  if (c == '\0')
    return -1;

  p = strchr (base_30_digits, c);
  return p != NULL ? (int) (p - base_30_digits) : -1;
}
209 /* Reads a number from R and returns its value.

   Leading spaces are skipped and `*' indicates system-missing.
   Otherwise the number is an optional `-', then digits in base 30
   (see base_30_value()) with at most one `.', then an optional
   exponent introduced by `+' or `-' and also written in base-30
   digits, then the terminating `/'.  Input with no digits or no
   terminator is reported through error(), which does not return.

   NOTE(review): several declarations and statements of this function
   are not visible in this excerpt. */
211 read_float (struct pfm_reader *r)
215 bool got_dot = false; /* Seen a decimal point? */
216 bool got_digit = false; /* Seen any digits? */
217 bool negative = false; /* Number is negative? */
219 /* Skip leading spaces. */
220 while (match (r, ' '))
223 /* `*' indicates system-missing. */
226 advance (r); /* Probably a dot (.) but doesn't appear to matter. */
230 negative = match (r, '-');
233 int digit = base_30_value (r->cc);
238 /* Make sure that multiplication by 30 will not overflow. */
239 if (num > DBL_MAX * (1. / 30.))
240 /* The value of the digit doesn't matter, since we have already
241 gotten as many digits as can be represented in a `double'.
242 This doesn't necessarily mean the result will overflow.
243 The exponent may reduce it to within range.
245 We just need to record that there was another
246 digit so that we can multiply by 30 later. */
249 num = (num * 30.0) + digit;
251 /* Keep track of the number of digits after the decimal point.
252 If we just divided by 30 here, we would lose precision. */
256 else if (!got_dot && r->cc == '.')
257 /* Record that we have found the decimal point. */
260 /* Any other character terminates the number. */
266 /* Check that we had some digits. */
/* NOTE(review): unlike the other messages in this function, this one
   is not wrapped in _() for translation. */
268 error (r, "Number expected.");
270 /* Get exponent if any. */
271 if (r->cc == '+' || r->cc == '-')
274 bool negative_exponent = r->cc == '-';
277 for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r))
/* Guards the `exp = exp * 30 + digit' step below against overflow. */
279 if (exp > LONG_MAX / 30)
284 exp = exp * 30 + digit;
287 /* We don't check whether there were actually any digits, but we
289 if (negative_exponent)
294 /* Numbers must end with `/'. */
296 error (r, _("Missing numeric terminator."));
298 /* Multiply `num' by 30 to the `exponent' power, checking for
301 num *= pow (30.0, (double) exponent);
302 else if (exponent > 0)
304 if (num > DBL_MAX * pow (30.0, (double) -exponent))
307 num *= pow (30.0, (double) exponent);
310 return negative ? -num : num;
313 /* Reads a number with read_float() and checks that it is a whole
   number strictly between INT_MIN and INT_MAX; anything else is
   reported as "Invalid integer." through error(), which does not
   return.
   NOTE(review): the return statement is not visible in this excerpt;
   callers assign the result to `int' variables. */
315 read_int (struct pfm_reader *r)
317 double f = read_float (r);
318 if (floor (f) != f || f >= INT_MAX || f <= INT_MIN)
319 error (r, _("Invalid integer."));
/* NOTE(review): the original comment that follows is cut short in this
   excerpt, as is the code that copies the characters into BUF.  From
   the visible code: the string length is read with read_int() and must
   be in the range 0...255, otherwise "Bad string length" is reported
   through error(), which does not return. */
323 /* Reads a string into BUF, which must have room for 256
326 read_string (struct pfm_reader *r, char *buf)
328 int n = read_int (r);
329 if (n < 0 || n > 255)
330 error (r, _("Bad string length %d."), n);
/* Reads a string with read_string() into a local buffer and returns a
   copy made by pool_strdup() from R->pool.
   NOTE(review): the function is declared as returning
   `unsigned char *', while read_version_data() and read_variables()
   store the result in plain `char *' variables.  The original comment
   that follows is cut short in this excerpt. */
340 /* Reads a string and returns a copy of it allocated from R's
342 static unsigned char *
343 read_pool_string (struct pfm_reader *r)
346 read_string (r, string);
347 return pool_strdup (r->pool, string);
350 /* Reads the 464-byte file header.

   As read here the header is 200 splash characters, a character
   translation table of which the first 64 entries are skipped, and
   the 8-character signature "SPSSPORT".  (200 + 8 leaves 256 bytes
   for the whole table, one per entry of portable_to_local[]; the loop
   that reads the remaining entries is not visible in this excerpt.)
   A file whose signature does not match is rejected with
   "Not a portable file". */
352 read_header (struct pfm_reader *r)
354 /* portable_to_local[PORTABLE] translates the given portable
355 character into the local character set. */
356 static const unsigned char portable_to_local[256] =
359 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
360 "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ "
364 unsigned char *trans;
367 /* Read and ignore vanity splash strings. */
368 for (i = 0; i < 200; i++)
371 /* Skip the first 64 characters of the translation table.
372 We don't care about these. They are probably all set to
373 '0', marking them as untranslatable, and that would screw
374 up our actual translation of the real '0'. */
375 for (i = 0; i < 64; i++)
378 /* Read the rest of the translation table. */
379 trans = pool_malloc (r->pool, 256);
/* Entries that are never assigned below stay 0. */
380 memset (trans, 0, 256);
/* C (presumably the byte just read from the file) stands for portable
   character I; record the local equivalent of that character. */
389 trans[c] = portable_to_local[i];
392 /* Set up the translation table, then read the first
393 translated character. */
397 /* Skip and verify signature. */
398 for (i = 0; i < 8; i++)
399 if (!match (r, "SPSSPORT"[i]))
/* Reported with msg() and a direct longjmp() rather than error(), so
   the message does not carry the "corrupt at offset" title that
   error() builds. */
401 msg (SE, _("%s: Not a portable file."), handle_get_filename (r->fh));
402 longjmp (r->bail_out, 1);
406 /* Reads the version and date info record, as well as product and
407 subproduct identification records if present.

   The date string must be 8 characters and the time string 6; both
   are rearranged into INFO->creation_date and INFO->creation_time.
   A product record is introduced by `1' and a subproduct record by
   `3'; when absent the empty string is used.  Both are copied into
   INFO with st_trim_copy().  Problems are reported through error(),
   which does not return. */
409 read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
411 char *date, *time, *product, *subproduct;
/* NOTE(review): this message is not wrapped in _() for translation,
   unlike the other messages in this function. */
416 error (r, "Unrecognized version code `%c'.", r->cc);
417 date = read_pool_string (r);
418 time = read_pool_string (r);
419 product = match (r, '1') ? read_pool_string (r) : (unsigned char *) "";
421 = match (r, '3') ? read_pool_string (r) : (unsigned char *) "";
/* NOTE(review): strlen() yields a size_t but both messages below format
   it with %d; the argument should be cast to int (or the conversion
   changed) for the call to be well-defined. */
424 if (strlen (date) != 8)
425 error (r, _("Bad date string length %d."), strlen (date));
426 if (strlen (time) != 6)
427 error (r, _("Bad time string length %d."), strlen (time));
429 /* Save file info. */
/* Rearranges the 8 date characters: DATE[0..3] go to positions 6-9,
   DATE[4..5] to 3-4 and DATE[6..7] to 0-1, with spaces at 2 and 5 and
   a terminator at 10, i.e. "DD MM YYYY" assuming the file stores the
   date as YYYYMMDD. */
433 for (i = 0; i < 8; i++)
435 static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
436 info->creation_date[map[i]] = date[i];
438 info->creation_date[2] = info->creation_date[5] = ' ';
439 info->creation_date[10] = 0;
/* Copies the 6 time characters in pairs to positions 0-1, 3-4 and 6-7,
   with spaces at 2 and 5 and a terminator at 8, i.e. "HH MM SS"
   assuming the file stores the time as HHMMSS. */
442 for (i = 0; i < 6; i++)
444 static const int map[] = {0, 1, 3, 4, 6, 7};
445 info->creation_time[map[i]] = time[i];
447 info->creation_time[2] = info->creation_time[5] = ' ';
448 info->creation_time[8] = 0;
451 st_trim_copy (info->product, product, sizeof info->product);
452 st_trim_copy (info->subproduct, subproduct, sizeof info->subproduct);
456 /* Translates a format specification read from portable file R as
   the three integers PORTABLE_FORMAT into a normal format specifier
   FORMAT, checking that the format is appropriate for variable V.

   PORTABLE_FORMAT[0] is the portable type code, mapped through
   translate_fmt() into FORMAT->type; PORTABLE_FORMAT[1] and [2] are
   stored unchanged in FORMAT->w and FORMAT->d.  An unknown type code,
   or a specifier rejected by check_output_specifier() or by
   check_specifier_width() for V's width, is reported through error(),
   which does not return. */
460 convert_format (struct pfm_reader *r, const int portable_format[3],
461 struct fmt_spec *format, struct variable *v)
463 format->type = translate_fmt (portable_format[0]);
464 if (format->type == -1)
465 error (r, _("%s: Bad format specifier byte (%d)."),
466 v->name, portable_format[0]);
467 format->w = portable_format[1];
468 format->d = portable_format[2];
470 if (!check_output_specifier (format, false)
471 || !check_specifier_width (format, v->width, false))
472 error (r, _("%s variable %s has invalid format specifier %s."),
473 v->type == NUMERIC ? _("Numeric") : _("String"),
474 v->name, fmt_to_string (format));
/* Forward declaration: read_variables() calls parse_value() for
   missing values ahead of its definition. */
477 static union value parse_value (struct pfm_reader *, struct variable *);
479 /* Read information on all the variables.

   Reads the variable count into R->var_cnt and allocates R->widths
   with one entry per variable, optionally reads the name of the
   weighting variable, then for each variable reads its width, name,
   six format integers, missing values and label, creating the
   variable in DICT.  Finally the weighting variable, if one was
   named, is looked up in DICT and set as its weight.  Problems are
   reported through error(), which does not return.

   NOTE(review): many lines of this function (local declarations and
   the tests guarding several of the error() calls) are not visible in
   this excerpt. */
481 read_variables (struct pfm_reader *r, struct dictionary *dict)
483 char *weight_name = NULL;
487 error (r, _("Expected variable count record."));
489 r->var_cnt = read_int (r);
490 if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
491 error (r, _("Invalid number of variables %d."), r->var_cnt);
492 r->widths = pool_alloc (r->pool, sizeof *r->widths * r->var_cnt);
494 /* Purpose of this value is unknown. It is typically 161. */
499 weight_name = read_pool_string (r);
500 if (strlen (weight_name) > SHORT_NAME_LEN)
501 error (r, _("Weight variable name (%s) truncated."), weight_name);
504 for (i = 0; i < r->var_cnt; i++)
513 error (r, _("Expected variable record."));
515 width = read_int (r);
517 error (r, _("Invalid variable width %d."), width);
/* Recorded per variable; pfm_read_case() uses R->widths[] to decide
   how to read each value of a case. */
518 r->widths[i] = width;
520 read_string (r, name);
/* Six integers: fmt[0..2] become the print format and fmt[3..5] the
   write format (see the convert_format() calls below). */
521 for (j = 0; j < 6; j++)
522 fmt[j] = read_int (r);
/* NOTE(review): lose() and corrupt_msg() are not defined in the
   visible part of this file, and the checks below compare against
   portable character codes (74, 100, 125, 152) rather than the
   translated characters used elsewhere.  This section may be disabled
   code; confirm against the preprocessor lines that are not visible in
   this excerpt. */
524 /* Weirdly enough, there is no # character in the SPSS portable
525 character set, so we can't check for it. */
526 if (strlen (name) > SHORT_NAME_LEN)
527 lose ((r, _("position %d: Variable name has %u characters."),
529 if ((name[0] < 74 /* A */ || name[0] > 125 /* Z */)
530 && name[0] != 152 /* @ */)
531 lose ((r, _("position %d: Variable name begins with invalid "
533 if (name[0] >= 100 /* a */ && name[0] <= 125 /* z */)
535 corrupt_msg (r, _("position %d: Variable name begins with "
536 "lowercase letter %c."),
537 i, name[0] - 100 + 'a');
538 name[0] -= 26 /* a - A */;
541 /* Verify remaining characters of variable name. */
542 for (j = 1; j < (int) strlen (name); j++)
/* NOTE(review): the format below has two conversions (%d and %s) but
   only NAME is passed, so %d consumes the pointer and %s reads a
   missing argument.  The neighbouring "position %d" messages pass I
   first; this call presumably should as well. */
546 if (!var_is_valid_name (name, false) || *name == '#')
547 error (r, _("position %d: Invalid variable name `%s'."), name);
/* NOTE(review): this message is not wrapped in _() for translation.
   Also note that WIDTH was already stored in R->widths[] above, before
   this range check. */
550 if (width < 0 || width > 255)
551 error (r, "Bad width %d for variable %s.", width, name);
553 v = dict_create_var (dict, name, width);
555 error (r, _("Duplicate variable name %s."), name);
557 convert_format (r, &fmt[0], &v->print, v);
558 convert_format (r, &fmt[3], &v->write, v);
560 /* Range missing values. */
563 v->miss_type = MISSING_RANGE;
564 v->missing[0] = parse_value (r, v);
565 v->missing[1] = parse_value (r, v);
/* `A' introduces a single bound stored as MISSING_HIGH. */
567 else if (match (r, 'A'))
569 v->miss_type = MISSING_HIGH;
570 v->missing[0] = parse_value (r, v);
/* `9' introduces a single bound stored as MISSING_LOW. */
572 else if (match (r, '9'))
574 v->miss_type = MISSING_LOW;
575 v->missing[0] = parse_value (r, v);
578 /* Single missing values. */
/* Each `8' record adds one discrete missing value.  map_next[] gives
   the missing-value type that results from adding one more value to
   the current type (-1 when no more can be added), and map_ofs[] gives
   the slot of V->missing[] in which the new value is stored. */
579 while (match (r, '8'))
581 static const int map_next[MISSING_COUNT] =
583 MISSING_1, MISSING_2, MISSING_3, -1,
584 MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1,
588 static const int map_ofs[MISSING_COUNT] =
590 -1, 0, 1, 2, -1, -1, -1, 2, 1, 1,
593 v->miss_type = map_next[v->miss_type];
594 if (v->miss_type == -1)
595 error (r, _("Bad missing values for %s."), v->name);
597 assert (map_ofs[v->miss_type] != -1);
598 v->missing[map_ofs[v->miss_type]] = parse_value (r, v);
604 read_string (r, label);
/* The label is copied with xstrdup() rather than from R->pool. */
605 v->label = xstrdup (label);
609 if (weight_name != NULL)
611 struct variable *weight_var = dict_lookup_var (dict, weight_name);
612 if (weight_var == NULL)
613 error (r, _("Weighting variable %s not present in dictionary."),
616 dict_set_weight (dict, weight_var);
620 /* Parse a value for variable VV into value V.

   When VV->type is ALPHA the value is read with read_string() and
   copied into the 8-byte V.s with st_bare_pad_copy(); otherwise it is
   read with read_float() into V.f.  The forward declaration gives the
   return type as `union value'; the return statement is not visible
   in this excerpt. */
622 parse_value (struct pfm_reader *r, struct variable *vv)
626 if (vv->type == ALPHA)
629 read_string (r, string);
630 st_bare_pad_copy (v.s, string, 8);
633 v.f = read_float (r);
638 /* Parses a value label record from R and applies it to DICT.

   The record names NV variables, which must all exist in DICT and
   have the same width as the first one, followed by N_LABELS pairs of
   a value (parsed as for the first variable) and a label string.  Each
   pair is applied to every named variable with val_labs_replace().
   Problems are reported through error(), which does not return.

   NOTE(review): the original comment said "and return success", but
   the forward declaration of this function returns void.  The lines
   that read NV and that separate the duplicate-label test from its
   error messages are not visible in this excerpt. */
640 read_value_label (struct pfm_reader *r, struct dictionary *dict)
/* Array of the NV variables named by this record, allocated from
   R->pool. */
652 v = pool_alloc (r->pool, sizeof *v * nv);
653 for (i = 0; i < nv; i++)
656 read_string (r, name);
658 v[i] = dict_lookup_var (dict, name);
660 error (r, _("Unknown variable %s while parsing value labels."), name);
/* Only widths are compared here, although the message also mentions
   variable types. */
662 if (v[0]->width != v[i]->width)
663 error (r, _("Cannot assign value labels to %s and %s, which "
664 "have different variable types or widths."),
665 v[0]->name, v[i]->name);
668 n_labels = read_int (r);
669 for (i = 0; i < n_labels; i++)
675 val = parse_value (r, v[0]);
676 read_string (r, label);
678 /* Assign the value_label's to each variable. */
679 for (j = 0; j < nv; j++)
681 struct variable *var = v[j];
683 if (!val_labs_replace (var->val_labs, val, label))
/* The duplicate-label message formats the value as a number or as a
   string of VAR->width characters, depending on the variable type. */
686 if (var->type == NUMERIC)
687 error (r, _("Duplicate label for value %g for variable %s."),
690 error (r, _("Duplicate label for value `%.*s' for variable %s."),
691 var->width, val.s, var->name);
696 /* Reads one case from portable file R into C. */
698 pfm_read_case (struct pfm_reader *r, struct ccase *c)
703 if (setjmp (r->bail_out))
706 /* Check for end of file. */
711 for (i = 0; i < r->var_cnt; i++)
713 int width = r->widths[i];
717 case_data_rw (c, idx)->f = read_float (r);
723 read_string (r, string);
724 st_bare_pad_copy (case_data_rw (c, idx)->s, string, width);
725 idx += DIV_RND_UP (width, MAX_SHORT_STRING);