1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
4 Code for parsing floating-point numbers adapted from GNU C
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "por-file-reader.h"
35 #include "dictionary.h"
36 #include "file-handle-def.h"
43 #include "value-labels.h"
47 #define _(msgid) gettext (msgid)
49 #include "debug-print.h"
51 /* portable_to_local[PORTABLE] translates the given portable
52 character into the local character set. */
53 static const char portable_to_local[256] =
56 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
57 "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ "
61 /* Portable file reader. */
64 struct pool *pool; /* All the portable file state. */
66 jmp_buf bail_out; /* longjmp() target for error handling. */
68 struct file_handle *fh; /* File handle. */
69 FILE *file; /* File stream. */
70 char cc; /* Current character. */
71 char *trans; /* 256-byte character set translation table. */
72 int var_cnt; /* Number of variables. */
73 int weight_index; /* 0-based index of weight variable, or -1. */
74 int *widths; /* Variable widths, 0 for numeric. */
75 int value_cnt; /* Number of `value's per case. */
76 bool ok; /* Set false on I/O error. */
80 error (struct pfm_reader *r, const char *msg,...)
83 /* Displays MSG as an error message and aborts reading the
84 portable file via longjmp(). */
86 error (struct pfm_reader *r, const char *msg, ...)
94 e.where.filename = NULL;
95 e.where.line_number = 0;
96 filename = fh_get_filename (r->fh);
97 e.title = title = pool_alloc (r->pool, strlen (filename) + 80);
98 sprintf (title, _("portable file %s corrupt at offset %ld: "),
99 filename, ftell (r->file));
101 va_start (args, msg);
102 err_vmsg (&e, msg, args);
107 longjmp (r->bail_out, 1);
110 /* Closes portable file reader R, after we're done with it. */
112 pfm_close_reader (struct pfm_reader *r)
115 pool_destroy (r->pool);
118 /* Read a single character into R->cc, the current character. */
120 advance (struct pfm_reader *r)
124 while ((c = getc (r->file)) == '\r' || c == '\n')
127 error (r, _("unexpected end of file"));
129 if (r->trans != NULL)
134 /* Skip a single character if present, and return whether it was
137 match (struct pfm_reader *r, int c)
148 static void read_header (struct pfm_reader *);
149 static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
150 static void read_variables (struct pfm_reader *, struct dictionary *);
151 static void read_value_label (struct pfm_reader *, struct dictionary *);
152 void dump_dictionary (struct dictionary *);
154 /* Reads the dictionary from the file with handle FH, and returns it in
155 *DICT. This dictionary may be modified in order to
156 rename, reorder, and delete variables, etc. */
158 pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
159 struct pfm_read_info *info)
161 struct pool *volatile pool = NULL;
162 struct pfm_reader *volatile r = NULL;
164 *dict = dict_create ();
165 if (!fh_open (fh, FH_REF_FILE, "portable file", "rs"))
168 /* Create and initialize reader. */
169 pool = pool_create ();
170 r = pool_alloc (pool, sizeof *r);
172 if (setjmp (r->bail_out))
175 r->file = pool_fopen (r->pool, fh_get_filename (r->fh), "rb");
176 r->weight_index = -1;
183 /* Check that file open succeeded, prime reading. */
186 msg (ME, _("An error occurred while opening \"%s\" for reading "
187 "as a portable file: %s."),
188 fh_get_filename (r->fh), strerror (errno));
192 /* Read header, version, date info, product id, variables. */
194 read_version_data (r, info);
195 read_variables (r, *dict);
197 /* Read value labels. */
198 while (match (r, 'D'))
199 read_value_label (r, *dict);
201 /* Check that we've made it to the data. */
203 error (r, _("Data record expected."));
208 pfm_close_reader (r);
209 dict_destroy (*dict);
/* Returns the value of base-30 digit C (0...9 for '0'...'9',
   10...29 for 'A'...'T'), or -1 if C is not a base-30 digit.

   The null character is rejected explicitly: strchr() treats the
   string's terminating null as part of the string, so without
   the check a C of 0 would be reported as a digit with value 30. */
static int
base_30_value (unsigned char c)
{
  static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST";
  const char *p;

  if (c == '\0')
    return -1;

  p = strchr (base_30_digits, c);
  return p != NULL ? (int) (p - base_30_digits) : -1;
}
224 /* Read a floating point value and return its value. */
226 read_float (struct pfm_reader *r)
230 bool got_dot = false; /* Seen a decimal point? */
231 bool got_digit = false; /* Seen any digits? */
232 bool negative = false; /* Number is negative? */
234 /* Skip leading spaces. */
235 while (match (r, ' '))
238 /* `*' indicates system-missing. */
241 advance (r); /* Probably a dot (.) but doesn't appear to matter. */
245 negative = match (r, '-');
248 int digit = base_30_value (r->cc);
253 /* Make sure that multiplication by 30 will not overflow. */
254 if (num > DBL_MAX * (1. / 30.))
255 /* The value of the digit doesn't matter, since we have already
256 gotten as many digits as can be represented in a `double'.
257 This doesn't necessarily mean the result will overflow.
258 The exponent may reduce it to within range.
260 We just need to record that there was another
261 digit so that we can multiply by 30 later. */
264 num = (num * 30.0) + digit;
266 /* Keep track of the number of digits after the decimal point.
267 If we just divided by 30 here, we would lose precision. */
271 else if (!got_dot && r->cc == '.')
272 /* Record that we have found the decimal point. */
275 /* Any other character terminates the number. */
281 /* Check that we had some digits. */
283 error (r, "Number expected.");
285 /* Get exponent if any. */
286 if (r->cc == '+' || r->cc == '-')
289 bool negative_exponent = r->cc == '-';
292 for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r))
294 if (exp > LONG_MAX / 30)
299 exp = exp * 30 + digit;
302 /* We don't check whether there were actually any digits, but we
304 if (negative_exponent)
309 /* Numbers must end with `/'. */
311 error (r, _("Missing numeric terminator."));
313 /* Multiply `num' by 30 to the `exponent' power, checking for
316 num *= pow (30.0, (double) exponent);
317 else if (exponent > 0)
319 if (num > DBL_MAX * pow (30.0, (double) -exponent))
322 num *= pow (30.0, (double) exponent);
325 return negative ? -num : num;
328 /* Read an integer and return its value. */
330 read_int (struct pfm_reader *r)
332 double f = read_float (r);
333 if (floor (f) != f || f >= INT_MAX || f <= INT_MIN)
334 error (r, _("Invalid integer."));
338 /* Reads a string into BUF, which must have room for 256
341 read_string (struct pfm_reader *r, char *buf)
343 int n = read_int (r);
344 if (n < 0 || n > 255)
345 error (r, _("Bad string length %d."), n);
355 /* Reads a string and returns a copy of it allocated from R's
358 read_pool_string (struct pfm_reader *r)
361 read_string (r, string);
362 return pool_strdup (r->pool, string);
365 /* Reads the 464-byte file header. */
367 read_header (struct pfm_reader *r)
372 /* Read and ignore vanity splash strings. */
373 for (i = 0; i < 200; i++)
376 /* Skip the first 64 characters of the translation table.
377 We don't care about these. They are probably all set to
378 '0', marking them as untranslatable, and that would screw
379 up our actual translation of the real '0'. */
380 for (i = 0; i < 64; i++)
383 /* Read the rest of the translation table. */
384 trans = pool_malloc (r->pool, 256);
385 memset (trans, 0, 256);
394 trans[c] = portable_to_local[i];
397 /* Set up the translation table, then read the first
398 translated character. */
402 /* Skip and verify signature. */
403 for (i = 0; i < 8; i++)
404 if (!match (r, "SPSSPORT"[i]))
406 msg (SE, _("%s: Not a portable file."), fh_get_filename (r->fh));
407 longjmp (r->bail_out, 1);
411 /* Reads the version and date info record, as well as product and
412 subproduct identification records if present. */
414 read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
416 static char empty_string[] = "";
417 char *date, *time, *product, *author, *subproduct;
422 error (r, "Unrecognized version code `%c'.", r->cc);
423 date = read_pool_string (r);
424 time = read_pool_string (r);
425 product = match (r, '1') ? read_pool_string (r) : empty_string;
426 author = match (r, '2') ? read_pool_string (r) : empty_string;
427 subproduct = match (r, '3') ? read_pool_string (r) : empty_string;
430 if (strlen (date) != 8)
431 error (r, _("Bad date string length %d."), strlen (date));
432 if (strlen (time) != 6)
433 error (r, _("Bad time string length %d."), strlen (time));
435 /* Save file info. */
439 for (i = 0; i < 8; i++)
441 static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
442 info->creation_date[map[i]] = date[i];
444 info->creation_date[2] = info->creation_date[5] = ' ';
445 info->creation_date[10] = 0;
448 for (i = 0; i < 6; i++)
450 static const int map[] = {0, 1, 3, 4, 6, 7};
451 info->creation_time[map[i]] = time[i];
453 info->creation_time[2] = info->creation_time[5] = ' ';
454 info->creation_time[8] = 0;
457 str_copy_trunc (info->product, sizeof info->product, product);
458 str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct);
462 /* Translates a format specification read from portable file R as
463 the three integers INTS into a normal format specifier FORMAT,
464 checking that the format is appropriate for variable V. */
466 convert_format (struct pfm_reader *r, const int portable_format[3],
467 struct fmt_spec *format, struct variable *v)
469 format->type = translate_fmt (portable_format[0]);
470 if (format->type == -1)
471 error (r, _("%s: Bad format specifier byte (%d)."),
472 v->name, portable_format[0]);
473 format->w = portable_format[1];
474 format->d = portable_format[2];
476 if (!check_output_specifier (format, false)
477 || !check_specifier_width (format, v->width, false))
478 error (r, _("%s variable %s has invalid format specifier %s."),
479 v->type == NUMERIC ? _("Numeric") : _("String"),
480 v->name, fmt_to_string (format));
483 static union value parse_value (struct pfm_reader *, struct variable *);
485 /* Read information on all the variables. */
487 read_variables (struct pfm_reader *r, struct dictionary *dict)
489 char *weight_name = NULL;
493 error (r, _("Expected variable count record."));
495 r->var_cnt = read_int (r);
496 if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
497 error (r, _("Invalid number of variables %d."), r->var_cnt);
498 r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths);
500 /* Purpose of this value is unknown. It is typically 161. */
505 weight_name = read_pool_string (r);
506 if (strlen (weight_name) > SHORT_NAME_LEN)
507 error (r, _("Weight variable name (%s) truncated."), weight_name);
510 for (i = 0; i < r->var_cnt; i++)
519 error (r, _("Expected variable record."));
521 width = read_int (r);
523 error (r, _("Invalid variable width %d."), width);
524 r->widths[i] = width;
526 read_string (r, name);
527 for (j = 0; j < 6; j++)
528 fmt[j] = read_int (r);
530 if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
531 error (r, _("position %d: Invalid variable name `%s'."), i, name);
532 str_uppercase (name);
534 if (width < 0 || width > 255)
535 error (r, "Bad width %d for variable %s.", width, name);
537 v = dict_create_var (dict, name, width);
539 error (r, _("Duplicate variable name %s."), name);
541 convert_format (r, &fmt[0], &v->print, v);
542 convert_format (r, &fmt[3], &v->write, v);
544 /* Range missing values. */
547 double x = read_float (r);
548 double y = read_float (r);
549 mv_add_num_range (&v->miss, x, y);
551 else if (match (r, 'A'))
552 mv_add_num_range (&v->miss, read_float (r), HIGHEST);
553 else if (match (r, '9'))
554 mv_add_num_range (&v->miss, LOWEST, read_float (r));
556 /* Single missing values. */
557 while (match (r, '8'))
559 union value value = parse_value (r, v);
560 mv_add_value (&v->miss, &value);
566 read_string (r, label);
567 v->label = xstrdup (label);
571 if (weight_name != NULL)
573 struct variable *weight_var = dict_lookup_var (dict, weight_name);
574 if (weight_var == NULL)
575 error (r, _("Weighting variable %s not present in dictionary."),
578 dict_set_weight (dict, weight_var);
582 /* Parse a value for variable VV into value V. */
584 parse_value (struct pfm_reader *r, struct variable *vv)
588 if (vv->type == ALPHA)
591 read_string (r, string);
592 buf_copy_str_rpad (v.s, 8, string);
595 v.f = read_float (r);
600 /* Parses a value label record. Errors are reported via error(), which does not return. */
602 read_value_label (struct pfm_reader *r, struct dictionary *dict)
614 v = pool_nalloc (r->pool, nv, sizeof *v);
615 for (i = 0; i < nv; i++)
618 read_string (r, name);
620 v[i] = dict_lookup_var (dict, name);
622 error (r, _("Unknown variable %s while parsing value labels."), name);
624 if (v[0]->width != v[i]->width)
625 error (r, _("Cannot assign value labels to %s and %s, which "
626 "have different variable types or widths."),
627 v[0]->name, v[i]->name);
630 n_labels = read_int (r);
631 for (i = 0; i < n_labels; i++)
637 val = parse_value (r, v[0]);
638 read_string (r, label);
640 /* Assign the value_label's to each variable. */
641 for (j = 0; j < nv; j++)
643 struct variable *var = v[j];
645 if (!val_labs_replace (var->val_labs, val, label))
648 if (var->type == NUMERIC)
649 error (r, _("Duplicate label for value %g for variable %s."),
652 error (r, _("Duplicate label for value `%.*s' for variable %s."),
653 var->width, val.s, var->name);
658 /* Reads one case from portable file R into C. */
660 pfm_read_case (struct pfm_reader *r, struct ccase *c)
665 setjmp (r->bail_out);
669 /* Check for end of file. */
674 for (i = 0; i < r->var_cnt; i++)
676 int width = r->widths[i];
680 case_data_rw (c, idx)->f = read_float (r);
686 read_string (r, string);
687 buf_copy_str_rpad (case_data_rw (c, idx)->s, width, string);
688 idx += DIV_RND_UP (width, MAX_SHORT_STRING);
695 /* Returns true if an I/O error has occurred on READER, false
698 pfm_read_error (const struct pfm_reader *reader)
703 /* Returns true if FILE is an SPSS portable file,
706 pfm_detect (FILE *file)
708 unsigned char header[464];
710 int cooked_cnt, raw_cnt;
713 cooked_cnt = raw_cnt = 0;
714 while (cooked_cnt < sizeof header)
717 if (c == EOF || raw_cnt++ > 512)
719 else if (c != '\n' && c != '\r')
720 header[cooked_cnt++] = c;
723 memset (trans, 0, 256);
724 for (i = 64; i < 256; i++)
726 unsigned char c = header[i + 200];
728 trans[c] = portable_to_local[i];
731 for (i = 0; i < 8; i++)
732 if (trans[header[i + 456]] != "SPSSPORT"[i])