1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
4 Code for parsing floating-point numbers adapted from GNU C library.
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #include "por-file-reader.h"
24 #include <libpspp/message.h>
32 #include <libpspp/alloc.h>
35 #include <libpspp/compiler.h>
36 #include "dictionary.h"
37 #include "file-handle-def.h"
39 #include <libpspp/hash.h>
40 #include <libpspp/magic.h>
41 #include <libpspp/misc.h>
42 #include <libpspp/pool.h>
43 #include <libpspp/str.h>
44 #include "value-labels.h"
48 #define _(msgid) gettext (msgid)
50 /* portable_to_local[PORTABLE] translates the given portable
51 character into the local character set. */
/* Entries 64 through 255 are the ones copied into a file's
   translation table by the code further down in this file.
   NOTE(review): only two segments of the initializer are visible
   here and its enclosing braces are not -- confirm that the
   initializer really supplies one character for each of the 256
   positions. */
52 static const char portable_to_local[256] =
55 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
56 "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ "
60 /* Portable file reader. */
/* State shared by every routine in this file that reads a portable
   file.  NOTE(review): the `struct pfm_reader' tag line and the
   braces around these members are not visible here. */
63 struct pool *pool; /* All the portable file state. */
65 jmp_buf bail_out; /* longjmp() target for error handling. */
67 struct file_handle *fh; /* File handle. */
68 FILE *file; /* File stream. */
69 char cc; /* Current character. */
70 char *trans; /* 256-byte character set translation table. */
71 int var_cnt; /* Number of variables. */
72 int weight_index; /* 0-based index of weight variable, or -1. */
73 int *widths; /* Variable widths, 0 for numeric. */
74 int value_cnt; /* Number of `value's per case. */
75 bool ok; /* Set false on I/O error. */
/* Forward declaration of error(), so that it can be used before its
   definition.  NOTE(review): the return type and any attributes
   belonging to this declaration are not visible on this line. */
79 error (struct pfm_reader *r, const char *msg,...)
83 /* Displays MSG as an error message and aborts reading the
84 portable file via longjmp(). */
/* MSG and the arguments after it are handed to err_vmsg() as a
   format and its va_list (presumably printf-style -- confirm).
   The function does not return normally: it ends by jumping to
   R->bail_out. */
86 error (struct pfm_reader *r, const char *msg, ...)
89 const char *file_name;
/* The report is a general error with no source-file location
   attached. */
93 e.category = MSG_GENERAL;
94 e.severity = MSG_ERROR;
95 e.where.file_name = NULL;
96 e.where.line_number = 0;
97 file_name = fh_get_file_name (r->fh);
/* The title names the file and the current offset in it.  It is
   allocated from R's pool with 80 bytes beyond the file name's
   length.  NOTE(review): sprintf() is unbounded, so a long
   translation of the format could exceed that allowance. */
98 e.title = title = pool_alloc (r->pool, strlen (file_name) + 80);
99 sprintf (title, _("portable file %s corrupt at offset %ld: "),
100 file_name, ftell (r->file));
102 va_start (args, msg);
103 err_vmsg (&e, msg, args);
/* Unwind to whichever setjmp() most recently filled in
   R->bail_out. */
108 longjmp (r->bail_out, 1);
111 /* Closes portable file reader R, after we're done with it. */
/* Everything the reader owns is released by destroying R's pool.
   NOTE(review): R itself is allocated from that pool by
   pfm_open_reader(), so R must not be used after this call. */
113 pfm_close_reader (struct pfm_reader *r)
116 pool_destroy (r->pool);
119 /* Reads the next significant character from R's file, presumably
   into R->cc, the "current character" member (struct pfm_reader has
   no member called cur_char).  Carriage returns and new-lines are
   skipped.  Running out of input is reported through error(). */
121 advance (struct pfm_reader *r)
/* Line ends carry no data in a portable file, so read past them. */
125 while ((c = getc (r->file)) == '\r' || c == '\n')
128 error (r, _("unexpected end of file"));
/* A translation table exists only once the header has been read;
   before that, characters are taken as they come. */
130 if (r->trans != NULL)
135 /* Skip a single character if present, and return whether it was skipped. */
/* R is the reader and C is the character to look for at the current
   position.  Callers in this file use the result as a truth value.
   NOTE(review): the return type and body are not visible here. */
138 match (struct pfm_reader *r, int c)
/* Forward declarations for the record readers that
   pfm_open_reader() calls, in the order in which it calls them. */
149 static void read_header (struct pfm_reader *);
150 static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
151 static void read_variables (struct pfm_reader *, struct dictionary *);
152 static void read_value_label (struct pfm_reader *, struct dictionary *);
/* NOTE(review): dump_dictionary() is declared with external linkage
   but is neither defined nor called in the visible part of this
   file. */
153 void dump_dictionary (struct dictionary *);
155 /* Reads the dictionary from file with handle H, and returns it in a
156 dictionary structure. This dictionary may be modified in order to
157 rename, reorder, and delete variables, etc. */
/* FH is the handle of the file to open.  The dictionary is stored in
   *DICT.  INFO is passed on to read_version_data(), which fills it
   with the file's creation date, time and product strings.
   NOTE(review): the return type, the return statements and the
   label of the failure path are not visible here. */
159 pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
160 struct pfm_read_info *info)
/* These locals are volatile presumably because they are assigned
   between setjmp() and a possible longjmp() and are needed
   afterwards -- confirm. */
162 struct pool *volatile pool = NULL;
163 struct pfm_reader *volatile r = NULL;
165 *dict = dict_create ();
166 if (!fh_open (fh, FH_REF_FILE, "portable file", "rs"))
169 /* Create and initialize reader. */
170 pool = pool_create ();
171 r = pool_alloc (pool, sizeof *r);
/* From here on, error() called by any of the readers below comes
   back to this point with a nonzero value. */
173 if (setjmp (r->bail_out))
/* The stream is opened through the pool, so presumably it is closed
   when the pool is destroyed -- confirm against pool_fopen(). */
176 r->file = pool_fopen (r->pool, fh_get_file_name (r->fh), "rb");
177 r->weight_index = -1;
184 /* Check that file open succeeded, prime reading. */
187 msg (ME, _("An error occurred while opening \"%s\" for reading "
188 "as a portable file: %s."),
189 fh_get_file_name (r->fh), strerror (errno));
193 /* Read header, version, date info, product id, variables. */
195 read_version_data (r, info);
196 read_variables (r, *dict);
198 /* Read value labels. */
199 while (match (r, 'D'))
200 read_value_label (r, *dict);
202 /* Check that we've made it to the data. */
204 error (r, _("Data record expected."));
/* These two calls undo the work above; they appear to form the
   failure path. */
209 pfm_close_reader (r);
210 dict_destroy (*dict);
/* Returns the value of base-30 digit C,
   or -1 if C is not a base-30 digit.

   The digits are `0' through `9' followed by `A' through `T', in
   upper case only.

   The null character is rejected explicitly.  strchr() regards the
   terminating null of the digit string as part of the string, so
   without the check a null character would be reported as a digit
   with value 30. */
static int
base_30_value (unsigned char c)
{
  static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST";
  const char *p;

  if (c == '\0')
    return -1;

  p = strchr (base_30_digits, c);
  return p != NULL ? (int) (p - base_30_digits) : -1;
}
225 /* Read a floating point value and return its value. */
227 read_float (struct pfm_reader *r)
231 bool got_dot = false; /* Seen a decimal point? */
232 bool got_digit = false; /* Seen any digits? */
233 bool negative = false; /* Number is negative? */
235 /* Skip leading spaces. */
236 while (match (r, ' '))
239 /* `*' indicates system-missing. */
242 advance (r); /* Probably a dot (.) but doesn't appear to matter. */
246 negative = match (r, '-');
249 int digit = base_30_value (r->cc);
254 /* Make sure that multiplication by 30 will not overflow. */
255 if (num > DBL_MAX * (1. / 30.))
256 /* The value of the digit doesn't matter, since we have already
257 gotten as many digits as can be represented in a `double'.
258 This doesn't necessarily mean the result will overflow.
259 The exponent may reduce it to within range.
261 We just need to record that there was another
262 digit so that we can multiply by 10 later. */
265 num = (num * 30.0) + digit;
267 /* Keep track of the number of digits after the decimal point.
268 If we just divided by 30 here, we would lose precision. */
272 else if (!got_dot && r->cc == '.')
273 /* Record that we have found the decimal point. */
276 /* Any other character terminates the number. */
282 /* Check that we had some digits. */
284 error (r, "Number expected.");
286 /* Get exponent if any. */
287 if (r->cc == '+' || r->cc == '-')
290 bool negative_exponent = r->cc == '-';
293 for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r))
295 if (exp > LONG_MAX / 30)
300 exp = exp * 30 + digit;
303 /* We don't check whether there were actually any digits, but we
305 if (negative_exponent)
310 /* Numbers must end with `/'. */
312 error (r, _("Missing numeric terminator."));
314 /* Multiply `num' by 30 to the `exponent' power, checking for
317 num *= pow (30.0, (double) exponent);
318 else if (exponent > 0)
320 if (num > DBL_MAX * pow (30.0, (double) -exponent))
323 num *= pow (30.0, (double) exponent);
326 return negative ? -num : num;
329 /* Read an integer and return its value. */
/* The number is read with read_float() and then checked.  A value
   with a fractional part, or one that does not lie strictly between
   INT_MIN and INT_MAX, is reported through error(), which does not
   return. */
331 read_int (struct pfm_reader *r)
333 double f = read_float (r);
334 if (floor (f) != f || f >= INT_MAX || f <= INT_MIN)
335 error (r, _("Invalid integer."));
339 /* Reads a string into BUF, which must have room for 256 characters. */
/* The string is preceded in the file by its length, which is read
   with read_int().  A length outside 0...255 is reported through
   error(), which does not return. */
342 read_string (struct pfm_reader *r, char *buf)
344 int n = read_int (r);
345 if (n < 0 || n > 255)
346 error (r, _("Bad string length %d."), n);
356 /* Reads a string and returns a copy of it allocated from R's pool. */
/* The string is first read into a local buffer with read_string()
   and then duplicated with pool_strdup(), so the copy belongs to
   R's pool. */
359 read_pool_string (struct pfm_reader *r)
362 read_string (r, string);
363 return pool_strdup (r->pool, string);
366 /* Reads the 464-byte file header. */
/* The 464 bytes are consumed as 200 bytes of splash text, a 256-byte
   character translation table and the 8-byte signature "SPSSPORT".
   If the signature does not match, a message is issued and control
   jumps to R->bail_out. */
368 read_header (struct pfm_reader *r)
373 /* Read and ignore vanity splash strings. */
374 for (i = 0; i < 200; i++)
377 /* Skip the first 64 characters of the translation table.
378 We don't care about these. They are probably all set to
379 '0', marking them as untranslatable, and that would screw
380 up our actual translation of the real '0'. */
381 for (i = 0; i < 64; i++)
384 /* Read the rest of the translation table. */
/* The table is allocated from R's pool and zero-filled; each entry
   maps a character found in the file to the local character for
   portable code I. */
385 trans = pool_malloc (r->pool, 256);
386 memset (trans, 0, 256);
395 trans[c] = portable_to_local[i];
398 /* Set up the translation table, then read the first
399 translated character. */
403 /* Skip and verify signature. */
404 for (i = 0; i < 8; i++)
405 if (!match (r, "SPSSPORT"[i]))
/* This failure uses msg() with its own wording instead of error(),
   so the message is not prefixed with error()'s "corrupt at
   offset" title. */
407 msg (SE, _("%s: Not a portable file."), fh_get_file_name (r->fh));
408 longjmp (r->bail_out, 1);
412 /* Reads the version and date info record, as well as product and
413 subproduct identification records if present. */
415 read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
417 static char empty_string[] = "";
418 char *date, *time, *product, *author, *subproduct;
423 error (r, "Unrecognized version code `%c'.", r->cc);
424 date = read_pool_string (r);
425 time = read_pool_string (r);
426 product = match (r, '1') ? read_pool_string (r) : empty_string;
427 author = match (r, '2') ? read_pool_string (r) : empty_string;
428 subproduct = match (r, '3') ? read_pool_string (r) : empty_string;
431 if (strlen (date) != 8)
432 error (r, _("Bad date string length %d."), strlen (date));
433 if (strlen (time) != 6)
434 error (r, _("Bad time string length %d."), strlen (time));
436 /* Save file info. */
440 for (i = 0; i < 8; i++)
442 static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
443 info->creation_date[map[i]] = date[i];
445 info->creation_date[2] = info->creation_date[5] = ' ';
446 info->creation_date[10] = 0;
449 for (i = 0; i < 6; i++)
451 static const int map[] = {0, 1, 3, 4, 6, 7};
452 info->creation_time[map[i]] = time[i];
454 info->creation_time[2] = info->creation_time[5] = ' ';
455 info->creation_time[8] = 0;
458 str_copy_trunc (info->product, sizeof info->product, product);
459 str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct);
463 /* Translates a format specification read from portable file R as
464 the three integers INTS into a normal format specifier FORMAT,
465 checking that the format is appropriate for variable V. */
/* The three integers arrive in PORTABLE_FORMAT: element 0 is the
   format type code, element 1 the width and element 2 the number of
   decimals.  Any problem is reported through error(), which does
   not return. */
467 convert_format (struct pfm_reader *r, const int portable_format[3],
468 struct fmt_spec *format, struct variable *v)
470 format->type = translate_fmt (portable_format[0]);
471 if (format->type == -1)
472 error (r, _("%s: Bad format specifier byte (%d)."),
473 v->name, portable_format[0]);
474 format->w = portable_format[1];
475 format->d = portable_format[2];
/* The specifier must be valid as an output format and must suit
   V's width. */
477 if (!check_output_specifier (format, false)
478 || !check_specifier_width (format, v->width, false))
479 error (r, _("%s variable %s has invalid format specifier %s."),
480 v->type == NUMERIC ? _("Numeric") : _("String"),
481 v->name, fmt_to_string (format));
/* Forward declaration: parse_value() is used by read_variables()
   but defined after it. */
484 static union value parse_value (struct pfm_reader *, struct variable *);
486 /* Read information on all the variables. */
/* One variable is created in DICT for each variable record, with
   its print and write formats, missing values and label.  If the
   file names a weight variable, it is made DICT's weight at the
   end.  Every inconsistency is reported through error(), which
   does not return. */
488 read_variables (struct pfm_reader *r, struct dictionary *dict)
490 char *weight_name = NULL;
494 error (r, _("Expected variable count record."));
496 r->var_cnt = read_int (r);
497 if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
498 error (r, _("Invalid number of variables %d."), r->var_cnt);
/* One width per variable is kept for pfm_read_case(). */
499 r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths);
501 /* Purpose of this value is unknown. It is typically 161. */
506 weight_name = read_pool_string (r);
507 if (strlen (weight_name) > SHORT_NAME_LEN)
508 error (r, _("Weight variable name (%s) truncated."), weight_name);
511 for (i = 0; i < r->var_cnt; i++)
520 error (r, _("Expected variable record."));
522 width = read_int (r);
524 error (r, _("Invalid variable width %d."), width);
525 r->widths[i] = width;
527 read_string (r, name);
/* Six integers follow the name: three for the print format, then
   three for the write format. */
528 for (j = 0; j < 6; j++)
529 fmt[j] = read_int (r);
/* Names beginning with `#' or `$' are refused even if
   var_is_valid_name() accepts them. */
531 if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
532 error (r, _("position %d: Invalid variable name `%s'."), i, name);
533 str_uppercase (name);
535 if (width < 0 || width > 255)
536 error (r, "Bad width %d for variable %s.", width, name);
538 v = dict_create_var (dict, name, width);
540 error (r, _("Duplicate variable name %s."), name);
542 convert_format (r, &fmt[0], &v->print, v);
543 convert_format (r, &fmt[3], &v->write, v);
545 /* Range missing values. */
/* X and Y are read into locals first so that the two read_float()
   calls happen in a defined order. */
548 double x = read_float (r);
549 double y = read_float (r);
550 mv_add_num_range (&v->miss, x, y);
/* Record `A' gives a range with no upper limit and record `9' a
   range with no lower limit. */
552 else if (match (r, 'A'))
553 mv_add_num_range (&v->miss, read_float (r), HIGHEST);
554 else if (match (r, '9'))
555 mv_add_num_range (&v->miss, LOWEST, read_float (r));
557 /* Single missing values. */
558 while (match (r, '8'))
560 union value value = parse_value (r, v);
561 mv_add_value (&v->miss, &value);
/* The label is copied with xstrdup(), not allocated from R's pool,
   presumably because the variable outlives the reader -- confirm. */
567 read_string (r, label);
568 v->label = xstrdup (label);
572 if (weight_name != NULL)
574 struct variable *weight_var = dict_lookup_var (dict, weight_name);
575 if (weight_var == NULL)
576 error (r, _("Weighting variable %s not present in dictionary."),
579 dict_set_weight (dict, weight_var);
583 /* Parse a value for variable VV into value V. */
/* A string variable's value is read with read_string() and copied
   into the first 8 bytes of the string member, padded on the right.
   Otherwise the value is read with read_float(). */
585 parse_value (struct pfm_reader *r, struct variable *vv)
589 if (vv->type == ALPHA)
592 read_string (r, string);
593 buf_copy_str_rpad (v.s, 8, string);
596 v.f = read_float (r);
601 /* Parse a value label record and return success. */
/* The record lists NV variable names, then a count of labels, then
   that many value/label pairs; each pair is applied to every listed
   variable.  Problems are reported through error(), which does not
   return.  NOTE(review): the forward declaration of this function
   gives it the return type `void', which disagrees with "return
   success" above -- confirm. */
603 read_value_label (struct pfm_reader *r, struct dictionary *dict)
615 v = pool_nalloc (r->pool, nv, sizeof *v);
616 for (i = 0; i < nv; i++)
619 read_string (r, name);
621 v[i] = dict_lookup_var (dict, name);
623 error (r, _("Unknown variable %s while parsing value labels."), name);
/* Every variable must have the same width as the first, because
   each value is parsed once, for v[0], and then shared. */
625 if (v[0]->width != v[i]->width)
626 error (r, _("Cannot assign value labels to %s and %s, which "
627 "have different variable types or widths."),
628 v[0]->name, v[i]->name);
631 n_labels = read_int (r);
632 for (i = 0; i < n_labels; i++)
638 val = parse_value (r, v[0]);
639 read_string (r, label);
641 /* Assign the value_label's to each variable. */
642 for (j = 0; j < nv; j++)
644 struct variable *var = v[j];
/* A false result from val_labs_replace() is treated as a duplicate
   label and reported in a form that suits the variable's type. */
646 if (!val_labs_replace (var->val_labs, val, label))
649 if (var->type == NUMERIC)
650 error (r, _("Duplicate label for value %g for variable %s."),
653 error (r, _("Duplicate label for value `%.*s' for variable %s."),
654 var->width, val.s, var->name);
659 /* Reads one case from portable file R into C. */
/* One value is read for each of R's variables, using the widths
   recorded in R->widths. */
661 pfm_read_case (struct pfm_reader *r, struct ccase *c)
/* Make this function the place where error() lands if it is called
   while the case is being read.  The value of setjmp() is not
   used here. */
666 setjmp (r->bail_out);
670 /* Check for end of file. */
675 for (i = 0; i < r->var_cnt; i++)
677 int width = r->widths[i];
681 case_data_rw (c, idx)->f = read_float (r);
687 read_string (r, string);
688 buf_copy_str_rpad (case_data_rw (c, idx)->s, width, string);
/* A string takes up one value for every MAX_SHORT_STRING bytes of
   its width, rounded up. */
689 idx += DIV_RND_UP (width, MAX_SHORT_STRING);
696 /* Returns true if an I/O error has occurred on READER, false otherwise. */
/* READER is not modified.  NOTE(review): the return type and body
   are not visible here; presumably the result is derived from the
   reader's `ok' member -- confirm. */
699 pfm_read_error (const struct pfm_reader *reader)
704 /* Returns true if FILE is an SPSS portable file, false otherwise. */
/* The check gathers the 464-byte header, builds the character
   translation table that the header contains, and compares the
   translated signature at the end of the header with "SPSSPORT". */
707 pfm_detect (FILE *file)
709 unsigned char header[464];
711 int cooked_cnt, raw_cnt;
/* RAW_CNT counts the characters examined and COOKED_CNT the ones
   kept.  Carriage returns and new-lines are not kept.  The attempt
   is abandoned at end of file or once more than 512 characters
   have been examined. */
714 cooked_cnt = raw_cnt = 0;
715 while (cooked_cnt < sizeof header)
718 if (c == EOF || raw_cnt++ > 512)
720 else if (c != '\n' && c != '\r')
721 header[cooked_cnt++] = c;
/* The translation table follows the 200 bytes of splash text, hence
   the offset of 200.  Only portable codes 64 through 255 are
   used. */
724 memset (trans, 0, 256);
725 for (i = 64; i < 256; i++)
727 unsigned char c = header[i + 200];
729 trans[c] = portable_to_local[i];
/* The signature occupies the last 8 bytes of the header, starting
   at offset 456. */
732 for (i = 0; i < 8; i++)
733 if (trans[header[i + 456]] != "SPSSPORT"[i])