1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
4 Code for parsing floating-point numbers adapted from GNU C
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
35 #include "dictionary.h"
36 #include "file-handle.h"
44 #include "value-labels.h"
48 #define _(msgid) gettext (msgid)
50 #include "debug-print.h"
52 /* Portable file reader. */
55 struct pool *pool; /* All the portable file state. */
57 jmp_buf bail_out; /* longjmp() target for error handling. */
59 struct file_handle *fh; /* File handle. */
60 FILE *file; /* File stream. */
61 char cc; /* Current character. */
62 unsigned char *trans; /* 256-byte character set translation table. */
64 int var_cnt; /* Number of variables. */
65 int weight_index; /* 0-based index of weight variable, or -1. */
66 int *widths; /* Variable widths, 0 for numeric. */
67 int value_cnt; /* Number of `value's per case. */
71 error (struct pfm_reader *r, const char *msg,...)
74 /* Displays MSG as an error message and aborts reading the
75 portable file via longjmp(). */
77 error (struct pfm_reader *r, const char *msg, ...)
85 getl_location (&e.where.filename, &e.where.line_number);
86 filename = handle_get_filename (r->fh);
87 e.title = title = pool_alloc (r->pool, strlen (filename) + 80);
88 sprintf (title, _("portable file %s corrupt at offset %ld: "),
89 filename, ftell (r->file));
92 err_vmsg (&e, msg, args);
95 longjmp (r->bail_out, 1);
98 /* Closes portable file reader R, after we're done with it. */
100 pfm_close_reader (struct pfm_reader *r)
103 pool_destroy (r->pool);
106 /* Read a single character into R->cc, the current character. */
108 advance (struct pfm_reader *r)
112 while ((c = getc (r->file)) == '\r' || c == '\n')
115 error (r, _("unexpected end of file"));
117 if (r->trans != NULL)
122 /* Skip a single character if present, and return whether it was
125 match (struct pfm_reader *r, int c)
136 static void read_header (struct pfm_reader *);
137 static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
138 static void read_variables (struct pfm_reader *, struct dictionary *);
139 static void read_value_label (struct pfm_reader *, struct dictionary *);
140 void dump_dictionary (struct dictionary *);
142 /* Reads the dictionary from the file with handle FH, and returns it in a
143 dictionary structure stored in *DICT. This dictionary may be modified in
144 order to rename, reorder, and delete variables, etc. */
146 pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
147 struct pfm_read_info *info)
149 struct pool *volatile pool = NULL;
150 struct pfm_reader *volatile r = NULL;
152 *dict = dict_create ();
153 if (!fh_open (fh, "portable file", "rs"))
156 /* Create and initialize reader. */
157 pool = pool_create ();
158 r = pool_alloc (pool, sizeof *r);
160 if (setjmp (r->bail_out))
163 r->file = pool_fopen (r->pool, handle_get_filename (r->fh), "rb");
164 r->weight_index = -1;
170 /* Check that file open succeeded, prime reading. */
173 msg (ME, _("An error occurred while opening \"%s\" for reading "
174 "as a portable file: %s."),
175 handle_get_filename (r->fh), strerror (errno));
180 /* Read header, version, date info, product id, variables. */
182 read_version_data (r, info);
183 read_variables (r, *dict);
185 /* Read value labels. */
186 while (match (r, 'D'))
187 read_value_label (r, *dict);
189 /* Check that we've made it to the data. */
191 error (r, _("Data record expected."));
196 pfm_close_reader (r);
197 dict_destroy (*dict);
/* Returns the value of base-30 digit C,
   or -1 if C is not a base-30 digit.

   The digits are `0'...`9' followed by `A'...`T', with values 0
   through 29.  C is checked for the null character explicitly
   because strchr() treats the terminating null of the string it
   searches as part of that string, so a null C would otherwise
   match the end of the digit table and yield the out-of-range
   value 30. */
static int
base_30_value (unsigned char c)
{
  static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST";
  const char *p;

  /* A null character is never a digit. */
  if (c == '\0')
    return -1;

  p = strchr (base_30_digits, c);
  return p != NULL ? (int) (p - base_30_digits) : -1;
}
212 /* Read a floating point value and return its value. */
214 read_float (struct pfm_reader *r)
218 bool got_dot = false; /* Seen a decimal point? */
219 bool got_digit = false; /* Seen any digits? */
220 bool negative = false; /* Number is negative? */
222 /* Skip leading spaces. */
223 while (match (r, ' '))
226 /* `*' indicates system-missing. */
229 advance (r); /* Probably a dot (.) but doesn't appear to matter. */
233 negative = match (r, '-');
236 int digit = base_30_value (r->cc);
241 /* Make sure that multiplication by 30 will not overflow. */
242 if (num > DBL_MAX * (1. / 30.))
243 /* The value of the digit doesn't matter, since we have already
244 gotten as many digits as can be represented in a `double'.
245 This doesn't necessarily mean the result will overflow.
246 The exponent may reduce it to within range.
248 We just need to record that there was another
249 digit so that we can multiply by 30 later. */
252 num = (num * 30.0) + digit;
254 /* Keep track of the number of digits after the decimal point.
255 If we just divided by 30 here, we would lose precision. */
259 else if (!got_dot && r->cc == '.')
260 /* Record that we have found the decimal point. */
263 /* Any other character terminates the number. */
269 /* Check that we had some digits. */
271 error (r, "Number expected.");
273 /* Get exponent if any. */
274 if (r->cc == '+' || r->cc == '-')
277 bool negative_exponent = r->cc == '-';
280 for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r))
282 if (exp > LONG_MAX / 30)
287 exp = exp * 30 + digit;
290 /* We don't check whether there were actually any digits, but we
292 if (negative_exponent)
297 /* Numbers must end with `/'. */
299 error (r, _("Missing numeric terminator."));
301 /* Multiply `num' by 30 to the `exponent' power, checking for
304 num *= pow (30.0, (double) exponent);
305 else if (exponent > 0)
307 if (num > DBL_MAX * pow (30.0, (double) -exponent))
310 num *= pow (30.0, (double) exponent);
313 return negative ? -num : num;
316 /* Read an integer and return its value. */
318 read_int (struct pfm_reader *r)
320 double f = read_float (r);
321 if (floor (f) != f || f >= INT_MAX || f <= INT_MIN)
322 error (r, _("Invalid integer."));
326 /* Reads a string into BUF, which must have room for 256
329 read_string (struct pfm_reader *r, char *buf)
331 int n = read_int (r);
332 if (n < 0 || n > 255)
333 error (r, _("Bad string length %d."), n);
343 /* Reads a string and returns a copy of it allocated from R's
345 static unsigned char *
346 read_pool_string (struct pfm_reader *r)
349 read_string (r, string);
350 return pool_strdup (r->pool, string);
353 /* Reads the 464-byte file header. */
355 read_header (struct pfm_reader *r)
357 /* portable_to_local[PORTABLE] translates the given portable
358 character into the local character set. */
359 static const unsigned char portable_to_local[256] =
362 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
363 "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ "
367 unsigned char *trans;
370 /* Read and ignore vanity splash strings. */
371 for (i = 0; i < 200; i++)
374 /* Skip the first 64 characters of the translation table.
375 We don't care about these. They are probably all set to
376 '0', marking them as untranslatable, and that would screw
377 up our actual translation of the real '0'. */
378 for (i = 0; i < 64; i++)
381 /* Read the rest of the translation table. */
382 trans = pool_malloc (r->pool, 256);
383 memset (trans, 0, 256);
392 trans[c] = portable_to_local[i];
395 /* Set up the translation table, then read the first
396 translated character. */
400 /* Skip and verify signature. */
401 for (i = 0; i < 8; i++)
402 if (!match (r, "SPSSPORT"[i]))
404 msg (SE, _("%s: Not a portable file."), handle_get_filename (r->fh));
405 longjmp (r->bail_out, 1);
409 /* Reads the version and date info record, as well as product and
410 subproduct identification records if present. */
412 read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
414 char *date, *time, *product, *author, *subproduct;
419 error (r, "Unrecognized version code `%c'.", r->cc);
420 date = read_pool_string (r);
421 time = read_pool_string (r);
422 product = match (r, '1') ? read_pool_string (r) : (unsigned char *) "";
423 author = match (r, '2') ? read_pool_string (r) : (unsigned char *) "";
425 = match (r, '3') ? read_pool_string (r) : (unsigned char *) "";
428 if (strlen (date) != 8)
429 error (r, _("Bad date string length %d."), strlen (date));
430 if (strlen (time) != 6)
431 error (r, _("Bad time string length %d."), strlen (time));
433 /* Save file info. */
437 for (i = 0; i < 8; i++)
439 static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
440 info->creation_date[map[i]] = date[i];
442 info->creation_date[2] = info->creation_date[5] = ' ';
443 info->creation_date[10] = 0;
446 for (i = 0; i < 6; i++)
448 static const int map[] = {0, 1, 3, 4, 6, 7};
449 info->creation_time[map[i]] = time[i];
451 info->creation_time[2] = info->creation_time[5] = ' ';
452 info->creation_time[8] = 0;
455 str_copy_trunc (info->product, sizeof info->product, product);
456 str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct);
460 /* Translates a format specification read from portable file R as
461 the three integers PORTABLE_FORMAT into a normal format specifier FORMAT,
462 checking that the format is appropriate for variable V. */
464 convert_format (struct pfm_reader *r, const int portable_format[3],
465 struct fmt_spec *format, struct variable *v)
467 format->type = translate_fmt (portable_format[0]);
468 if (format->type == -1)
469 error (r, _("%s: Bad format specifier byte (%d)."),
470 v->name, portable_format[0]);
471 format->w = portable_format[1];
472 format->d = portable_format[2];
474 if (!check_output_specifier (format, false)
475 || !check_specifier_width (format, v->width, false))
476 error (r, _("%s variable %s has invalid format specifier %s."),
477 v->type == NUMERIC ? _("Numeric") : _("String"),
478 v->name, fmt_to_string (format));
481 static union value parse_value (struct pfm_reader *, struct variable *);
483 /* Read information on all the variables. */
485 read_variables (struct pfm_reader *r, struct dictionary *dict)
487 char *weight_name = NULL;
491 error (r, _("Expected variable count record."));
493 r->var_cnt = read_int (r);
494 if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
495 error (r, _("Invalid number of variables %d."), r->var_cnt);
496 r->widths = pool_alloc (r->pool, sizeof *r->widths * r->var_cnt);
498 /* Purpose of this value is unknown. It is typically 161. */
503 weight_name = read_pool_string (r);
504 if (strlen (weight_name) > SHORT_NAME_LEN)
505 error (r, _("Weight variable name (%s) truncated."), weight_name);
508 for (i = 0; i < r->var_cnt; i++)
517 error (r, _("Expected variable record."));
519 width = read_int (r);
521 error (r, _("Invalid variable width %d."), width);
522 r->widths[i] = width;
524 read_string (r, name);
525 for (j = 0; j < 6; j++)
526 fmt[j] = read_int (r);
528 if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
529 error (r, _("position %d: Invalid variable name `%s'."), i, name);
530 str_uppercase (name);
532 if (width < 0 || width > 255)
533 error (r, "Bad width %d for variable %s.", width, name);
535 v = dict_create_var (dict, name, width);
537 error (r, _("Duplicate variable name %s."), name);
539 convert_format (r, &fmt[0], &v->print, v);
540 convert_format (r, &fmt[3], &v->write, v);
542 /* Range missing values. */
545 v->miss_type = MISSING_RANGE;
546 v->missing[0] = parse_value (r, v);
547 v->missing[1] = parse_value (r, v);
549 else if (match (r, 'A'))
551 v->miss_type = MISSING_HIGH;
552 v->missing[0] = parse_value (r, v);
554 else if (match (r, '9'))
556 v->miss_type = MISSING_LOW;
557 v->missing[0] = parse_value (r, v);
560 /* Single missing values. */
561 while (match (r, '8'))
563 static const int map_next[MISSING_COUNT] =
565 MISSING_1, MISSING_2, MISSING_3, -1,
566 MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1,
570 static const int map_ofs[MISSING_COUNT] =
572 -1, 0, 1, 2, -1, -1, -1, 2, 1, 1,
575 v->miss_type = map_next[v->miss_type];
576 if (v->miss_type == -1)
577 error (r, _("Bad missing values for %s."), v->name);
579 assert (map_ofs[v->miss_type] != -1);
580 v->missing[map_ofs[v->miss_type]] = parse_value (r, v);
586 read_string (r, label);
587 v->label = xstrdup (label);
591 if (weight_name != NULL)
593 struct variable *weight_var = dict_lookup_var (dict, weight_name);
594 if (weight_var == NULL)
595 error (r, _("Weighting variable %s not present in dictionary."),
598 dict_set_weight (dict, weight_var);
602 /* Parse a value for variable VV into value V. */
604 parse_value (struct pfm_reader *r, struct variable *vv)
608 if (vv->type == ALPHA)
611 read_string (r, string);
612 buf_copy_str_rpad (v.s, 8, string);
615 v.f = read_float (r);
620 /* Parse a value label record, attaching its labels to variables in DICT. */
622 read_value_label (struct pfm_reader *r, struct dictionary *dict)
634 v = pool_alloc (r->pool, sizeof *v * nv);
635 for (i = 0; i < nv; i++)
638 read_string (r, name);
640 v[i] = dict_lookup_var (dict, name);
642 error (r, _("Unknown variable %s while parsing value labels."), name);
644 if (v[0]->width != v[i]->width)
645 error (r, _("Cannot assign value labels to %s and %s, which "
646 "have different variable types or widths."),
647 v[0]->name, v[i]->name);
650 n_labels = read_int (r);
651 for (i = 0; i < n_labels; i++)
657 val = parse_value (r, v[0]);
658 read_string (r, label);
660 /* Assign the value_label's to each variable. */
661 for (j = 0; j < nv; j++)
663 struct variable *var = v[j];
665 if (!val_labs_replace (var->val_labs, val, label))
668 if (var->type == NUMERIC)
669 error (r, _("Duplicate label for value %g for variable %s."),
672 error (r, _("Duplicate label for value `%.*s' for variable %s."),
673 var->width, val.s, var->name);
678 /* Reads one case from portable file R into C. */
680 pfm_read_case (struct pfm_reader *r, struct ccase *c)
685 if (setjmp (r->bail_out))
688 /* Check for end of file. */
693 for (i = 0; i < r->var_cnt; i++)
695 int width = r->widths[i];
699 case_data_rw (c, idx)->f = read_float (r);
705 read_string (r, string);
706 buf_copy_str_rpad (case_data_rw (c, idx)->s, width, string);
707 idx += DIV_RND_UP (width, MAX_SHORT_STRING);