1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
4 Code for parsing floating-point numbers adapted from GNU C
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
35 #include "dictionary.h"
36 #include "file-handle.h"
44 #include "value-labels.h"
48 #define _(msgid) gettext (msgid)
50 #include "debug-print.h"
52 /* Portable file reader. */
55 struct pool *pool; /* All the portable file state. */
57 jmp_buf bail_out; /* longjmp() target for error handling. */
59 struct file_handle *fh; /* File handle. */
60 FILE *file; /* File stream. */
61 char cc; /* Current character. */
62 char *trans; /* 256-byte character set translation table. */
63 int var_cnt; /* Number of variables. */
64 int weight_index; /* 0-based index of weight variable, or -1. */
65 int *widths; /* Variable widths, 0 for numeric. */
66 int value_cnt; /* Number of `value's per case. */
70 error (struct pfm_reader *r, const char *msg,...)
73 /* Displays MSG as an error message and aborts reading the
74 portable file via longjmp(). */
76 error (struct pfm_reader *r, const char *msg, ...)
84 getl_location (&e.where.filename, &e.where.line_number);
85 filename = handle_get_filename (r->fh);
86 e.title = title = pool_alloc (r->pool, strlen (filename) + 80);
87 sprintf (title, _("portable file %s corrupt at offset %ld: "),
88 filename, ftell (r->file));
91 err_vmsg (&e, msg, args);
94 longjmp (r->bail_out, 1);
97 /* Closes portable file reader R, after we're done with it. */
99 pfm_close_reader (struct pfm_reader *r)
102 pool_destroy (r->pool);
105 /* Reads the next character from the file into R->cc, skipping carriage returns and newlines. */
107 advance (struct pfm_reader *r)
111 while ((c = getc (r->file)) == '\r' || c == '\n')
114 error (r, _("unexpected end of file"));
116 if (r->trans != NULL)
121 /* Skip a single character if present, and return whether it was
124 match (struct pfm_reader *r, int c)
135 static void read_header (struct pfm_reader *);
136 static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
137 static void read_variables (struct pfm_reader *, struct dictionary *);
138 static void read_value_label (struct pfm_reader *, struct dictionary *);
139 void dump_dictionary (struct dictionary *);
141 /* Reads the dictionary from file with handle FH, storing it in *DICT as a
142 dictionary structure. This dictionary may be modified in order to
143 rename, reorder, and delete variables, etc. */
145 pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
146 struct pfm_read_info *info)
148 struct pool *volatile pool = NULL;
149 struct pfm_reader *volatile r = NULL;
151 *dict = dict_create ();
152 if (!fh_open (fh, "portable file", "rs"))
155 /* Create and initialize reader. */
156 pool = pool_create ();
157 r = pool_alloc (pool, sizeof *r);
159 if (setjmp (r->bail_out))
162 r->file = pool_fopen (r->pool, handle_get_filename (r->fh), "rb");
163 r->weight_index = -1;
169 /* Check that file open succeeded, prime reading. */
172 msg (ME, _("An error occurred while opening \"%s\" for reading "
173 "as a portable file: %s."),
174 handle_get_filename (r->fh), strerror (errno));
179 /* Read header, version, date info, product id, variables. */
181 read_version_data (r, info);
182 read_variables (r, *dict);
184 /* Read value labels. */
185 while (match (r, 'D'))
186 read_value_label (r, *dict);
188 /* Check that we've made it to the data. */
190 error (r, _("Data record expected."));
195 pfm_close_reader (r);
196 dict_destroy (*dict);
/* Returns the value of base-30 digit C (`0'...`9' are 0...9 and
   `A'...`T' are 10...29), or -1 if C is not a base-30 digit.

   The null character is rejected explicitly: strchr() considers
   the terminating null to be part of the string it searches, so
   looking up '\0' would otherwise succeed and yield the bogus
   digit value 30. */
static int
base_30_value (unsigned char c)
{
  static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST";
  const char *p;

  if (c == '\0')
    return -1;

  p = strchr (base_30_digits, c);
  return p != NULL ? (int) (p - base_30_digits) : -1;
}
211 /* Read a floating point value and return its value. */
213 read_float (struct pfm_reader *r)
217 bool got_dot = false; /* Seen a decimal point? */
218 bool got_digit = false; /* Seen any digits? */
219 bool negative = false; /* Number is negative? */
221 /* Skip leading spaces. */
222 while (match (r, ' '))
225 /* `*' indicates system-missing. */
228 advance (r); /* Probably a dot (.) but doesn't appear to matter. */
232 negative = match (r, '-');
235 int digit = base_30_value (r->cc);
240 /* Make sure that multiplication by 30 will not overflow. */
241 if (num > DBL_MAX * (1. / 30.))
242 /* The value of the digit doesn't matter, since we have already
243 gotten as many digits as can be represented in a `double'.
244 This doesn't necessarily mean the result will overflow.
245 The exponent may reduce it to within range.
247 We just need to record that there was another
248 digit so that we can multiply by 30 later. */
251 num = (num * 30.0) + digit;
253 /* Keep track of the number of digits after the decimal point.
254 If we just divided by 30 here, we would lose precision. */
258 else if (!got_dot && r->cc == '.')
259 /* Record that we have found the decimal point. */
262 /* Any other character terminates the number. */
268 /* Check that we had some digits. */
270 error (r, "Number expected.");
272 /* Get exponent if any. */
273 if (r->cc == '+' || r->cc == '-')
276 bool negative_exponent = r->cc == '-';
279 for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r))
281 if (exp > LONG_MAX / 30)
286 exp = exp * 30 + digit;
289 /* We don't check whether there were actually any digits, but we
291 if (negative_exponent)
296 /* Numbers must end with `/'. */
298 error (r, _("Missing numeric terminator."));
300 /* Multiply `num' by 30 to the `exponent' power, checking for
303 num *= pow (30.0, (double) exponent);
304 else if (exponent > 0)
306 if (num > DBL_MAX * pow (30.0, (double) -exponent))
309 num *= pow (30.0, (double) exponent);
312 return negative ? -num : num;
315 /* Read an integer and return its value. */
317 read_int (struct pfm_reader *r)
319 double f = read_float (r);
320 if (floor (f) != f || f >= INT_MAX || f <= INT_MIN)
321 error (r, _("Invalid integer."));
325 /* Reads a string into BUF, which must have room for 256
328 read_string (struct pfm_reader *r, char *buf)
330 int n = read_int (r);
331 if (n < 0 || n > 255)
332 error (r, _("Bad string length %d."), n);
342 /* Reads a string and returns a copy of it allocated from R's
345 read_pool_string (struct pfm_reader *r)
348 read_string (r, string);
349 return pool_strdup (r->pool, string);
352 /* Reads the 464-byte file header. */
354 read_header (struct pfm_reader *r)
356 /* portable_to_local[PORTABLE] translates the given portable
357 character into the local character set. */
358 static const char portable_to_local[256] =
361 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
362 "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ "
369 /* Read and ignore vanity splash strings. */
370 for (i = 0; i < 200; i++)
373 /* Skip the first 64 characters of the translation table.
374 We don't care about these. They are probably all set to
375 '0', marking them as untranslatable, and that would screw
376 up our actual translation of the real '0'. */
377 for (i = 0; i < 64; i++)
380 /* Read the rest of the translation table. */
381 trans = pool_malloc (r->pool, 256);
382 memset (trans, 0, 256);
391 trans[c] = portable_to_local[i];
394 /* Set up the translation table, then read the first
395 translated character. */
399 /* Skip and verify signature. */
400 for (i = 0; i < 8; i++)
401 if (!match (r, "SPSSPORT"[i]))
403 msg (SE, _("%s: Not a portable file."), handle_get_filename (r->fh));
404 longjmp (r->bail_out, 1);
408 /* Reads the version and date info record, as well as product and
409 subproduct identification records if present. */
411 read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
413 static char empty_string[] = "";
414 char *date, *time, *product, *author, *subproduct;
419 error (r, "Unrecognized version code `%c'.", r->cc);
420 date = read_pool_string (r);
421 time = read_pool_string (r);
422 product = match (r, '1') ? read_pool_string (r) : empty_string;
423 author = match (r, '2') ? read_pool_string (r) : empty_string;
424 subproduct = match (r, '3') ? read_pool_string (r) : empty_string;
427 if (strlen (date) != 8)
428 error (r, _("Bad date string length %d."), strlen (date));
429 if (strlen (time) != 6)
430 error (r, _("Bad time string length %d."), strlen (time));
432 /* Save file info. */
436 for (i = 0; i < 8; i++)
438 static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
439 info->creation_date[map[i]] = date[i];
441 info->creation_date[2] = info->creation_date[5] = ' ';
442 info->creation_date[10] = 0;
445 for (i = 0; i < 6; i++)
447 static const int map[] = {0, 1, 3, 4, 6, 7};
448 info->creation_time[map[i]] = time[i];
450 info->creation_time[2] = info->creation_time[5] = ' ';
451 info->creation_time[8] = 0;
454 str_copy_trunc (info->product, sizeof info->product, product);
455 str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct);
459 /* Translates a format specification read from portable file R as
460 the three integers PORTABLE_FORMAT into a normal format specifier FORMAT,
461 checking that the format is appropriate for variable V. */
463 convert_format (struct pfm_reader *r, const int portable_format[3],
464 struct fmt_spec *format, struct variable *v)
466 format->type = translate_fmt (portable_format[0]);
467 if (format->type == -1)
468 error (r, _("%s: Bad format specifier byte (%d)."),
469 v->name, portable_format[0]);
470 format->w = portable_format[1];
471 format->d = portable_format[2];
473 if (!check_output_specifier (format, false)
474 || !check_specifier_width (format, v->width, false))
475 error (r, _("%s variable %s has invalid format specifier %s."),
476 v->type == NUMERIC ? _("Numeric") : _("String"),
477 v->name, fmt_to_string (format));
480 static union value parse_value (struct pfm_reader *, struct variable *);
482 /* Read information on all the variables. */
484 read_variables (struct pfm_reader *r, struct dictionary *dict)
486 char *weight_name = NULL;
490 error (r, _("Expected variable count record."));
492 r->var_cnt = read_int (r);
493 if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
494 error (r, _("Invalid number of variables %d."), r->var_cnt);
495 r->widths = pool_alloc (r->pool, sizeof *r->widths * r->var_cnt);
497 /* Purpose of this value is unknown. It is typically 161. */
502 weight_name = read_pool_string (r);
503 if (strlen (weight_name) > SHORT_NAME_LEN)
504 error (r, _("Weight variable name (%s) truncated."), weight_name);
507 for (i = 0; i < r->var_cnt; i++)
516 error (r, _("Expected variable record."));
518 width = read_int (r);
520 error (r, _("Invalid variable width %d."), width);
521 r->widths[i] = width;
523 read_string (r, name);
524 for (j = 0; j < 6; j++)
525 fmt[j] = read_int (r);
527 if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
528 error (r, _("position %d: Invalid variable name `%s'."), i, name);
529 str_uppercase (name);
531 if (width < 0 || width > 255)
532 error (r, "Bad width %d for variable %s.", width, name);
534 v = dict_create_var (dict, name, width);
536 error (r, _("Duplicate variable name %s."), name);
538 convert_format (r, &fmt[0], &v->print, v);
539 convert_format (r, &fmt[3], &v->write, v);
541 /* Range missing values. */
544 double x = read_float (r);
545 double y = read_float (r);
546 mv_add_num_range (&v->miss, x, y);
548 else if (match (r, 'A'))
549 mv_add_num_range (&v->miss, read_float (r), HIGHEST);
550 else if (match (r, '9'))
551 mv_add_num_range (&v->miss, LOWEST, read_float (r));
553 /* Single missing values. */
554 while (match (r, '8'))
556 union value value = parse_value (r, v);
557 mv_add_value (&v->miss, &value);
563 read_string (r, label);
564 v->label = xstrdup (label);
568 if (weight_name != NULL)
570 struct variable *weight_var = dict_lookup_var (dict, weight_name);
571 if (weight_var == NULL)
572 error (r, _("Weighting variable %s not present in dictionary."),
575 dict_set_weight (dict, weight_var);
579 /* Parse a value for variable VV into value V. */
581 parse_value (struct pfm_reader *r, struct variable *vv)
585 if (vv->type == ALPHA)
588 read_string (r, string);
589 buf_copy_str_rpad (v.s, 8, string);
592 v.f = read_float (r);
597 /* Parse a value label record. Errors are reported via error(), which does not return. */
599 read_value_label (struct pfm_reader *r, struct dictionary *dict)
611 v = pool_alloc (r->pool, sizeof *v * nv);
612 for (i = 0; i < nv; i++)
615 read_string (r, name);
617 v[i] = dict_lookup_var (dict, name);
619 error (r, _("Unknown variable %s while parsing value labels."), name);
621 if (v[0]->width != v[i]->width)
622 error (r, _("Cannot assign value labels to %s and %s, which "
623 "have different variable types or widths."),
624 v[0]->name, v[i]->name);
627 n_labels = read_int (r);
628 for (i = 0; i < n_labels; i++)
634 val = parse_value (r, v[0]);
635 read_string (r, label);
637 /* Assign the value label to each variable. */
638 for (j = 0; j < nv; j++)
640 struct variable *var = v[j];
642 if (!val_labs_replace (var->val_labs, val, label))
645 if (var->type == NUMERIC)
646 error (r, _("Duplicate label for value %g for variable %s."),
649 error (r, _("Duplicate label for value `%.*s' for variable %s."),
650 var->width, val.s, var->name);
655 /* Reads one case from portable file R into C. */
657 pfm_read_case (struct pfm_reader *r, struct ccase *c)
662 if (setjmp (r->bail_out))
665 /* Check for end of file. */
670 for (i = 0; i < r->var_cnt; i++)
672 int width = r->widths[i];
676 case_data_rw (c, idx)->f = read_float (r);
682 read_string (r, string);
683 buf_copy_str_rpad (case_data_rw (c, idx)->s, width, string);
684 idx += DIV_RND_UP (width, MAX_SHORT_STRING);