X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fsfm-read.c;h=986dede94fd2947a62d8a944d1777cf319c688b3;hb=92fb12eb06716d14c05b781f5d9dcde956d77c30;hp=59fc1840d370c43130549892099333f7dd9bd89e;hpb=4fdeb2145d081ff1b84e3f6c99f9d1c048c0d64a;p=pspp diff --git a/src/sfm-read.c b/src/sfm-read.c index 59fc1840d3..986dede94f 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -33,7 +33,7 @@ #include "file-handle.h" #include "filename.h" #include "format.h" -#include "getline.h" +#include "getl.h" #include "hash.h" #include "magic.h" #include "misc.h" @@ -41,6 +41,9 @@ #include "str.h" #include "var.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) + #include "debug-print.h" /* System file reader. */ @@ -59,7 +62,6 @@ struct sfm_reader /* Variables. */ struct sfm_var *vars; /* Variables. */ - size_t var_cnt; /* Number of variables. */ /* File's special constants. */ flt64 sysmis; @@ -87,18 +89,18 @@ struct sfm_var /* Swap bytes *A and *B. */ static inline void -bswap (unsigned char *a, unsigned char *b) +bswap (char *a, char *b) { - unsigned char t = *a; + char t = *a; *a = *b; *b = t; } -/* bswap_int32(): Reverse the byte order of 32-bit integer *X. */ +/* Reverse the byte order of 32-bit integer *X. */ static inline void bswap_int32 (int32 *x_) { - unsigned char *x = (unsigned char *) x_; + char *x = (char *) x_; bswap (x + 0, x + 3); bswap (x + 1, x + 2); } @@ -107,7 +109,7 @@ bswap_int32 (int32 *x_) static inline void bswap_flt64 (flt64 *x_) { - unsigned char *x = (unsigned char *) x_; + char *x = (char *) x_; bswap (x + 0, x + 7); bswap (x + 1, x + 6); bswap (x + 2, x + 5); @@ -141,15 +143,17 @@ sfm_close_reader (struct sfm_reader *r) if (r == NULL) return; + if (r->file) + { + if (fn_close (fh_get_filename (r->fh), r->file) == EOF) + msg (ME, _("%s: Closing system file: %s."), + fh_get_filename (r->fh), strerror (errno)); + r->file = NULL; + } + if (r->fh != NULL) fh_close (r->fh, "system file", "rs"); - if ( r->file ) { - if (fn_close (handle_get_filename (r->fh), r->file) == EOF) - msg (ME, _("%s: Closing system file: %s."), - handle_get_filename (r->fh), strerror (errno)); - r->file = NULL; - } free (r->vars); free (r->buf); free (r); @@ -157,6 +161,8 @@ sfm_close_reader (struct sfm_reader *r) /* Dictionary reader. */ +static void buf_unread(struct sfm_reader *r, size_t byte_cnt); + static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt, size_t min_alloc); @@ -202,13 +208,13 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct variable **var_by_idx = NULL; *dict = dict_create (); - if (!fh_open (fh, "system file", "rs")) + if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) goto error; /* Create and initialize reader. */ r = xmalloc (sizeof *r); r->fh = fh; - r->file = fn_open (handle_get_filename (fh), "rb"); + r->file = fn_open (fh_get_filename (fh), "rb"); r->reverse_endian = 0; r->fix_specials = 0; @@ -219,7 +225,6 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->weight_idx = -1; r->vars = NULL; - r->var_cnt = 0; r->sysmis = -FLT64_MAX; r->highest = FLT64_MAX; @@ -233,7 +238,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { msg (ME, _("An error occurred while opening \"%s\" for reading " "as a system file: %s."), - handle_get_filename (r->fh), strerror (errno)); + fh_get_filename (r->fh), strerror (errno)); err_cond_fail (); goto error; } @@ -242,18 +247,27 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx)) goto error; + /* Handle weighting. */ if (r->weight_idx != -1) { - struct variable *weight_var = var_by_idx[r->weight_idx]; + struct variable *weight_var; + + if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt) + lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " + "and number of elements per case (%d)."), + fh_get_filename (r->fh), r->weight_idx, r->value_cnt)); + + + weight_var = var_by_idx[r->weight_idx]; if (weight_var == NULL) lose ((ME, _("%s: Weighting variable may not be a continuation of " - "a long string variable."), handle_get_filename (fh))); + "a long string variable."), fh_get_filename (fh))); else if (weight_var->type == ALPHA) lose ((ME, _("%s: Weighting variable may not be a string variable."), - handle_get_filename (fh))); + fh_get_filename (fh))); dict_set_weight (*dict, weight_var); } @@ -280,7 +294,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 " "records must always immediately follow type 3 " "records."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); case 6: if (!read_documents (r, *dict)) @@ -296,6 +310,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, int32 count P; } data; + unsigned long bytes; int skip = 0; @@ -306,6 +321,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, bswap_int32 (&data.size); bswap_int32 (&data.count); } + bytes = data.size * data.count; + if (bytes < data.size || bytes < data.count) + lose ((ME, "%s: Record type %d subtype %d too large.", + fh_get_filename (r->fh), rec_type, data.subtype)); switch (data.subtype) { @@ -328,15 +347,15 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { const int n_vars = data.count / 3 ; int i; - if ( data.count % 3 ) + if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) ) { msg (MW, _("%s: Invalid subrecord length. " "Record: 7; Subrecord: 11"), - handle_get_filename (r->fh)); + fh_get_filename (r->fh)); skip = 1; } - for ( i = 0 ; i < n_vars ; ++i ) + for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i ) { struct { @@ -361,25 +380,79 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, case 13: /* SPSS 12.0 Long variable name map */ { - - char *s; - char *buf = xmalloc(data.size * data.count + 1); - char *tbuf ; - assertive_buf_read (r, buf, data.size * data.count, 0); - buf[data.size * data.count]='\0'; - - s = strtok_r(buf, "\t", &tbuf); - while ( s ) + char *buf, *short_name, *save_ptr; + int idx; + + /* Read data. */ + buf = xmalloc (bytes + 1); + if (!buf_read (r, buf, bytes, 0)) + { + free (buf); + goto error; + } + buf[bytes] = '\0'; + + /* Parse data. */ + for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0; + short_name != NULL; + short_name = strtok_r (NULL, "=", &save_ptr), idx++) { - char *shortname, *longname; - shortname = strsep(&s,"="); - longname = strsep(&s,"="); - - dict_add_longvar_entry(*dict, shortname, longname); - - s = strtok_r(0,"\t", &tbuf); + char *long_name = strtok_r (NULL, "\t", &save_ptr); + struct variable *v; + + /* Validate long name. */ + if (long_name == NULL) + { + msg (MW, _("%s: Trailing garbage in long variable " + "name map."), + fh_get_filename (r->fh)); + break; + } + if (!var_is_valid_name (long_name, false)) + { + msg (MW, _("%s: Long variable mapping to invalid " + "variable name `%s'."), + fh_get_filename (r->fh), long_name); + break; + } + + /* Find variable using short name. */ + v = dict_lookup_var (*dict, short_name); + if (v == NULL) + { + msg (MW, _("%s: Long variable mapping for " + "nonexistent variable %s."), + fh_get_filename (r->fh), short_name); + break; + } + + /* Identify any duplicates. */ + if ( compare_var_names(short_name, long_name, 0) && + NULL != dict_lookup_var (*dict, long_name)) + { + lose ((ME, _("%s: Duplicate long variable name `%s' " + "within system file."), + fh_get_filename (r->fh), long_name)); + break; + } + + /* Set long name. + Renaming a variable may clear the short + name, but we want to retain it, so + re-set it explicitly. */ + dict_rename_var (*dict, v, long_name); + var_set_short_name (v, short_name); + + /* For compatability, make sure dictionary + is in long variable name map order. In + the common case, this has no effect, + because the dictionary and the long + variable name map are already in the + same order. */ + dict_reorder_var (*dict, v, idx); } - + + /* Free data. */ free (buf); } break; @@ -387,7 +460,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, default: msg (MW, _("%s: Unrecognized record type 7, subtype %d " "encountered in system file."), - handle_get_filename (r->fh), data.subtype); + fh_get_filename (r->fh), data.subtype); skip = 1; } @@ -410,8 +483,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } default: - lose ((ME, _("%s: Unrecognized record type %d."), - handle_get_filename (r->fh), rec_type)); + corrupt_msg(MW, _("%s: Unrecognized record type %d."), + fh_get_filename (r->fh), rec_type); } } @@ -444,7 +517,7 @@ read_machine_int32_info (struct sfm_reader *r, int size, int count) if (size != sizeof (int32) || count != 8) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " "subtype 3. Expected size %d, count 8."), - handle_get_filename (r->fh), size, count, sizeof (int32))); + fh_get_filename (r->fh), size, count, sizeof (int32))); assertive_buf_read (r, data, sizeof data, 0); if (r->reverse_endian) @@ -456,7 +529,7 @@ read_machine_int32_info (struct sfm_reader *r, int size, int count) lose ((ME, _("%s: Floating-point representation in system file is not " "IEEE-754. PSPP cannot convert between floating-point " "formats."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); #else #error Add support for your floating-point format. #endif @@ -471,7 +544,7 @@ read_machine_int32_info (struct sfm_reader *r, int size, int count) if (file_bigendian ^ (data[6] == 1)) lose ((ME, _("%s: File-indicated endianness (%s) does not match " "endianness intuited from file header (%s)."), - handle_get_filename (r->fh), + fh_get_filename (r->fh), file_bigendian ? _("big-endian") : _("little-endian"), data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian") : _("unknown")))); @@ -480,7 +553,7 @@ read_machine_int32_info (struct sfm_reader *r, int size, int count) if (data[7] != 2 && data[7] != 3) lose ((ME, _("%s: File-indicated character representation code (%s) is " "not ASCII."), - handle_get_filename (r->fh), + fh_get_filename (r->fh), (data[7] == 1 ? "EBCDIC" : (data[7] == 4 ? _("DEC Kanji") : _("Unknown"))))); @@ -500,7 +573,7 @@ read_machine_flt64_info (struct sfm_reader *r, int size, int count) if (size != sizeof (flt64) || count != 3) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " "subtype 4. Expected size %d, count 8."), - handle_get_filename (r->fh), size, count, sizeof (flt64))); + fh_get_filename (r->fh), size, count, sizeof (flt64))); assertive_buf_read (r, data, sizeof data, 0); if (r->reverse_endian) @@ -517,7 +590,7 @@ read_machine_flt64_info (struct sfm_reader *r, int size, int count) "for at least one of the three system values. SYSMIS: " "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: " "%g, %g."), - handle_get_filename (r->fh), (double) data[0], (double) SYSMIS, + fh_get_filename (r->fh), (double) data[0], (double) SYSMIS, (double) data[1], (double) FLT64_MAX, (double) data[2], (double) second_lowest_flt64); } @@ -542,7 +615,7 @@ read_header (struct sfm_reader *r, if (strncmp ("$FL2", hdr.rec_type, 4) != 0) lose ((ME, _("%s: Bad magic. Proper system files begin with " "the four characters `$FL2'. This file will not be read."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); /* Check eye-catcher string. */ memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name); @@ -584,7 +657,7 @@ read_header (struct sfm_reader *r, if (hdr.layout_code != 2) lose ((ME, _("%s: File layout code has unexpected value %d. Value " "should be 2, in big-endian or little-endian format."), - handle_get_filename (r->fh), hdr.layout_code)); + fh_get_filename (r->fh), hdr.layout_code)); r->reverse_endian = 1; bswap_int32 (&hdr.case_size); @@ -594,34 +667,30 @@ read_header (struct sfm_reader *r, bswap_flt64 (&hdr.bias); } + /* Copy basic info and verify correctness. */ r->value_cnt = hdr.case_size; - if (r->value_cnt <= 0 - || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2)) - lose ((ME, _("%s: Number of elements per case (%d) is not between 1 " - "and %d."), - handle_get_filename (r->fh), r->value_cnt, - INT_MAX / sizeof (union value) / 2)); + + /* If value count is rediculous, then force it to -1 (a sentinel value) */ + if ( r->value_cnt < 0 || + r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2)) + r->value_cnt = -1; r->compressed = hdr.compress; r->weight_idx = hdr.weight_idx - 1; - if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt) - lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " - "and number of elements per case (%d)."), - handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt)); r->case_cnt = hdr.case_cnt; if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2) lose ((ME, _("%s: Number of cases in file (%ld) is not between -1 and %d."), - handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2)); + fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2)); r->bias = hdr.bias; if (r->bias != 100.0) corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual " "value of 100."), - handle_get_filename (r->fh), r->bias); + fh_get_filename (r->fh), r->bias); /* Make a file label only on the condition that the given label is not all spaces or nulls. */ @@ -685,22 +754,31 @@ read_variables (struct sfm_reader *r, int long_string_count = 0; /* # of long string continuation records still expected. */ int next_value = 0; /* Index to next `value' structure. */ - size_t var_cap = 0; assert(r); - /* Allocate variables. */ - *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt); + *var_by_idx = 0; + + /* Pre-allocate variables. */ + if (r->value_cnt != -1) + { + *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx); + r->vars = xnmalloc (r->value_cnt, sizeof *r->vars); + } + /* Read in the entry for each variable and use the info to initialize the dictionary. */ - for (i = 0; i < r->value_cnt; i++) + for (i = 0; ; ++i) { struct variable *vv; - char name[9]; + char name[SHORT_NAME_LEN + 1]; int nv; int j; + if ( r->value_cnt != -1 && i >= r->value_cnt ) + break; + assertive_buf_read (r, &sv, sizeof sv, 0); if (r->reverse_endian) @@ -713,10 +791,19 @@ read_variables (struct sfm_reader *r, bswap_int32 (&sv.write); } + /* We've come to the end of the variable entries */ if (sv.rec_type != 2) - lose ((ME, _("%s: position %d: Bad record type (%d); " - "the expected value was 2."), - handle_get_filename (r->fh), i, sv.rec_type)); + { + buf_unread(r, sizeof sv); + r->value_cnt = i; + break; + } + + if ( -1 == r->value_cnt ) + { + *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx); + r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars); + } /* If there was a long string previously, make sure that the continuations are present; otherwise make sure there aren't @@ -726,8 +813,10 @@ read_variables (struct sfm_reader *r, if (sv.type != -1) lose ((ME, _("%s: position %d: String variable does not have " "proper number of continuation records."), - handle_get_filename (r->fh), i)); + fh_get_filename (r->fh), i)); + + r->vars[i].width = -1; (*var_by_idx)[i] = NULL; long_string_count--; continue; @@ -735,35 +824,35 @@ read_variables (struct sfm_reader *r, else if (sv.type == -1) lose ((ME, _("%s: position %d: Superfluous long string continuation " "record."), - handle_get_filename (r->fh), i)); + fh_get_filename (r->fh), i)); /* Check fields for validity. */ if (sv.type < 0 || sv.type > 255) lose ((ME, _("%s: position %d: Bad variable type code %d."), - handle_get_filename (r->fh), i, sv.type)); + fh_get_filename (r->fh), i, sv.type)); if (sv.has_var_label != 0 && sv.has_var_label != 1) lose ((ME, _("%s: position %d: Variable label indicator field is not " - "0 or 1."), handle_get_filename (r->fh), i)); + "0 or 1."), fh_get_filename (r->fh), i)); if (sv.n_missing_values < -3 || sv.n_missing_values > 3 || sv.n_missing_values == -1) lose ((ME, _("%s: position %d: Missing value indicator field is not " - "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i)); + "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i)); /* Copy first character of variable name. */ if (!isalpha ((unsigned char) sv.name[0]) && sv.name[0] != '@' && sv.name[0] != '#') lose ((ME, _("%s: position %d: Variable name begins with invalid " "character."), - handle_get_filename (r->fh), i)); + fh_get_filename (r->fh), i)); if (islower ((unsigned char) sv.name[0])) msg (MW, _("%s: position %d: Variable name begins with lowercase letter " "%c."), - handle_get_filename (r->fh), i, sv.name[0]); + fh_get_filename (r->fh), i, sv.name[0]); if (sv.name[0] == '#') msg (MW, _("%s: position %d: Variable name begins with octothorpe " "(`#'). Scratch variables should not appear in system " "files."), - handle_get_filename (r->fh), i); + fh_get_filename (r->fh), i); name[0] = toupper ((unsigned char) (sv.name[0])); /* Copy remaining characters of variable name. */ @@ -777,7 +866,7 @@ read_variables (struct sfm_reader *r, { msg (MW, _("%s: position %d: Variable name character %d is " "lowercase letter %c."), - handle_get_filename (r->fh), i, j + 1, sv.name[j]); + fh_get_filename (r->fh), i, j + 1, sv.name[j]); name[j] = toupper ((unsigned char) (c)); } else if (isalnum (c) || c == '.' || c == '@' @@ -786,15 +875,22 @@ read_variables (struct sfm_reader *r, else lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a " "variable name."), - handle_get_filename (r->fh), i, c, c)); + fh_get_filename (r->fh), i, c, c)); } name[j] = 0; + if ( ! var_is_valid_name(name, false) ) + lose ((ME, _("%s: Invalid variable name `%s' within system file."), + fh_get_filename (r->fh), name)); + /* Create variable. */ - vv = (*var_by_idx)[i] = dict_create_var_from_short (dict, name, sv.type); + + vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type); if (vv == NULL) lose ((ME, _("%s: Duplicate variable name `%s' within system file."), - handle_get_filename (r->fh), name)); + fh_get_filename (r->fh), name)); + + var_set_short_name (vv, vv->name); /* Case reading data. */ nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64)); @@ -816,7 +912,7 @@ read_variables (struct sfm_reader *r, if (len < 0 || len > 255) lose ((ME, _("%s: Variable %s indicates variable label of invalid " "length %d."), - handle_get_filename (r->fh), vv->name, len)); + fh_get_filename (r->fh), vv->name, len)); if ( len != 0 ) { @@ -832,88 +928,66 @@ read_variables (struct sfm_reader *r, if (sv.n_missing_values != 0) { flt64 mv[3]; + int mv_cnt = abs (sv.n_missing_values); if (vv->width > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " "values."), - handle_get_filename (r->fh), vv->name)); + fh_get_filename (r->fh), vv->name)); - assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0); + assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); if (r->reverse_endian && vv->type == NUMERIC) - for (j = 0; j < abs (sv.n_missing_values); j++) + for (j = 0; j < mv_cnt; j++) bswap_flt64 (&mv[j]); if (sv.n_missing_values > 0) { - vv->miss_type = sv.n_missing_values; - if (vv->type == NUMERIC) - for (j = 0; j < sv.n_missing_values; j++) - vv->missing[j].f = mv[j]; - else - for (j = 0; j < sv.n_missing_values; j++) - memcpy (vv->missing[j].s, &mv[j], vv->width); + for (j = 0; j < sv.n_missing_values; j++) + if (vv->type == NUMERIC) + mv_add_num (&vv->miss, mv[j]); + else + mv_add_str (&vv->miss, (char *) &mv[j]); } else { - int x = 0; - if (vv->type == ALPHA) lose ((ME, _("%s: String variable %s may not have missing " "values specified as a range."), - handle_get_filename (r->fh), vv->name)); + fh_get_filename (r->fh), vv->name)); if (mv[0] == r->lowest) - { - vv->miss_type = MISSING_LOW; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, LOWEST, mv[1]); else if (mv[1] == r->highest) - { - vv->miss_type = MISSING_HIGH; - vv->missing[x++].f = mv[0]; - } + mv_add_num_range (&vv->miss, mv[0], HIGHEST); else - { - vv->miss_type = MISSING_RANGE; - vv->missing[x++].f = mv[0]; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, mv[0], mv[1]); if (sv.n_missing_values == -3) - { - vv->miss_type += 3; - vv->missing[x++].f = mv[2]; - } + mv_add_num (&vv->miss, mv[2]); } } - else - vv->miss_type = MISSING_NONE; if (!parse_format_spec (r, sv.print, &vv->print, vv) || !parse_format_spec (r, sv.write, &vv->write, vv)) goto error; - /* Add variable to list. */ - if (var_cap >= r->var_cnt) - { - var_cap = 2 + r->var_cnt * 2; - r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars); - } - r->vars[r->var_cnt].width = vv->width; - r->vars[r->var_cnt].fv = vv->fv; - r->var_cnt++; + r->vars[i].width = vv->width; + r->vars[i].fv = vv->fv; + } /* Some consistency checks. */ if (long_string_count != 0) lose ((ME, _("%s: Long string continuation records omitted at end of " "dictionary."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); + if (next_value != r->value_cnt) - lose ((ME, _("%s: System file header indicates %d variable positions but " + corrupt_msg(MW, _("%s: System file header indicates %d variable positions but " "%d were read from file."), - handle_get_filename (r->fh), r->value_cnt, next_value)); + fh_get_filename (r->fh), r->value_cnt, next_value); + return 1; @@ -924,27 +998,32 @@ error: /* Translates the format spec from sysfile format to internal format. */ static int -parse_format_spec (struct sfm_reader *r, int32 s, struct fmt_spec *v, struct variable *vv) +parse_format_spec (struct sfm_reader *r, int32 s, + struct fmt_spec *f, struct variable *v) { - v->type = translate_fmt ((s >> 16) & 0xff); - if (v->type == -1) + f->type = translate_fmt ((s >> 16) & 0xff); + if (f->type == -1) lose ((ME, _("%s: Bad format specifier byte (%d)."), - handle_get_filename (r->fh), (s >> 16) & 0xff)); - v->w = (s >> 8) & 0xff; - v->d = s & 0xff; - - /* FIXME? Should verify the resulting specifier more thoroughly. */ + fh_get_filename (r->fh), (s >> 16) & 0xff)); + f->w = (s >> 8) & 0xff; + f->d = s & 0xff; - if (v->type == -1) - lose ((ME, _("%s: Bad format specifier byte (%d)."), - handle_get_filename (r->fh), (s >> 16) & 0xff)); - if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0)) + if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0)) lose ((ME, _("%s: %s variable %s has %s format specifier %s."), - handle_get_filename (r->fh), - vv->type == ALPHA ? _("String") : _("Numeric"), - vv->name, - formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"), - formats[v->type].name)); + fh_get_filename (r->fh), + v->type == ALPHA ? _("String") : _("Numeric"), + v->name, + formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"), + formats[f->type].name)); + + if (!check_output_specifier (f, false) + || !check_specifier_width (f, v->width, false)) + { + msg (ME, _("%s variable %s has invalid format specifier %s."), + v->type == NUMERIC ? _("Numeric") : _("String"), + v->name, fmt_to_string (f)); + *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0); + } return 1; error: @@ -959,7 +1038,7 @@ read_value_labels (struct sfm_reader *r, { struct label { - unsigned char raw_value[8]; /* Value as uninterpreted bytes. */ + char raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ char *label; /* Null-terminated label string. */ }; @@ -981,8 +1060,15 @@ read_value_labels (struct sfm_reader *r, if (r->reverse_endian) bswap_int32 (&n_labels); + if ( n_labels >= ((int32) ~0) / sizeof *labels) + { + corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."), + fh_get_filename (r->fh), n_labels); + n_labels = 0; + } + /* Allocate memory. */ - labels = xmalloc (n_labels * sizeof *labels); + labels = xcalloc (n_labels, sizeof *labels); for (i = 0; i < n_labels; i++) labels[i].label = NULL; @@ -1020,7 +1106,7 @@ read_value_labels (struct sfm_reader *r, if (rec_type != 4) lose ((ME, _("%s: Variable index record (type 4) does not immediately " "follow value label record (type 3) as it should."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); } /* Read number of variables associated with value label from type 4 @@ -1031,10 +1117,10 @@ read_value_labels (struct sfm_reader *r, if (n_vars < 1 || n_vars > dict_get_var_cnt (dict)) lose ((ME, _("%s: Number of variables associated with a value label (%d) " "is not between 1 and the number of variables (%d)."), - handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict))); + fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict))); /* Read the list of variables. */ - var = xmalloc (n_vars * sizeof *var); + var = xnmalloc (n_vars, sizeof *var); for (i = 0; i < n_vars; i++) { int32 var_idx; @@ -1047,7 +1133,7 @@ read_value_labels (struct sfm_reader *r, if (var_idx < 1 || var_idx > r->value_cnt) lose ((ME, _("%s: Variable index associated with value label (%d) is " "not between 1 and the number of values (%d)."), - handle_get_filename (r->fh), var_idx, r->value_cnt)); + fh_get_filename (r->fh), var_idx, r->value_cnt)); /* Make sure it's a real variable. */ v = var_by_idx[var_idx - 1]; @@ -1055,11 +1141,11 @@ read_value_labels (struct sfm_reader *r, lose ((ME, _("%s: Variable index associated with value label (%d) " "refers to a continuation of a string variable, not to " "an actual variable."), - handle_get_filename (r->fh), var_idx)); + fh_get_filename (r->fh), var_idx)); if (v->type == ALPHA && v->width > MAX_SHORT_STRING) lose ((ME, _("%s: Value labels are not allowed on long string " "variables (%s)."), - handle_get_filename (r->fh), v->name)); + fh_get_filename (r->fh), v->name)); /* Add it to the list of variables. */ var[i] = v; @@ -1071,7 +1157,7 @@ read_value_labels (struct sfm_reader *r, lose ((ME, _("%s: Variables associated with value label are not all of " "identical type. Variable %s has %s type, but variable " "%s has %s type."), - handle_get_filename (r->fh), + fh_get_filename (r->fh), var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"), var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric"))); @@ -1082,8 +1168,8 @@ read_value_labels (struct sfm_reader *r, if (var[0]->type == ALPHA) { - const int copy_len = min (sizeof (label->raw_value), - sizeof (label->label)); + const int copy_len = min (sizeof label->raw_value, + sizeof label->label); memcpy (label->value.s, label->raw_value, copy_len); } else { flt64 f; @@ -1111,11 +1197,11 @@ read_value_labels (struct sfm_reader *r, if (var[0]->type == NUMERIC) msg (MW, _("%s: File contains duplicate label for value %g for " "variable %s."), - handle_get_filename (r->fh), label->value.f, v->name); + fh_get_filename (r->fh), label->value.f, v->name); else msg (MW, _("%s: File contains duplicate label for value `%.*s' " "for variable %s."), - handle_get_filename (r->fh), v->width, label->value.s, v->name); + fh_get_filename (r->fh), v->width, label->value.s, v->name); } } @@ -1156,15 +1242,28 @@ buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc) { if (ferror (r->file)) msg (ME, _("%s: Reading system file: %s."), - handle_get_filename (r->fh), strerror (errno)); + fh_get_filename (r->fh), strerror (errno)); else corrupt_msg (ME, _("%s: Unexpected end of file."), - handle_get_filename (r->fh)); + fh_get_filename (r->fh)); return NULL; } return buf; } +/* Winds the reader BYTE_CNT bytes back in the reader stream. */ +void +buf_unread(struct sfm_reader *r, size_t byte_cnt) +{ + assert(byte_cnt > 0); + + if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR)) + { + msg (ME, _("%s: Seeking system file: %s."), + fh_get_filename (r->fh), strerror (errno)); + } +} + /* Reads a document record, type 6, from system file R, and sets up the documents and n_documents fields in the associated dictionary. */ @@ -1177,13 +1276,13 @@ read_documents (struct sfm_reader *r, struct dictionary *dict) if (dict_get_documents (dict) != NULL) lose ((ME, _("%s: System file contains multiple " "type 6 (document) records."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0); if (line_cnt <= 0) lose ((ME, _("%s: Number of document lines (%ld) " "must be greater than 0."), - handle_get_filename (r->fh), (long) line_cnt)); + fh_get_filename (r->fh), (long) line_cnt)); documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1); /* FIXME? Run through asciify. */ @@ -1209,12 +1308,12 @@ buffer_input (struct sfm_reader *r) size_t amt; if (r->buf == NULL) - r->buf = xmalloc (sizeof *r->buf * 128); + r->buf = xnmalloc (128, sizeof *r->buf); amt = fread (r->buf, sizeof *r->buf, 128, r->file); if (ferror (r->file)) { msg (ME, _("%s: Error reading file: %s."), - handle_get_filename (r->fh), strerror (errno)); + fh_get_filename (r->fh), strerror (errno)); return 0; } r->ptr = r->buf; @@ -1241,7 +1340,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) for (;;) { - for (; p < p_end; p++) + for (; p < p_end; p++){ switch (*p) { case 0: @@ -1252,7 +1351,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) if (buf_beg != buf) lose ((ME, _("%s: Compressed data is corrupted. Data ends " "in partial case."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); goto error; case 253: /* Code 253 indicates that the value is stored explicitly @@ -1261,7 +1360,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) if (!buffer_input (r)) { lose ((ME, _("%s: Unexpected end of file."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); goto error; } memcpy (buf++, r->ptr++, sizeof *buf); @@ -1295,7 +1394,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) goto success; break; } - + } /* We have reached the end of this instruction octet. Read another. */ if (r->ptr == NULL || r->ptr >= r->end) @@ -1303,7 +1402,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) { if (buf_beg != buf) lose ((ME, _("%s: Unexpected end of file."), - handle_get_filename (r->fh))); + fh_get_filename (r->fh))); goto error; } memcpy (r->x, r->ptr++, sizeof *buf); @@ -1346,7 +1445,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->value_cnt; i++) if (r->vars[i].width == 0) bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f); } @@ -1358,7 +1457,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->value_cnt; i++) if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis) case_data_rw (c, r->vars[i].fv)->f = SYSMIS; } @@ -1386,7 +1485,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) return 0; } - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->value_cnt; i++) { struct sfm_var *v = &r->vars[i]; @@ -1395,9 +1494,9 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) flt64 f = *bounce_cur++; if (r->reverse_endian) bswap_flt64 (&f); - case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f; + case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f; } - else + else if (v->width != -1) { memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width); bounce_cur += DIV_RND_UP (v->width, sizeof (flt64)); @@ -1420,10 +1519,24 @@ fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt) { if (ferror (r->file)) msg (ME, _("%s: Reading system file: %s."), - handle_get_filename (r->fh), strerror (errno)); + fh_get_filename (r->fh), strerror (errno)); else if (read_bytes != 0) msg (ME, _("%s: Partial record at end of system file."), - handle_get_filename (r->fh)); + fh_get_filename (r->fh)); return 0; } } + +/* Returns true if FILE is an SPSS system file, + false otherwise. */ +bool +sfm_detect (FILE *file) +{ + struct sysfile_header hdr; + + if (fread (&hdr, sizeof hdr, 1, file) != 1) + return false; + if (strncmp ("$FL2", hdr.rec_type, 4)) + return false; + return true; +}