X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fsfm-read.c;h=12df275e7f557cc7824709cf0b4a278db8c3fb5f;hb=6cec94f8545d5895b5bdfa73798b386b453ffa18;hp=40a366cf1e9cafa1cc408e0f6b2de035ea792eb6;hpb=e48df05eeeb85838526a03a3371964f5f6b14321;p=pspp diff --git a/src/sfm-read.c b/src/sfm-read.c index 40a366cf1e..12df275e7f 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -14,8 +14,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include "sfm-read.h" @@ -59,7 +59,6 @@ struct sfm_reader /* Variables. */ struct sfm_var *vars; /* Variables. */ - size_t var_cnt; /* Number of variables. */ /* File's special constants. */ flt64 sysmis; @@ -122,26 +121,16 @@ corrupt_msg (int class, const char *format,...) static void corrupt_msg (int class, const char *format,...) { - char buf[1024]; - - { - va_list args; - - va_start (args, format); - vsnprintf (buf, 1024, format, args); - va_end (args); - } - - { - struct error e; + struct error e; + va_list args; - e.class = class; - getl_location (&e.where.filename, &e.where.line_number); - e.title = _("corrupt system file: "); - e.text = buf; + e.class = class; + getl_location (&e.where.filename, &e.where.line_number); + e.title = _("corrupt system file: "); - err_vmsg (&e); - } + va_start (args, format); + err_vmsg (&e, format, args); + va_end (args); } /* Closes a system file after we're done with it. */ @@ -167,6 +156,8 @@ sfm_close_reader (struct sfm_reader *r) /* Dictionary reader. */ +static void buf_unread(struct sfm_reader *r, size_t byte_cnt); + static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt, size_t min_alloc); @@ -229,7 +220,6 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->weight_idx = -1; r->vars = NULL; - r->var_cnt = 0; r->sysmis = -FLT64_MAX; r->highest = FLT64_MAX; @@ -252,10 +242,19 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx)) goto error; + /* Handle weighting. */ if (r->weight_idx != -1) { - struct variable *weight_var = var_by_idx[r->weight_idx]; + struct variable *weight_var; + + if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt) + lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " + "and number of elements per case (%d)."), + handle_get_filename (r->fh), r->weight_idx, r->value_cnt)); + + + weight_var = var_by_idx[r->weight_idx]; if (weight_var == NULL) lose ((ME, @@ -306,6 +305,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, int32 count P; } data; + unsigned long bytes; int skip = 0; @@ -316,6 +316,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, bswap_int32 (&data.size); bswap_int32 (&data.count); } + bytes = data.size * data.count; + if (bytes < data.size || bytes < data.count) + lose ((ME, "%s: Record type %d subtype %d too large.", + handle_get_filename (r->fh), rec_type, data.subtype)); switch (data.subtype) { @@ -330,10 +334,113 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, break; case 5: - case 6: - case 11: /* ?? Used by SPSS 8.0. */ + case 6: /* ?? Used by SPSS 8.0. */ skip = 1; break; + + case 11: /* Variable display parameters */ + { + const int n_vars = data.count / 3 ; + int i; + if ( data.count % 3 ) + { + msg (MW, _("%s: Invalid subrecord length. " + "Record: 7; Subrecord: 11"), + handle_get_filename (r->fh)); + skip = 1; + } + + for ( i = 0 ; i < n_vars ; ++i ) + { + struct + { + int32 measure P; + int32 width P; + int32 align P; + } + params; + + struct variable *v; + + assertive_buf_read (r, ¶ms, sizeof(params), 0); + + v = dict_get_var(*dict, i); + + v->measure = params.measure; + v->display_width = params.width; + v->alignment = params.align; + } + } + break; + + case 13: /* SPSS 12.0 Long variable name map */ + { + char *buf, *short_name, *save_ptr; + int idx; + + /* Read data. */ + buf = xmalloc (bytes + 1); + if (!buf_read (r, buf, bytes, 0)) + { + free (buf); + goto error; + } + buf[bytes] = '\0'; + + /* Parse data. */ + for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0; + short_name != NULL; + short_name = strtok_r (NULL, "=", &save_ptr), idx++) + { + char *long_name = strtok_r (NULL, "\t", &save_ptr); + struct variable *v; + + /* Validate long name. */ + if (long_name == NULL) + { + msg (MW, _("%s: Trailing garbage in long variable " + "name map."), + handle_get_filename (r->fh)); + break; + } + if (!var_is_valid_name (long_name, false)) + { + msg (MW, _("%s: Long variable mapping to invalid " + "variable name `%s'."), + handle_get_filename (r->fh), long_name); + break; + } + + /* Find variable using short name. */ + v = dict_lookup_var (*dict, short_name); + if (v == NULL) + { + msg (MW, _("%s: Long variable mapping for " + "nonexistent variable %s."), + handle_get_filename (r->fh), short_name); + break; + } + + /* Set long name. + Renaming a variable may clear the short + name, but we want to retain it, so + re-set it explicitly. */ + dict_rename_var (*dict, v, long_name); + var_set_short_name (v, short_name); + + /* For compatability, make sure dictionary + is in long variable name map order. In + the common case, this has no effect, + because the dictionary and the long + variable name map are already in the + same order. */ + dict_reorder_var (*dict, v, idx); + } + + /* Free data. */ + free (buf); + } + break; default: msg (MW, _("%s: Unrecognized record type 7, subtype %d " @@ -361,8 +468,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } default: - lose ((ME, _("%s: Unrecognized record type %d."), - handle_get_filename (r->fh), rec_type)); + corrupt_msg(MW, _("%s: Unrecognized record type %d."), + handle_get_filename (r->fh), rec_type); } } @@ -545,22 +652,18 @@ read_header (struct sfm_reader *r, bswap_flt64 (&hdr.bias); } + /* Copy basic info and verify correctness. */ r->value_cnt = hdr.case_size; - if (r->value_cnt <= 0 - || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2)) - lose ((ME, _("%s: Number of elements per case (%d) is not between 1 " - "and %d."), - handle_get_filename (r->fh), r->value_cnt, - INT_MAX / sizeof (union value) / 2)); + + /* If value count is rediculous, then force it to -1 (a sentinel value) */ + if ( r->value_cnt < 0 || + r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2)) + r->value_cnt = -1; r->compressed = hdr.compress; r->weight_idx = hdr.weight_idx - 1; - if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt) - lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " - "and number of elements per case (%d)."), - handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt)); r->case_cnt = hdr.case_cnt; if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2) @@ -636,20 +739,31 @@ read_variables (struct sfm_reader *r, int long_string_count = 0; /* # of long string continuation records still expected. */ int next_value = 0; /* Index to next `value' structure. */ - size_t var_cap = 0; - /* Allocate variables. */ - *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt); + assert(r); + + *var_by_idx = 0; + + /* Pre-allocate variables. */ + if ( r->value_cnt != -1 ) + { + *var_by_idx = xmalloc(r->value_cnt * sizeof (**var_by_idx)); + r->vars = xmalloc( r->value_cnt * sizeof (*r->vars) ); + } + /* Read in the entry for each variable and use the info to initialize the dictionary. */ - for (i = 0; i < r->value_cnt; i++) + for (i = 0; ; ++i) { struct variable *vv; - char name[9]; + char name[SHORT_NAME_LEN + 1]; int nv; int j; + if ( r->value_cnt != -1 && i >= r->value_cnt ) + break; + assertive_buf_read (r, &sv, sizeof sv, 0); if (r->reverse_endian) @@ -662,10 +776,19 @@ read_variables (struct sfm_reader *r, bswap_int32 (&sv.write); } + /* We've come to the end of the variable entries */ if (sv.rec_type != 2) - lose ((ME, _("%s: position %d: Bad record type (%d); " - "the expected value was 2."), - handle_get_filename (r->fh), i, sv.rec_type)); + { + buf_unread(r, sizeof sv); + r->value_cnt = i; + break; + } + + if ( -1 == r->value_cnt ) + { + *var_by_idx = xrealloc (*var_by_idx, sizeof **var_by_idx * (i + 1)); + r->vars = xrealloc(r->vars, (i + 1) * sizeof (*r->vars) ); + } /* If there was a long string previously, make sure that the continuations are present; otherwise make sure there aren't @@ -677,6 +800,8 @@ read_variables (struct sfm_reader *r, "proper number of continuation records."), handle_get_filename (r->fh), i)); + + r->vars[i].width = -1; (*var_by_idx)[i] = NULL; long_string_count--; continue; @@ -716,7 +841,7 @@ read_variables (struct sfm_reader *r, name[0] = toupper ((unsigned char) (sv.name[0])); /* Copy remaining characters of variable name. */ - for (j = 1; j < 8; j++) + for (j = 1; j < SHORT_NAME_LEN; j++) { int c = (unsigned char) sv.name[j]; @@ -744,6 +869,7 @@ read_variables (struct sfm_reader *r, if (vv == NULL) lose ((ME, _("%s: Duplicate variable name `%s' within system file."), handle_get_filename (r->fh), name)); + var_set_short_name (vv, vv->name); /* Case reading data. */ nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64)); @@ -767,11 +893,14 @@ read_variables (struct sfm_reader *r, "length %d."), handle_get_filename (r->fh), vv->name, len)); - /* Read label into variable structure. */ - vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1); - if (vv->label == NULL) - goto error; - vv->label[len] = '\0'; + if ( len != 0 ) + { + /* Read label into variable structure. */ + vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1); + if (vv->label == NULL) + goto error; + vv->label[len] = '\0'; + } } /* Set missing values. */ @@ -840,15 +969,9 @@ read_variables (struct sfm_reader *r, || !parse_format_spec (r, sv.write, &vv->write, vv)) goto error; - /* Add variable to list. */ - if (var_cap >= r->var_cnt) - { - var_cap = 2 + r->var_cnt * 2; - r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars); - } - r->vars[r->var_cnt].width = vv->width; - r->vars[r->var_cnt].fv = vv->fv; - r->var_cnt++; + r->vars[i].width = vv->width; + r->vars[i].fv = vv->fv; + } /* Some consistency checks. */ @@ -856,10 +979,12 @@ read_variables (struct sfm_reader *r, lose ((ME, _("%s: Long string continuation records omitted at end of " "dictionary."), handle_get_filename (r->fh))); + if (next_value != r->value_cnt) - lose ((ME, _("%s: System file header indicates %d variable positions but " + corrupt_msg(MW, _("%s: System file header indicates %d variable positions but " "%d were read from file."), - handle_get_filename (r->fh), r->value_cnt, next_value)); + handle_get_filename (r->fh), r->value_cnt, next_value); + return 1; @@ -870,27 +995,32 @@ error: /* Translates the format spec from sysfile format to internal format. */ static int -parse_format_spec (struct sfm_reader *r, int32 s, struct fmt_spec *v, struct variable *vv) +parse_format_spec (struct sfm_reader *r, int32 s, + struct fmt_spec *f, struct variable *v) { - v->type = translate_fmt ((s >> 16) & 0xff); - if (v->type == -1) + f->type = translate_fmt ((s >> 16) & 0xff); + if (f->type == -1) lose ((ME, _("%s: Bad format specifier byte (%d)."), handle_get_filename (r->fh), (s >> 16) & 0xff)); - v->w = (s >> 8) & 0xff; - v->d = s & 0xff; - - /* FIXME? Should verify the resulting specifier more thoroughly. */ + f->w = (s >> 8) & 0xff; + f->d = s & 0xff; - if (v->type == -1) - lose ((ME, _("%s: Bad format specifier byte (%d)."), - handle_get_filename (r->fh), (s >> 16) & 0xff)); - if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0)) + if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0)) lose ((ME, _("%s: %s variable %s has %s format specifier %s."), handle_get_filename (r->fh), - vv->type == ALPHA ? _("String") : _("Numeric"), - vv->name, - formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"), - formats[v->type].name)); + v->type == ALPHA ? _("String") : _("Numeric"), + v->name, + formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"), + formats[f->type].name)); + + if (!check_output_specifier (f, false) + || !check_specifier_width (f, v->width, false)) + { + msg (ME, _("%s variable %s has invalid format specifier %s."), + v->type == NUMERIC ? _("Numeric") : _("String"), + v->name, fmt_to_string (f)); + *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0); + } return 1; error: @@ -1089,8 +1219,15 @@ error: static void * buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc) { - if (buf == NULL) + assert (r); + + if (buf == NULL && byte_cnt > 0 ) buf = xmalloc (max (byte_cnt, min_alloc)); + + if ( byte_cnt == 0 ) + return buf; + + if (1 != fread (buf, byte_cnt, 1, r->file)) { if (ferror (r->file)) @@ -1104,6 +1241,19 @@ buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc) return buf; } +/* Winds the reader BYTE_CNT bytes back in the reader stream. */ +void +buf_unread(struct sfm_reader *r, size_t byte_cnt) +{ + assert(byte_cnt > 0); + + if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR)) + { + msg (ME, _("%s: Seeking system file: %s."), + handle_get_filename (r->fh), strerror (errno)); + } +} + /* Reads a document record, type 6, from system file R, and sets up the documents and n_documents fields in the associated dictionary. */ @@ -1180,7 +1330,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) for (;;) { - for (; p < p_end; p++) + for (; p < p_end; p++){ switch (*p) { case 0: @@ -1234,7 +1384,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) goto success; break; } - + } /* We have reached the end of this instruction octet. Read another. */ if (r->ptr == NULL || r->ptr >= r->end) @@ -1285,7 +1435,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->value_cnt; i++) if (r->vars[i].width == 0) bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f); } @@ -1297,7 +1447,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->value_cnt; i++) if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis) case_data_rw (c, r->vars[i].fv)->f = SYSMIS; } @@ -1325,7 +1475,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) return 0; } - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->value_cnt; i++) { struct sfm_var *v = &r->vars[i]; @@ -1334,9 +1484,9 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) flt64 f = *bounce_cur++; if (r->reverse_endian) bswap_flt64 (&f); - case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f; + case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f; } - else + else if (v->width != -1) { memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width); bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));