X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=491df7f996c349a853c3492d0a49ca575e717161;hb=36bba0ffbec3b8432d4ececb720bf033053f5d46;hp=7684acbc576a40c77c78c7a2ea6b7d57360127ac;hpb=339f1956cc727eda788638644ef93ab7852b31cd;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 7684acbc57..491df7f996 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -41,6 +41,7 @@ #include "data/value-labels.h" #include "data/value.h" #include "data/variable.h" +#include "data/varset.h" #include "libpspp/array.h" #include "libpspp/assertion.h" #include "libpspp/compiler.h" @@ -371,6 +372,9 @@ static void parse_long_string_value_labels (struct sfm_reader *, static void parse_long_string_missing_values ( struct sfm_reader *, const struct sfm_extension_record *, struct dictionary *); +static void parse_var_sets (struct sfm_reader *, + const struct sfm_extension_record *, + struct dictionary *); /* Frees the strings inside INFO. */ void @@ -744,7 +748,7 @@ sfm_get_strings (const struct any_reader *r_, struct pool *pool, return aux.n; } -/* Decodes the dictionary read from R, saving it into into *DICT. Character +/* Decodes the dictionary read from R, saving it into *DICT. Character strings in R are decoded using ENCODING, or an encoding obtained from R if ENCODING is null, or the locale encoding if R specifies no encoding. @@ -840,6 +844,8 @@ sfm_decode (struct any_reader *r_, const char *encoding, if (r->extensions[EXT_LONG_MISSING] != NULL) parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING], dict); + if (r->extensions[EXT_VAR_SETS]) + parse_var_sets (r, r->extensions[EXT_VAR_SETS], dict); /* Warn if the actual amount of data per case differs from the amount that the header claims. SPSS version 13 gets this @@ -1284,6 +1290,7 @@ read_extension_record (struct sfm_reader *r, int subtype, /* Implemented record types. */ { EXT_INTEGER, 4, 8 }, { EXT_FLOAT, 8, 3 }, + { EXT_VAR_SETS, 1, 0 }, { EXT_MRSETS, 1, 0 }, { EXT_PRODUCT_INFO, 1, 0 }, { EXT_DISPLAY, 4, 0 }, @@ -1298,7 +1305,6 @@ read_extension_record (struct sfm_reader *r, int subtype, { EXT_LONG_MISSING, 1, 0 }, /* Ignored record types. */ - { EXT_VAR_SETS, 0, 0 }, { EXT_DATE, 0, 0 }, { EXT_DATA_ENTRY, 0, 0 }, { EXT_DATAVIEW, 0, 0 }, @@ -1433,8 +1439,7 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, } struct variable *var; - if (!dict_id_is_valid (dict, name, false) - || name[0] == '$' || name[0] == '#') + if (!dict_id_is_valid (dict, name) || name[0] == '$' || name[0] == '#') { var = add_var_with_generated_name (dict, rec->width); sys_warn (r, rec->pos, _("Renaming variable with invalid name " @@ -1541,9 +1546,9 @@ parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format, if (fmt_from_u32 (format, var_get_width (v), false, &f)) { if (which == PRINT_FORMAT) - var_set_print_format (v, &f); + var_set_print_format (v, f); else - var_set_write_format (v, &f); + var_set_write_format (v, f); } else if (format == 0) { @@ -1817,7 +1822,7 @@ decode_mrsets (struct sfm_reader *r, struct dictionary *dict) size_t i; name = recode_string ("UTF-8", r->encoding, s->name, -1); - if (!mrset_is_valid_name (name, dict_get_encoding (dict), false)) + if (!mrset_is_valid_name (name, dict_get_encoding (dict))) { sys_warn (r, -1, _("Invalid multiple response set name `%s'."), name); @@ -2051,7 +2056,7 @@ parse_long_var_name_map (struct sfm_reader *r, while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { /* Validate long name. */ - if (!dict_id_is_valid (dict, long_name, false) + if (!dict_id_is_valid (dict, long_name) || long_name[0] == '$' || long_name[0] == '#') { sys_warn (r, record->pos, @@ -2136,7 +2141,6 @@ parse_long_string_map (struct sfm_reader *r, var_set_width (var, length); } close_text_record (r, text); - dict_compact_values (dict); return true; } @@ -2478,21 +2482,28 @@ assign_variable_roles (struct sfm_reader *r, struct dictionary *dict) } static bool -check_overflow (struct sfm_reader *r, - const struct sfm_extension_record *record, - size_t ofs, size_t length) +check_overflow__ (const struct sfm_extension_record *record, + size_t ofs, size_t length) { size_t end = record->size * record->count; if (length >= end || ofs + length > end) - { - sys_warn (r, record->pos + end, - _("Extension record subtype %d ends unexpectedly."), - record->subtype); - return false; - } + return false; return true; } +static bool +check_overflow (struct sfm_reader *r, + const struct sfm_extension_record *record, + size_t ofs, size_t length) +{ + bool ok = check_overflow__ (record, ofs, length); + if (!ok) + sys_warn (r, record->pos + record->size * record->count, + _("Extension record subtype %d ends unexpectedly."), + record->subtype); + return ok; +} + static void parse_long_string_value_labels (struct sfm_reader *r, const struct sfm_extension_record *record, @@ -2619,6 +2630,7 @@ parse_long_string_missing_values (struct sfm_reader *r, size_t end = record->size * record->count; size_t ofs = 0; + bool warned = false; while (ofs < end) { struct missing_values mv; @@ -2667,17 +2679,32 @@ parse_long_string_missing_values (struct sfm_reader *r, var = NULL; } + /* Parse value length. */ + if (!check_overflow (r, record, ofs, 4)) + return; + size_t value_length = parse_int (r, record->data, ofs); + ofs += 4; + /* Parse values. */ mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8); for (i = 0; i < n_missing_values; i++) { - size_t value_length; - - /* Parse value length. */ - if (!check_overflow (r, record, ofs, 4)) - return; - value_length = parse_int (r, record->data, ofs); - ofs += 4; + /* Tolerate files written by old, buggy versions of PSPP where we + believed that the value_length was repeated before each missing + value. */ + if (check_overflow__ (record, ofs, value_length) + && parse_int (r, record->data, ofs) == 8) + { + if (!warned) + { + sys_warn (r, record->pos + ofs, + _("This file has corrupted metadata written by a " + "buggy version of PSPP. To fix it, save a new " + "copy of the file.")); + warned = true; + } + ofs += 4; + } /* Parse value. */ if (!check_overflow (r, record, ofs, value_length)) @@ -2697,6 +2724,57 @@ parse_long_string_missing_values (struct sfm_reader *r, var_set_missing_values (var, &mv); } } + +static void +parse_var_sets (struct sfm_reader *r, + const struct sfm_extension_record *record, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, record, true); + for (;;) + { + char *varset_name = text_get_token (text, ss_cstr ("="), NULL); + if (!varset_name) + break; + + struct varset *varset = xmalloc (sizeof *varset); + *varset = (struct varset) { + .name = xstrdup (varset_name), + }; + + text_match (text, ' '); + + size_t allocated_vars = 0; + char delimiter; + do + { + char *var_name = text_get_token (text, ss_cstr (" \n"), &delimiter); + if (!var_name) + break; + + size_t len = strlen (var_name); + if (len > 0 && var_name[len - 1] == '\r') + var_name[len - 1] = '\0'; + + struct variable *var = dict_lookup_var (dict, var_name); + if (var) + { + if (varset->n_vars >= allocated_vars) + varset->vars = x2nrealloc (varset->vars, &allocated_vars, + sizeof *varset->vars); + varset->vars[varset->n_vars++] = var; + } + else + sys_warn (r, record->pos, + _("Variable set %s contains unknown variable %s."), + varset_name, var_name); + } + while (delimiter == ' '); + + dict_add_varset (dict, varset); + } + close_text_record (r, text); +} /* Case reader. */