X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=b2db755732311d6c6ba8dda47044225edffa80c2;hb=850523d4cd0a281c4ad6b283585c69633b97c7c7;hp=54788deb6e217bb918d8873077e2cb305ffbea77;hpb=8e27b1a0dba7f33b7acb0d8894efe2045b0bb98f;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 54788deb6e..b2db755732 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -203,6 +203,7 @@ struct sfm_reader size_t sfm_var_cnt; /* Number of variables. */ int case_cnt; /* Number of cases */ const char *encoding; /* String encoding. */ + bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */ /* Decompression. */ enum any_compression compression; @@ -861,7 +862,7 @@ sfm_decode (struct any_reader *r_, const char *encoding, amount that the header claims. SPSS version 13 gets this wrong when very long strings are involved, so don't warn in that case. */ - if (r->header.nominal_case_size != -1 + if (r->header.nominal_case_size > 0 && r->header.nominal_case_size != r->n_vars && r->info.version_major != 13) sys_warn (r, -1, _("File header claims %d variable positions but " @@ -967,6 +968,8 @@ read_header (struct sfm_reader *r, struct any_read_info *info, if (!read_string (r, header->magic, sizeof header->magic) || !read_string (r, header->eye_catcher, sizeof header->eye_catcher)) return false; + r->written_by_readstat = strstr (header->eye_catcher, + "https://github.com/WizardMac/ReadStat"); if (!strcmp (ASCII_MAGIC, header->magic) || !strcmp (EBCDIC_MAGIC, header->magic)) @@ -1454,11 +1457,13 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, "`%s' to `%s'."), name, new_name); var = rec->var = dict_create_var_assert (dict, new_name, rec->width); + var_set_short_name (var, 0, new_name); free (new_name); } - /* Set the short name the same as the long name. */ - var_set_short_name (var, 0, name); + /* Set the short name the same as the long name (even if we renamed + it). */ + var_set_short_name (var, 0, var_get_name (var)); /* Get variable label, if any. */ if (rec->label) @@ -1828,10 +1833,9 @@ decode_mrsets (struct sfm_reader *r, struct dictionary *dict) size_t i; name = recode_string ("UTF-8", r->encoding, s->name, -1); - if (name[0] != '$') + if (!mrset_is_valid_name (name, dict_get_encoding (dict), false)) { - sys_warn (r, -1, _("Multiple response set name `%s' does not begin " - "with `$'."), + sys_warn (r, -1, _("Invalid multiple response set name `%s'."), name); free (name); continue; @@ -1993,8 +1997,9 @@ parse_display_parameters (struct sfm_reader *r, } static void -rename_var_and_save_short_names (struct dictionary *dict, struct variable *var, - const char *new_name) +rename_var_and_save_short_names (struct sfm_reader *r, off_t pos, + struct dictionary *dict, + struct variable *var, const char *new_name) { size_t n_short_names; char **short_names; @@ -2012,7 +2017,8 @@ rename_var_and_save_short_names (struct dictionary *dict, struct variable *var, } /* Set long name. */ - dict_rename_var (dict, var, new_name); + if (!dict_try_rename_var (dict, var, new_name)) + sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name); /* Restore short names. */ for (i = 0; i < n_short_names; i++) @@ -2046,7 +2052,7 @@ parse_long_var_name_map (struct sfm_reader *r, char *new_name; new_name = utf8_to_lower (var_get_name (var)); - rename_var_and_save_short_names (dict, var, new_name); + rename_var_and_save_short_names (r, -1, dict, var, new_name); free (new_name); } @@ -2071,16 +2077,7 @@ parse_long_var_name_map (struct sfm_reader *r, continue; } - /* Identify any duplicates. */ - if (utf8_strcasecmp (var_get_short_name (var, 0), long_name) - && dict_lookup_var (dict, long_name) != NULL) - { - sys_warn (r, record->pos, - _("Duplicate long variable name `%s'."), long_name); - continue; - } - - rename_var_and_save_short_names (dict, var, long_name); + rename_var_and_save_short_names (r, record->pos, dict, var, long_name); } close_text_record (r, text); } @@ -2228,7 +2225,15 @@ parse_value_labels (struct sfm_reader *r, struct dictionary *dict, if (!var_add_value_label (var, &value, utf8_labels[j])) { - if (var_is_numeric (var)) + if (r->written_by_readstat) + { + /* Ignore the problem. ReadStat is buggy and emits value + labels whose values are longer than string variables' + widths, that are identical in the actual width of the + variable, e.g. both values "ABC123" and "ABC456" for a + string variable with width 3. */ + } + else if (var_is_numeric (var)) sys_warn (r, record->pos, _("Duplicate value label for %g on %s."), value.f, var_get_name (var)); @@ -2330,7 +2335,14 @@ parse_attributes (struct sfm_reader *r, struct text_record *text, break; } if (attrs != NULL) - attrset_add (attrs, attr); + { + if (!attrset_try_add (attrs, attr)) + { + text_warn (r, text, _("Duplicate attribute %s."), + attribute_get_name (attr)); + attribute_destroy (attr); + } + } else attribute_destroy (attr); } @@ -2975,7 +2987,7 @@ open_text_record (struct sfm_reader *r, } /* Closes TEXT, frees its storage, and issues a final warning - about suppressed warnings if necesary. */ + about suppressed warnings if necessary. */ static void close_text_record (struct sfm_reader *r, struct text_record *text) { @@ -3492,7 +3504,7 @@ read_ztrailer (struct sfm_reader *r, if (fstat (fileno (r->file), &s)) { - sys_error (ME, 0, _("%s: stat failed (%s)."), + sys_error (r, 0, _("%s: stat failed (%s)."), fh_get_file_name (r->fh), strerror (errno)); return false; }