X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=b2db755732311d6c6ba8dda47044225edffa80c2;hb=850523d4cd0a281c4ad6b283585c69633b97c7c7;hp=d1676564d8aa965f831465418a10ad22cfd13400;hpb=41c6f5447941e5d36d0554ba874671649353752f;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index d1676564d8..b2db755732 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -203,6 +203,7 @@ struct sfm_reader size_t sfm_var_cnt; /* Number of variables. */ int case_cnt; /* Number of cases */ const char *encoding; /* String encoding. */ + bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */ /* Decompression. */ enum any_compression compression; @@ -282,7 +283,7 @@ static bool read_variable_record (struct sfm_reader *, struct sfm_var_record *); static bool read_value_label_record (struct sfm_reader *, struct sfm_value_label_record *); -static struct sfm_document_record *read_document_record (struct sfm_reader *); +static bool read_document_record (struct sfm_reader *); static bool read_extension_record (struct sfm_reader *, int subtype, struct sfm_extension_record **); static bool skip_extension_record (struct sfm_reader *, int subtype); @@ -500,8 +501,7 @@ read_record (struct sfm_reader *r, int type, sys_error (r, r->pos, _("Duplicate type 6 (document) record.")); return false; } - r->document = read_document_record (r); - return r->document != NULL; + return read_document_record (r); case 7: if (!read_int (r, &subtype)) @@ -862,7 +862,7 @@ sfm_decode (struct any_reader *r_, const char *encoding, amount that the header claims. SPSS version 13 gets this wrong when very long strings are involved, so don't warn in that case. */ - if (r->header.nominal_case_size != -1 + if (r->header.nominal_case_size > 0 && r->header.nominal_case_size != r->n_vars && r->info.version_major != 13) sys_warn (r, -1, _("File header claims %d variable positions but " @@ -968,6 +968,8 @@ read_header (struct sfm_reader *r, struct any_read_info *info, if (!read_string (r, header->magic, sizeof header->magic) || !read_string (r, header->eye_catcher, sizeof header->eye_catcher)) return false; + r->written_by_readstat = strstr (header->eye_catcher, + "https://github.com/WizardMac/ReadStat"); if (!strcmp (ASCII_MAGIC, header->magic) || !strcmp (EBCDIC_MAGIC, header->magic)) @@ -1229,33 +1231,35 @@ read_value_label_record (struct sfm_reader *r, return true; } -/* Reads a document record from R and returns it. */ -static struct sfm_document_record * +/* Reads a document record from R. Returns true if successful, false on + error. */ +static bool read_document_record (struct sfm_reader *r) { - struct sfm_document_record *record; int n_lines; - - record = pool_malloc (r->pool, sizeof *record); - record->pos = r->pos; - if (!read_int (r, &n_lines)) - return NULL; - if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) + return false; + else if (n_lines == 0) + return true; + else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) { - sys_error (r, record->pos, + sys_error (r, r->pos, _("Number of document lines (%d) " "must be greater than 0 and less than %d."), n_lines, INT_MAX / DOC_LINE_LENGTH); - return NULL; + return false; } + struct sfm_document_record *record; + record = pool_malloc (r->pool, sizeof *record); + record->pos = r->pos; record->n_lines = n_lines; record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines); if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines)) - return NULL; + return false; - return record; + r->document = record; + return true; } static bool @@ -1453,11 +1457,13 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, "`%s' to `%s'."), name, new_name); var = rec->var = dict_create_var_assert (dict, new_name, rec->width); + var_set_short_name (var, 0, new_name); free (new_name); } - /* Set the short name the same as the long name. */ - var_set_short_name (var, 0, name); + /* Set the short name the same as the long name (even if we renamed + it). */ + var_set_short_name (var, 0, var_get_name (var)); /* Get variable label, if any. */ if (rec->label) @@ -1827,10 +1833,9 @@ decode_mrsets (struct sfm_reader *r, struct dictionary *dict) size_t i; name = recode_string ("UTF-8", r->encoding, s->name, -1); - if (name[0] != '$') + if (!mrset_is_valid_name (name, dict_get_encoding (dict), false)) { - sys_warn (r, -1, _("Multiple response set name `%s' does not begin " - "with `$'."), + sys_warn (r, -1, _("Invalid multiple response set name `%s'."), name); free (name); continue; @@ -1992,8 +1997,9 @@ parse_display_parameters (struct sfm_reader *r, } static void -rename_var_and_save_short_names (struct dictionary *dict, struct variable *var, - const char *new_name) +rename_var_and_save_short_names (struct sfm_reader *r, off_t pos, + struct dictionary *dict, + struct variable *var, const char *new_name) { size_t n_short_names; char **short_names; @@ -2011,7 +2017,8 @@ rename_var_and_save_short_names (struct dictionary *dict, struct variable *var, } /* Set long name. */ - dict_rename_var (dict, var, new_name); + if (!dict_try_rename_var (dict, var, new_name)) + sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name); /* Restore short names. */ for (i = 0; i < n_short_names; i++) @@ -2045,7 +2052,7 @@ parse_long_var_name_map (struct sfm_reader *r, char *new_name; new_name = utf8_to_lower (var_get_name (var)); - rename_var_and_save_short_names (dict, var, new_name); + rename_var_and_save_short_names (r, -1, dict, var, new_name); free (new_name); } @@ -2070,16 +2077,7 @@ parse_long_var_name_map (struct sfm_reader *r, continue; } - /* Identify any duplicates. */ - if (utf8_strcasecmp (var_get_short_name (var, 0), long_name) - && dict_lookup_var (dict, long_name) != NULL) - { - sys_warn (r, record->pos, - _("Duplicate long variable name `%s'."), long_name); - continue; - } - - rename_var_and_save_short_names (dict, var, long_name); + rename_var_and_save_short_names (r, record->pos, dict, var, long_name); } close_text_record (r, text); } @@ -2227,7 +2225,15 @@ parse_value_labels (struct sfm_reader *r, struct dictionary *dict, if (!var_add_value_label (var, &value, utf8_labels[j])) { - if (var_is_numeric (var)) + if (r->written_by_readstat) + { + /* Ignore the problem. ReadStat is buggy and emits value + labels whose values are longer than string variables' + widths, that are identical in the actual width of the + variable, e.g. both values "ABC123" and "ABC456" for a + string variable with width 3. */ + } + else if (var_is_numeric (var)) sys_warn (r, record->pos, _("Duplicate value label for %g on %s."), value.f, var_get_name (var)); @@ -2329,7 +2335,14 @@ parse_attributes (struct sfm_reader *r, struct text_record *text, break; } if (attrs != NULL) - attrset_add (attrs, attr); + { + if (!attrset_try_add (attrs, attr)) + { + text_warn (r, text, _("Duplicate attribute %s."), + attribute_get_name (attr)); + attribute_destroy (attr); + } + } else attribute_destroy (attr); } @@ -2974,7 +2987,7 @@ open_text_record (struct sfm_reader *r, } /* Closes TEXT, frees its storage, and issues a final warning - about suppressed warnings if necesary. */ + about suppressed warnings if necessary. */ static void close_text_record (struct sfm_reader *r, struct text_record *text) { @@ -3491,7 +3504,7 @@ read_ztrailer (struct sfm_reader *r, if (fstat (fileno (r->file), &s)) { - sys_error (ME, 0, _("%s: stat failed (%s)."), + sys_error (r, 0, _("%s: stat failed (%s)."), fh_get_file_name (r->fh), strerror (errno)); return false; }