X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Fdata%2Fsys-file-reader.c;h=af8f1b457678e60c5a927969e737d39cc169c4f9;hb=69597a0423aad2fd5974c2cbc08a680208b94b6e;hp=a2c0830ce604fcab341b9dbce6f95dd062740603;hpb=ee4b996842013f259812700adafdea36a594d394;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index a2c0830ce6..af8f1b4576 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -203,6 +203,7 @@ struct sfm_reader size_t sfm_var_cnt; /* Number of variables. */ int case_cnt; /* Number of cases */ const char *encoding; /* String encoding. */ + bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */ /* Decompression. */ enum any_compression compression; @@ -282,7 +283,7 @@ static bool read_variable_record (struct sfm_reader *, struct sfm_var_record *); static bool read_value_label_record (struct sfm_reader *, struct sfm_value_label_record *); -static struct sfm_document_record *read_document_record (struct sfm_reader *); +static bool read_document_record (struct sfm_reader *); static bool read_extension_record (struct sfm_reader *, int subtype, struct sfm_extension_record **); static bool skip_extension_record (struct sfm_reader *, int subtype); @@ -500,8 +501,7 @@ read_record (struct sfm_reader *r, int type, sys_error (r, r->pos, _("Duplicate type 6 (document) record.")); return false; } - r->document = read_document_record (r); - return r->document != NULL; + return read_document_record (r); case 7: if (!read_int (r, &subtype)) @@ -523,7 +523,7 @@ read_record (struct sfm_reader *r, int type, 18. I'm surprised that SPSS puts up with this. */ struct sfm_extension_record *ext; bool ok = read_extension_record (r, subtype, &ext); - if (ok) + if (ok && ext) ll_push_tail (&r->var_attrs, &ext->ll); return ok; } @@ -862,7 +862,7 @@ sfm_decode (struct any_reader *r_, const char *encoding, amount that the header claims. SPSS version 13 gets this wrong when very long strings are involved, so don't warn in that case. */ - if (r->header.nominal_case_size != -1 + if (r->header.nominal_case_size > 0 && r->header.nominal_case_size != r->n_vars && r->info.version_major != 13) sys_warn (r, -1, _("File header claims %d variable positions but " @@ -891,7 +891,7 @@ sfm_decode (struct any_reader *r_, const char *encoding, error: sfm_close (r_); - dict_destroy (dict); + dict_unref (dict); *dictp = NULL; return NULL; } @@ -968,6 +968,8 @@ read_header (struct sfm_reader *r, struct any_read_info *info, if (!read_string (r, header->magic, sizeof header->magic) || !read_string (r, header->eye_catcher, sizeof header->eye_catcher)) return false; + r->written_by_readstat = strstr (header->eye_catcher, + "https://github.com/WizardMac/ReadStat"); if (!strcmp (ASCII_MAGIC, header->magic) || !strcmp (EBCDIC_MAGIC, header->magic)) @@ -1229,33 +1231,35 @@ read_value_label_record (struct sfm_reader *r, return true; } -/* Reads a document record from R and returns it. */ -static struct sfm_document_record * +/* Reads a document record from R. Returns true if successful, false on + error. */ +static bool read_document_record (struct sfm_reader *r) { - struct sfm_document_record *record; int n_lines; - - record = pool_malloc (r->pool, sizeof *record); - record->pos = r->pos; - if (!read_int (r, &n_lines)) - return NULL; - if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) + return false; + else if (n_lines == 0) + return true; + else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) { - sys_error (r, record->pos, + sys_error (r, r->pos, _("Number of document lines (%d) " "must be greater than 0 and less than %d."), n_lines, INT_MAX / DOC_LINE_LENGTH); - return NULL; + return false; } + struct sfm_document_record *record; + record = pool_malloc (r->pool, sizeof *record); + record->pos = r->pos; record->n_lines = n_lines; record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines); if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines)) - return NULL; + return false; - return record; + r->document = record; + return true; } static bool @@ -1453,11 +1457,13 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, "`%s' to `%s'."), name, new_name); var = rec->var = dict_create_var_assert (dict, new_name, rec->width); + var_set_short_name (var, 0, new_name); free (new_name); } - /* Set the short name the same as the long name. */ - var_set_short_name (var, 0, name); + /* Set the short name the same as the long name (even if we renamed + it). */ + var_set_short_name (var, 0, var_get_name (var)); /* Get variable label, if any. */ if (rec->label) @@ -1538,22 +1544,9 @@ parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format, int *n_warnings) { const int max_warnings = 8; - uint8_t raw_type = format >> 16; - uint8_t w = format >> 8; - uint8_t d = format; struct fmt_spec f; - bool ok; - - f.w = w; - f.d = d; - msg_disable (); - ok = (fmt_from_io (raw_type, &f.type) - && fmt_check_output (&f) - && fmt_check_width_compat (&f, var_get_width (v))); - msg_enable (); - - if (ok) + if (fmt_from_u32 (format, var_get_width (v), false, &f)) { if (which == PRINT_FORMAT) var_set_print_format (v, &f); @@ -1756,7 +1749,12 @@ parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record, } number = text_get_token (text, ss_cstr (" "), NULL); - if (!strcmp (number, "11")) + if (!number) + sys_warn (r, record->pos, + _("Missing label source value " + "following `E' at offset %zu in MRSETS record."), + text_pos (text)); + else if (!strcmp (number, "11")) mrset->label_from_var_label = true; else if (strcmp (number, "1")) sys_warn (r, record->pos, @@ -1827,10 +1825,9 @@ decode_mrsets (struct sfm_reader *r, struct dictionary *dict) size_t i; name = recode_string ("UTF-8", r->encoding, s->name, -1); - if (name[0] != '$') + if (!mrset_is_valid_name (name, dict_get_encoding (dict), false)) { - sys_warn (r, -1, _("Multiple response set name `%s' does not begin " - "with `$'."), + sys_warn (r, -1, _("Invalid multiple response set name `%s'."), name); free (name); continue; @@ -1992,8 +1989,9 @@ parse_display_parameters (struct sfm_reader *r, } static void -rename_var_and_save_short_names (struct dictionary *dict, struct variable *var, - const char *new_name) +rename_var_and_save_short_names (struct sfm_reader *r, off_t pos, + struct dictionary *dict, + struct variable *var, const char *new_name) { size_t n_short_names; char **short_names; @@ -2011,7 +2009,8 @@ rename_var_and_save_short_names (struct dictionary *dict, struct variable *var, } /* Set long name. */ - dict_rename_var (dict, var, new_name); + if (!dict_try_rename_var (dict, var, new_name)) + sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name); /* Restore short names. */ for (i = 0; i < n_short_names; i++) @@ -2045,7 +2044,7 @@ parse_long_var_name_map (struct sfm_reader *r, char *new_name; new_name = utf8_to_lower (var_get_name (var)); - rename_var_and_save_short_names (dict, var, new_name); + rename_var_and_save_short_names (r, -1, dict, var, new_name); free (new_name); } @@ -2070,16 +2069,7 @@ parse_long_var_name_map (struct sfm_reader *r, continue; } - /* Identify any duplicates. */ - if (utf8_strcasecmp (var_get_short_name (var, 0), long_name) - && dict_lookup_var (dict, long_name) != NULL) - { - sys_warn (r, record->pos, - _("Duplicate long variable name `%s'."), long_name); - continue; - } - - rename_var_and_save_short_names (dict, var, long_name); + rename_var_and_save_short_names (r, record->pos, dict, var, long_name); } close_text_record (r, text); } @@ -2227,7 +2217,15 @@ parse_value_labels (struct sfm_reader *r, struct dictionary *dict, if (!var_add_value_label (var, &value, utf8_labels[j])) { - if (var_is_numeric (var)) + if (r->written_by_readstat) + { + /* Ignore the problem. ReadStat is buggy and emits value + labels whose values are longer than string variables' + widths, that are identical in the actual width of the + variable, e.g. both values "ABC123" and "ABC456" for a + string variable with width 3. */ + } + else if (var_is_numeric (var)) sys_warn (r, record->pos, _("Duplicate value label for %g on %s."), value.f, var_get_name (var)); @@ -2308,28 +2306,35 @@ parse_attributes (struct sfm_reader *r, struct text_record *text, text_warn (r, text, _("Error parsing attribute value %s[%d]."), key, index); break; - } + } length = strlen (value); - if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') + if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') { value[length - 1] = '\0'; - attribute_add_value (attr, value + 1); + attribute_add_value (attr, value + 1); } - else + else { text_warn (r, text, _("Attribute value %s[%d] is not quoted: %s."), key, index, value); - attribute_add_value (attr, value); + attribute_add_value (attr, value); } /* Was this the last value for this attribute? */ if (text_match (text, ')')) break; } - if (attrs != NULL) - attrset_add (attrs, attr); + if (attrs != NULL && attribute_get_n_values (attr) > 0) + { + if (!attrset_try_add (attrs, attr)) + { + text_warn (r, text, _("Duplicate attribute %s."), + attribute_get_name (attr)); + attribute_destroy (attr); + } + } else attribute_destroy (attr); } @@ -2375,7 +2380,7 @@ assign_variable_roles (struct sfm_reader *r, struct dictionary *dict) struct variable *var = dict_get_var (dict, i); struct attrset *attrs = var_get_attributes (var); const struct attribute *attr = attrset_lookup (attrs, "$@Role"); - if (attr != NULL) + if (attr != NULL && attribute_get_n_values (attr) > 0) { int value = atoi (attribute_get_value (attr, 0)); enum var_role role; @@ -2463,7 +2468,8 @@ parse_long_string_value_labels (struct sfm_reader *r, ofs += 4; /* Parse variable name, width, and number of labels. */ - if (!check_overflow (r, record, ofs, var_name_len + 8)) + if (!check_overflow (r, record, ofs, var_name_len) + || !check_overflow (r, record, ofs, var_name_len + 8)) return; var_name = recode_string_pool ("UTF-8", dict_encoding, (const char *) record->data + ofs, @@ -2581,7 +2587,8 @@ parse_long_string_missing_values (struct sfm_reader *r, ofs += 4; /* Parse variable name. */ - if (!check_overflow (r, record, ofs, var_name_len + 1)) + if (!check_overflow (r, record, ofs, var_name_len) + || !check_overflow (r, record, ofs, var_name_len + 1)) return; var_name = recode_string_pool ("UTF-8", dict_encoding, (const char *) record->data + ofs, @@ -2972,7 +2979,7 @@ open_text_record (struct sfm_reader *r, } /* Closes TEXT, frees its storage, and issues a final warning - about suppressed warnings if necesary. */ + about suppressed warnings if necessary. */ static void close_text_record (struct sfm_reader *r, struct text_record *text) { @@ -2995,7 +3002,7 @@ read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, { if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) return false; - + *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL); if (*value == NULL) return false; @@ -3051,7 +3058,7 @@ static void text_warn (struct sfm_reader *r, struct text_record *text, const char *format, ...) { - if (text->n_warnings++ < MAX_TEXT_WARNINGS) + if (text->n_warnings++ < MAX_TEXT_WARNINGS) { va_list args; @@ -3142,8 +3149,8 @@ text_match (struct text_record *text, char c) { if (text->pos >= text->buffer.length) return false; - - if (text->buffer.string[text->pos] == c) + + if (text->buffer.string[text->pos] == c) { text->pos++; return true; @@ -3173,7 +3180,6 @@ static void sys_msg (struct sfm_reader *r, off_t offset, int class, const char *format, va_list args) { - struct msg m; struct string text; ds_init_empty (&text); @@ -3184,15 +3190,11 @@ sys_msg (struct sfm_reader *r, off_t offset, ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh)); ds_put_vformat (&text, format, args); - m.category = msg_class_to_category (class); - m.severity = msg_class_to_severity (class); - m.file_name = NULL; - m.first_line = 0; - m.last_line = 0; - m.first_column = 0; - m.last_column = 0; - m.text = ds_cstr (&text); - + struct msg m = { + .category = msg_class_to_category (class), + .severity = msg_class_to_severity (class), + .text = ds_cstr (&text), + }; msg_emit (&m); } @@ -3489,7 +3491,7 @@ read_ztrailer (struct sfm_reader *r, if (fstat (fileno (r->file), &s)) { - sys_error (ME, 0, _("%s: stat failed (%s)."), + sys_error (r, 0, _("%s: stat failed (%s)."), fh_get_file_name (r->fh), strerror (errno)); return false; }