X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=1a17eb89a1ac38a248b3c269f5ee9259c54c2f0d;hb=033725a375c3ffe41c3e6025e8c65962bf726062;hp=b4923230cf3629ff980142da4290efa1822d4b8a;hpb=2331dc47df5d45733218e418163f7f5ae99a6324;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index b4923230cf..1a17eb89a1 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -234,10 +234,6 @@ sfm_reader_cast (const struct any_reader *r_) static bool sfm_close (struct any_reader *); -static struct variable *lookup_var_by_index (struct sfm_reader *, off_t, - const struct sfm_var_record *, - size_t n, int idx); - static void sys_msg (struct sfm_reader *r, off_t, int class, const char *format, va_list args) PRINTF_FORMAT (4, 0); @@ -358,10 +354,10 @@ static void parse_long_var_name_map (struct sfm_reader *, static bool parse_long_string_map (struct sfm_reader *, const struct sfm_extension_record *, struct dictionary *); -static bool parse_value_labels (struct sfm_reader *, struct dictionary *, - const struct sfm_var_record *, - size_t n_var_recs, - const struct sfm_value_label_record *); +static void parse_value_labels (struct sfm_reader *, struct dictionary *); +static struct variable *parse_weight_var (struct sfm_reader *, + const struct sfm_var_record *, size_t n_var_recs, + int idx); static void parse_data_file_attributes (struct sfm_reader *, const struct sfm_extension_record *, struct dictionary *); @@ -497,10 +493,7 @@ read_record (struct sfm_reader *r, int type, case 6: if (r->document != NULL) - { - sys_error (r, r->pos, _("Duplicate type 6 (document) record.")); - return false; - } + sys_warn (r, r->pos, _("Duplicate type 6 (document) record.")); return read_document_record (r); case 7: @@ -761,7 +754,6 @@ sfm_decode (struct any_reader *r_, const char *encoding, { struct sfm_reader *r = sfm_reader_cast (r_); struct dictionary *dict; - size_t i; if (encoding == NULL) { @@ -808,25 +800,10 @@ sfm_decode (struct any_reader *r_, const char *encoding, /* Parse value labels and the weight variable immediately after the variable records. These records use indexes into var_recs[], so we must parse them before those indexes become invalidated by very long string variables. */ - for (i = 0; i < r->n_labels; i++) - if (!parse_value_labels (r, dict, r->vars, r->n_vars, &r->labels[i])) - goto error; + parse_value_labels (r, dict); if (r->header.weight_idx != 0) - { - struct variable *weight_var; - - weight_var = lookup_var_by_index (r, 76, r->vars, r->n_vars, - r->header.weight_idx); - if (weight_var != NULL) - { - if (var_is_numeric (weight_var)) - dict_set_weight (dict, weight_var); - else - sys_warn (r, -1, _("Ignoring string variable `%s' set " - "as weighting variable."), - var_get_name (weight_var)); - } - } + dict_set_weight (dict, parse_weight_var (r, r->vars, r->n_vars, + r->header.weight_idx)); if (r->extensions[EXT_DISPLAY] != NULL) parse_display_parameters (r, r->extensions[EXT_DISPLAY], dict); @@ -1035,7 +1012,7 @@ read_header (struct sfm_reader *r, struct any_read_info *info, if (!read_int (r, &r->case_cnt)) return false; - if ( r->case_cnt > INT_MAX / 2) + if (r->case_cnt > INT_MAX / 2) r->case_cnt = -1; /* Identify floating-point format and obtain compression bias. */ @@ -1412,6 +1389,15 @@ parse_header (struct sfm_reader *r, const struct sfm_header_record *header, info->product = ss_xstrdup (product); } +static struct variable * +add_var_with_generated_name (struct dictionary *dict, int width) +{ + char *name = dict_make_unique_var_name (dict, NULL, NULL); + struct variable *var = dict_create_var_assert (dict, name, width); + free (name); + return var; +} + /* Reads a variable (type 2) record from R and adds the corresponding variable to DICT. Also skips past additional variable records for long string @@ -1424,9 +1410,8 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, struct sfm_var_record *rec; int n_warnings = 0; - for (rec = var_recs; rec < &var_recs[n_var_recs]; ) + for (rec = var_recs; rec < &var_recs[n_var_recs];) { - struct variable *var; size_t n_values; char *name; size_t i; @@ -1435,13 +1420,6 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, rec->name, -1, r->pool); name[strcspn (name, " ")] = '\0'; - if (!dict_id_is_valid (dict, name, false) - || name[0] == '$' || name[0] == '#') - { - sys_error (r, rec->pos, _("Invalid variable name `%s'."), name); - return false; - } - if (rec->width < 0 || rec->width > 255) { sys_error (r, rec->pos, @@ -1449,17 +1427,26 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, return false; } - var = rec->var = dict_create_var (dict, name, rec->width); - if (var == NULL) + struct variable *var; + if (!dict_id_is_valid (dict, name, false) + || name[0] == '$' || name[0] == '#') { - char *new_name = dict_make_unique_var_name (dict, NULL, NULL); - sys_warn (r, rec->pos, _("Renaming variable with duplicate name " - "`%s' to `%s'."), - name, new_name); - var = rec->var = dict_create_var_assert (dict, new_name, rec->width); - var_set_short_name (var, 0, new_name); - free (new_name); + var = add_var_with_generated_name (dict, rec->width); + sys_warn (r, rec->pos, _("Renaming variable with invalid name " + "`%s' to `%s'."), name, var_get_name (var)); + } + else + { + var = dict_create_var (dict, name, rec->width); + if (var == NULL) + { + var = add_var_with_generated_name (dict, rec->width); + sys_warn (r, rec->pos, _("Renaming variable with duplicate name " + "`%s' to `%s'."), + name, var_get_name (var)); + } } + rec->var = var; /* Set the short name the same as the long name (even if we renamed it). */ @@ -1544,22 +1531,9 @@ parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format, int *n_warnings) { const int max_warnings = 8; - uint8_t raw_type = format >> 16; - uint8_t w = format >> 8; - uint8_t d = format; struct fmt_spec f; - bool ok; - f.w = w; - f.d = d; - - msg_disable (); - ok = (fmt_from_io (raw_type, &f.type) - && fmt_check_output (&f) - && fmt_check_width_compat (&f, var_get_width (v))); - msg_enable (); - - if (ok) + if (fmt_from_u32 (format, var_get_width (v), false, &f)) { if (which == PRINT_FORMAT) var_set_print_format (v, &f); @@ -2163,61 +2137,99 @@ parse_long_string_map (struct sfm_reader *r, return true; } -static bool -parse_value_labels (struct sfm_reader *r, struct dictionary *dict, - const struct sfm_var_record *var_recs, size_t n_var_recs, - const struct sfm_value_label_record *record) +#define MAX_LABEL_WARNINGS 5 + +/* Displays a warning for offset OFFSET in the file. */ +static void +value_label_warning (struct sfm_reader *r, off_t offset, int *n_label_warnings, + const char *format, ...) { - struct variable **vars; - char **utf8_labels; - size_t i; + if (++*n_label_warnings > MAX_LABEL_WARNINGS) + return; - utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels); - for (i = 0; i < record->n_labels; i++) + va_list args; + + va_start (args, format); + sys_msg (r, offset, MW, format, args); + va_end (args); +} + +#define MAX_LABEL_WARNINGS 5 + +static void +parse_one_value_label_set (struct sfm_reader *r, struct dictionary *dict, + const struct sfm_var_record *var_recs, + size_t n_var_recs, + const struct sfm_value_label_record *record, + int *n_label_warnings) +{ + char **utf8_labels + = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels); + for (size_t i = 0; i < record->n_labels; i++) utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict), record->labels[i].label, -1, r->pool); - vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars); - for (i = 0; i < record->n_vars; i++) + struct variable **vars = pool_nmalloc (r->pool, + record->n_vars, sizeof *vars); + unsigned int n_vars = 0; + for (size_t i = 0; i < record->n_vars; i++) { - vars[i] = lookup_var_by_index (r, record->pos, - var_recs, n_var_recs, record->vars[i]); - if (vars[i] == NULL) - return false; + int idx = record->vars[i]; + if (idx < 1 || idx > n_var_recs) + { + value_label_warning ( + r, record->pos, n_label_warnings, + _("Value label variable index %d not in valid range 1...%zu."), + idx, n_var_recs); + continue; + } + + const struct sfm_var_record *rec = &var_recs[idx - 1]; + if (rec->var == NULL) + { + value_label_warning ( + r, record->pos, n_label_warnings, + _("Value label variable index %d " + "refers to long string continuation."), idx); + continue; + } + + vars[n_vars++] = rec->var; } + if (!n_vars) + return; - for (i = 1; i < record->n_vars; i++) + for (size_t i = 1; i < n_vars; i++) if (var_get_type (vars[i]) != var_get_type (vars[0])) { - sys_error (r, record->pos, - _("Variables associated with value label are not all of " - "identical type. Variable %s is %s, but variable " - "%s is %s."), - var_get_name (vars[0]), - var_is_numeric (vars[0]) ? _("numeric") : _("string"), - var_get_name (vars[i]), - var_is_numeric (vars[i]) ? _("numeric") : _("string")); - return false; + value_label_warning ( + r, record->pos, n_label_warnings, + _("Variables associated with value label are not all of " + "identical type. Variable %s is %s, but variable " + "%s is %s."), + var_get_name (vars[0]), + var_is_numeric (vars[0]) ? _("numeric") : _("string"), + var_get_name (vars[i]), + var_is_numeric (vars[i]) ? _("numeric") : _("string")); + return; } - for (i = 0; i < record->n_vars; i++) + for (size_t i = 0; i < n_vars; i++) { struct variable *var = vars[i]; - int width; - size_t j; - - width = var_get_width (var); + int width = var_get_width (var); if (width > 8) { - sys_error (r, record->pos, - _("Value labels may not be added to long string " - "variables (e.g. %s) using records types 3 and 4."), - var_get_name (var)); - return false; + value_label_warning ( + r, record->pos, n_label_warnings, + _("Value labels may not be added to long string " + "variables (e.g. %s) using records types 3 and 4."), + var_get_name (var)); + continue; } - for (j = 0; j < record->n_labels; j++) + for (size_t j = 0; j < record->n_labels; j++) { struct sfm_value_label *label = &record->labels[j]; union value value; @@ -2226,7 +2238,7 @@ parse_value_labels (struct sfm_reader *r, struct dictionary *dict, if (width == 0) value.f = parse_float (r, label->value, 0); else - memcpy (value_str_rw (&value, width), label->value, width); + memcpy (value.s, label->value, width); if (!var_add_value_label (var, &value, utf8_labels[j])) { @@ -2239,14 +2251,14 @@ parse_value_labels (struct sfm_reader *r, struct dictionary *dict, string variable with width 3. */ } else if (var_is_numeric (var)) - sys_warn (r, record->pos, - _("Duplicate value label for %g on %s."), - value.f, var_get_name (var)); + value_label_warning (r, record->pos, n_label_warnings, + _("Duplicate value label for %g on %s."), + value.f, var_get_name (var)); else - sys_warn (r, record->pos, - _("Duplicate value label for `%.*s' on %s."), - width, value_str (&value, width), - var_get_name (var)); + value_label_warning ( + r, record->pos, n_label_warnings, + _("Duplicate value label for `%.*s' on %s."), + width, value.s, var_get_name (var)); } value_destroy (&value, width); @@ -2254,38 +2266,59 @@ parse_value_labels (struct sfm_reader *r, struct dictionary *dict, } pool_free (r->pool, vars); - for (i = 0; i < record->n_labels; i++) + for (size_t i = 0; i < record->n_labels; i++) pool_free (r->pool, utf8_labels[i]); pool_free (r->pool, utf8_labels); +} - return true; +static void +parse_value_labels (struct sfm_reader *r, struct dictionary *dict) +{ + int n_label_warnings = 0; + for (size_t i = 0; i < r->n_labels; i++) + parse_one_value_label_set (r, dict, r->vars, r->n_vars, &r->labels[i], + &n_label_warnings); + if (n_label_warnings > MAX_LABEL_WARNINGS) + sys_warn (r, -1, + _("Suppressed %d additional warnings for value labels."), + n_label_warnings - MAX_LABEL_WARNINGS); } static struct variable * -lookup_var_by_index (struct sfm_reader *r, off_t offset, - const struct sfm_var_record *var_recs, size_t n_var_recs, - int idx) +parse_weight_var (struct sfm_reader *r, + const struct sfm_var_record *var_recs, size_t n_var_recs, + int idx) { - const struct sfm_var_record *rec; + off_t offset = 76; /* Offset to variable index in header. */ if (idx < 1 || idx > n_var_recs) { - sys_error (r, offset, - _("Variable index %d not in valid range 1...%zu."), - idx, n_var_recs); + sys_warn (r, offset, + _("Weight variable index %d not in valid range 1...%zu. " + "Treating file as unweighted."), + idx, n_var_recs); return NULL; } - rec = &var_recs[idx - 1]; + const struct sfm_var_record *rec = &var_recs[idx - 1]; if (rec->var == NULL) { - sys_error (r, offset, - _("Variable index %d refers to long string continuation."), - idx); + sys_warn (r, offset, + _("Weight variable index %d refers to long string " + "continuation. Treating file as unweighted."), idx); return NULL; } - return rec->var; + struct variable *weight_var = rec->var; + if (!var_is_numeric (weight_var)) + { + sys_warn (r, offset, _("Ignoring string variable `%s' set " + "as weighting variable."), + var_get_name (weight_var)); + return NULL; + } + + return weight_var; } /* Parses a set of custom attributes from TEXT into ATTRS. @@ -2533,8 +2566,7 @@ parse_long_string_value_labels (struct sfm_reader *r, if (!skip) { if (value_length == width) - memcpy (value_str_rw (&value, width), - (const uint8_t *) record->data + ofs, width); + memcpy (value.s, (const uint8_t *) record->data + ofs, width); else { sys_warn (r, record->pos + ofs, @@ -2566,8 +2598,7 @@ parse_long_string_value_labels (struct sfm_reader *r, if (!var_add_value_label (var, &value, label)) sys_warn (r, record->pos + ofs, _("Duplicate value label for `%.*s' on %s."), - width, value_str (&value, width), - var_get_name (var)); + width, value.s, var_get_name (var)); pool_free (r->pool, label); } ofs += label_length; @@ -2701,8 +2732,7 @@ sys_file_casereader_read (struct casereader *reader, void *r_) retval = read_case_number (r, &v->f); else { - uint8_t *s = value_str_rw (v, sv->var_width); - retval = read_case_string (r, s + sv->offset, sv->segment_width); + retval = read_case_string (r, v->s + sv->offset, sv->segment_width); if (retval == 1) { retval = skip_whole_strings (r, ROUND_DOWN (sv->padding, 8));