X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=66552dc4b46b85cdc58ffb8f6b509b168058199f;hb=refs%2Fheads%2Fvariable-sets;hp=1a17eb89a1ac38a248b3c269f5ee9259c54c2f0d;hpb=3bf92c99a566c03d4100ada7e83cafac7219c4fb;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 1a17eb89a1..66552dc4b4 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2000, 2006-2007, 2009-2016, 2021 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -200,8 +200,8 @@ struct sfm_reader enum integer_format integer_format; /* On-disk integer format. */ enum float_format float_format; /* On-disk floating point format. */ struct sfm_var *sfm_vars; /* Variables. */ - size_t sfm_var_cnt; /* Number of variables. */ - int case_cnt; /* Number of cases */ + size_t sfm_n_vars; /* Number of variables. */ + int n_cases; /* Number of cases */ const char *encoding; /* String encoding. */ bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */ @@ -331,7 +331,7 @@ static bool parse_variable_records (struct sfm_reader *, struct dictionary *, struct sfm_var_record *, size_t n); static void parse_format_spec (struct sfm_reader *, off_t pos, unsigned int format, enum which_format, - struct variable *, int *format_warning_cnt); + struct variable *, int *format_n_warnings); static void parse_document (struct dictionary *, struct sfm_document_record *); static void parse_display_parameters (struct sfm_reader *, const struct sfm_extension_record *, @@ -391,10 +391,9 @@ static struct any_reader * sfm_open (struct file_handle *fh) { size_t allocated_mrsets = 0; - struct sfm_reader *r; /* Create and initialize reader. */ - r = xzalloc (sizeof *r); + struct sfm_reader *r = XZALLOC (struct sfm_reader); r->any_reader.klass = &sys_file_reader_class; r->pool = pool_create (); pool_register (r->pool, free, r); @@ -640,6 +639,13 @@ add_id (struct get_strings_aux *aux, const char *id, const char *title, ...) va_end (args); } +static const char * +skip_prefix (const char *s, const char *prefix) +{ + size_t prefix_len = strlen (prefix); + return !strncmp (s, prefix, prefix_len) ? s + prefix_len : s; +} + /* Retrieves significant string data from R in its raw format, to allow the caller to try to detect the encoding in use. @@ -689,7 +695,7 @@ sfm_get_strings (const struct any_reader *r_, struct pool *pool, add_string (&aux, r->header.creation_date, _("Creation Date")); add_string (&aux, r->header.creation_time, _("Creation Time")); - add_string (&aux, r->header.eye_catcher, _("Product")); + add_string (&aux, skip_prefix (r->header.eye_catcher, "@(#) "), _("Product")); add_string (&aux, r->header.file_label, _("File Label")); if (r->extensions[EXT_PRODUCT_INFO]) @@ -738,7 +744,7 @@ sfm_get_strings (const struct any_reader *r_, struct pool *pool, return aux.n; } -/* Decodes the dictionary read from R, saving it into into *DICT. Character +/* Decodes the dictionary read from R, saving it into *DICT. Character strings in R are decoded using ENCODING, or an encoding obtained from R if ENCODING is null, or the locale encoding if R specifies no encoding. @@ -850,7 +856,7 @@ sfm_decode (struct any_reader *r_, const char *encoding, sfm_read_case to use. We cannot use the `struct variable's from the dictionary we created, because the caller owns the dictionary and may destroy or modify its variables. */ - sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt); + sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_n_vars); pool_register (r->pool, free, r->sfm_vars); r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool); @@ -862,9 +868,8 @@ sfm_decode (struct any_reader *r_, const char *encoding, } return casereader_create_sequential - (NULL, r->proto, - r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, - &sys_file_casereader_class, r); + (NULL, r->proto, r->n_cases == -1 ? CASENUMBER_MAX : r->n_cases, + &sys_file_casereader_class, r); error: sfm_close (r_); @@ -988,7 +993,7 @@ read_header (struct sfm_reader *r, struct any_read_info *info, r->compression = ANY_COMP_NONE; else if (compressed == 1) r->compression = ANY_COMP_SIMPLE; - else if (compressed != 0) + else { sys_error (r, 0, "System file header has invalid compression " "value %d.", compressed); @@ -1010,10 +1015,10 @@ read_header (struct sfm_reader *r, struct any_read_info *info, if (!read_int (r, &header->weight_idx)) return false; - if (!read_int (r, &r->case_cnt)) + if (!read_int (r, &r->n_cases)) return false; - if (r->case_cnt > INT_MAX / 2) - r->case_cnt = -1; + if (r->n_cases > INT_MAX / 2) + r->n_cases = -1; /* Identify floating-point format and obtain compression bias. */ if (!read_bytes (r, raw_bias, sizeof raw_bias)) @@ -1052,7 +1057,7 @@ read_header (struct sfm_reader *r, struct any_read_info *info, info->integer_format = r->integer_format; info->float_format = r->float_format; info->compression = r->compression; - info->case_cnt = r->case_cnt; + info->n_cases = r->n_cases; return true; } @@ -1428,8 +1433,7 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, } struct variable *var; - if (!dict_id_is_valid (dict, name, false) - || name[0] == '$' || name[0] == '#') + if (!dict_id_is_valid (dict, name) || name[0] == '$' || name[0] == '#') { var = add_var_with_generated_name (dict, rec->width); sys_warn (r, rec->pos, _("Renaming variable with invalid name " @@ -1536,9 +1540,9 @@ parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format, if (fmt_from_u32 (format, var_get_width (v), false, &f)) { if (which == PRINT_FORMAT) - var_set_print_format (v, &f); + var_set_print_format (v, f); else - var_set_write_format (v, &f); + var_set_write_format (v, f); } else if (format == 0) { @@ -1687,9 +1691,9 @@ parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record, text = open_text_record (r, record, false); for (;;) { - struct sfm_mrset *mrset; - size_t allocated_vars; - char delimiter; + struct sfm_mrset *mrset = NULL; + size_t allocated_vars = 0; + char delimiter = '4'; /* Skip extra line feeds if present. */ while (text_match (text, '\n')) @@ -1812,7 +1816,7 @@ decode_mrsets (struct sfm_reader *r, struct dictionary *dict) size_t i; name = recode_string ("UTF-8", r->encoding, s->name, -1); - if (!mrset_is_valid_name (name, dict_get_encoding (dict), false)) + if (!mrset_is_valid_name (name, dict_get_encoding (dict))) { sys_warn (r, -1, _("Invalid multiple response set name `%s'."), name); @@ -1912,7 +1916,7 @@ parse_display_parameters (struct sfm_reader *r, size_t ofs; size_t i; - n_vars = dict_get_var_cnt (dict); + n_vars = dict_get_n_vars (dict); if (record->count == 3 * n_vars) includes_width = true; else if (record->count == 2 * n_vars) @@ -1987,12 +1991,12 @@ rename_var_and_save_short_names (struct sfm_reader *r, off_t pos, /* Renaming a variable may clear its short names, but we want to retain them, so we save them and re-set them afterward. */ - n_short_names = var_get_short_name_cnt (var); + n_short_names = var_get_n_short_names (var); short_names = xnmalloc (n_short_names, sizeof *short_names); for (i = 0; i < n_short_names; i++) { const char *s = var_get_short_name (var, i); - short_names[i] = s != NULL ? xstrdup (s) : NULL; + short_names[i] = xstrdup_if_nonnull (s); } /* Set long name. */ @@ -2025,7 +2029,7 @@ parse_long_var_name_map (struct sfm_reader *r, converted to lowercase, as the long variable names. */ size_t i; - for (i = 0; i < dict_get_var_cnt (dict); i++) + for (i = 0; i < dict_get_n_vars (dict); i++) { struct variable *var = dict_get_var (dict, i); char *new_name; @@ -2046,7 +2050,7 @@ parse_long_var_name_map (struct sfm_reader *r, while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { /* Validate long name. */ - if (!dict_id_is_valid (dict, long_name, false) + if (!dict_id_is_valid (dict, long_name) || long_name[0] == '$' || long_name[0] == '#') { sys_warn (r, record->pos, @@ -2077,7 +2081,6 @@ parse_long_string_map (struct sfm_reader *r, { size_t idx = var_get_dict_index (var); long int length; - int segment_cnt; int i; /* Get length. */ @@ -2092,8 +2095,8 @@ parse_long_string_map (struct sfm_reader *r, } /* Check segments. */ - segment_cnt = sfm_width_to_segments (length); - if (segment_cnt == 1) + int n_segments = sfm_width_to_segments (length); + if (n_segments == 1) { sys_warn (r, record->pos, _("%s listed in very long string record with width %s, " @@ -2101,7 +2104,7 @@ parse_long_string_map (struct sfm_reader *r, var_get_name (var), length_s); continue; } - if (idx + segment_cnt > dict_get_var_cnt (dict)) + if (idx + n_segments > dict_get_n_vars (dict)) { sys_error (r, record->pos, _("Very long string %s overflows dictionary."), @@ -2111,7 +2114,7 @@ parse_long_string_map (struct sfm_reader *r, /* Get the short names from the segments and check their lengths. */ - for (i = 0; i < segment_cnt; i++) + for (i = 0; i < n_segments; i++) { struct variable *seg = dict_get_var (dict, idx + i); int alloc_width = sfm_segment_alloc_width (length, i); @@ -2128,7 +2131,7 @@ parse_long_string_map (struct sfm_reader *r, return false; } } - dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); + dict_delete_consecutive_vars (dict, idx + 1, n_segments - 1); var_set_width (var, length); } close_text_record (r, text); @@ -2421,7 +2424,7 @@ assign_variable_roles (struct sfm_reader *r, struct dictionary *dict) size_t n_warnings = 0; size_t i; - for (i = 0; i < dict_get_var_cnt (dict); i++) + for (i = 0; i < dict_get_n_vars (dict); i++) { struct variable *var = dict_get_var (dict, i); struct attrset *attrs = var_get_attributes (var); @@ -2474,21 +2477,28 @@ assign_variable_roles (struct sfm_reader *r, struct dictionary *dict) } static bool -check_overflow (struct sfm_reader *r, - const struct sfm_extension_record *record, - size_t ofs, size_t length) +check_overflow__ (const struct sfm_extension_record *record, + size_t ofs, size_t length) { size_t end = record->size * record->count; if (length >= end || ofs + length > end) - { - sys_warn (r, record->pos + end, - _("Extension record subtype %d ends unexpectedly."), - record->subtype); - return false; - } + return false; return true; } +static bool +check_overflow (struct sfm_reader *r, + const struct sfm_extension_record *record, + size_t ofs, size_t length) +{ + bool ok = check_overflow__ (record, ofs, length); + if (!ok) + sys_warn (r, record->pos + record->size * record->count, + _("Extension record subtype %d ends unexpectedly."), + record->subtype); + return ok; +} + static void parse_long_string_value_labels (struct sfm_reader *r, const struct sfm_extension_record *record, @@ -2615,6 +2625,7 @@ parse_long_string_missing_values (struct sfm_reader *r, size_t end = record->size * record->count; size_t ofs = 0; + bool warned = false; while (ofs < end) { struct missing_values mv; @@ -2663,17 +2674,32 @@ parse_long_string_missing_values (struct sfm_reader *r, var = NULL; } + /* Parse value length. */ + if (!check_overflow (r, record, ofs, 4)) + return; + size_t value_length = parse_int (r, record->data, ofs); + ofs += 4; + /* Parse values. */ mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8); for (i = 0; i < n_missing_values; i++) { - size_t value_length; - - /* Parse value length. */ - if (!check_overflow (r, record, ofs, 4)) - return; - value_length = parse_int (r, record->data, ofs); - ofs += 4; + /* Tolerate files written by old, buggy versions of PSPP where we + believed that the value_length was repeated before each missing + value. */ + if (check_overflow__ (record, ofs, value_length) + && parse_int (r, record->data, ofs) == 8) + { + if (!warned) + { + sys_warn (r, record->pos + ofs, + _("This file has corrupted metadata written by a " + "buggy version of PSPP. To fix it, save a new " + "copy of the file.")); + warned = true; + } + ofs += 4; + } /* Parse value. */ if (!check_overflow (r, record, ofs, value_length)) @@ -2718,12 +2744,12 @@ sys_file_casereader_read (struct casereader *reader, void *r_) int retval; int i; - if (r->error || !r->sfm_var_cnt) + if (r->error || !r->sfm_n_vars) return NULL; c = case_create (r->proto); - for (i = 0; i < r->sfm_var_cnt; i++) + for (i = 0; i < r->sfm_n_vars; i++) { struct sfm_var *sv = &r->sfm_vars[i]; union value *v = case_data_rw_idx (c, sv->case_index); @@ -2749,7 +2775,7 @@ sys_file_casereader_read (struct casereader *reader, void *r_) eof: if (i != 0) partial_record (r); - if (r->case_cnt != -1) + if (r->n_cases != -1) read_error (reader, r); case_unref (c); return NULL; @@ -3119,7 +3145,11 @@ text_get_token (struct text_record *text, struct substring delimiters, char *end; if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) - return NULL; + { + if (delimiter != NULL) + *delimiter = ss_data (text->buffer)[text->pos-1]; + return NULL; + } end = &ss_data (token)[ss_length (token)]; if (delimiter != NULL) @@ -3233,12 +3263,13 @@ sys_msg (struct sfm_reader *r, off_t offset, ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh)); ds_put_vformat (&text, format, args); - struct msg m = { + struct msg *m = xmalloc (sizeof *m); + *m = (struct msg) { .category = msg_class_to_category (class), .severity = msg_class_to_severity (class), - .text = ds_cstr (&text), + .text = ds_steal_cstr (&text), }; - msg_emit (&m); + msg_emit (m); } /* Displays a warning for offset OFFSET in the file. */ @@ -3273,11 +3304,11 @@ sys_error (struct sfm_reader *r, off_t offset, const char *format, ...) an error. */ static inline int read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, - void *buf, size_t byte_cnt) + void *buf, size_t n_bytes) { - size_t bytes_read = fread (buf, 1, byte_cnt, r->file); + size_t bytes_read = fread (buf, 1, n_bytes, r->file); r->pos += bytes_read; - if (bytes_read == byte_cnt) + if (bytes_read == n_bytes) return 1; else if (ferror (r->file)) { @@ -3297,9 +3328,9 @@ read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, Returns true if successful. Returns false upon I/O error or if end-of-file is encountered. */ static bool -read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { - return read_bytes_internal (r, false, buf, byte_cnt) == 1; + return read_bytes_internal (r, false, buf, n_bytes) == 1; } /* Reads BYTE_CNT bytes into BUF. @@ -3307,9 +3338,9 @@ read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) Returns 0 if an immediate end-of-file is encountered. Returns -1 if an I/O error or a partial read occurs. */ static int -try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +try_read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { - return read_bytes_internal (r, true, buf, byte_cnt); + return read_bytes_internal (r, true, buf, n_bytes); } /* Reads a 32-bit signed integer from R and stores its value in host format in @@ -3700,11 +3731,11 @@ close_zstream (struct sfm_reader *r) } static int -read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt) +read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t n_bytes) { uint8_t *buf = buf_; - if (byte_cnt == 0) + if (n_bytes == 0) return 1; for (;;) @@ -3714,13 +3745,13 @@ read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt) /* Use already inflated data if there is any. */ if (r->zout_pos < r->zout_end) { - unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos); + unsigned int n = MIN (n_bytes, r->zout_end - r->zout_pos); memcpy (buf, &r->zout_buf[r->zout_pos], n); r->zout_pos += n; - byte_cnt -= n; + n_bytes -= n; buf += n; - if (byte_cnt == 0) + if (n_bytes == 0) return 1; } @@ -3767,13 +3798,13 @@ read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt) } static int -read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +read_compressed_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { if (r->compression == ANY_COMP_SIMPLE) - return read_bytes (r, buf, byte_cnt); + return read_bytes (r, buf, n_bytes); else { - int retval = read_bytes_zlib (r, buf, byte_cnt); + int retval = read_bytes_zlib (r, buf, n_bytes); if (retval == 0) sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data.")); return retval; @@ -3781,12 +3812,12 @@ read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) } static int -try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { if (r->compression == ANY_COMP_SIMPLE) - return try_read_bytes (r, buf, byte_cnt); + return try_read_bytes (r, buf, n_bytes); else - return read_bytes_zlib (r, buf, byte_cnt); + return read_bytes_zlib (r, buf, n_bytes); } /* Reads a 64-bit floating-point number from R and returns its