X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=7684acbc576a40c77c78c7a2ea6b7d57360127ac;hb=339f1956cc72;hp=c0bdf0ba61594db25dc177eb68d26cf423662d79;hpb=57c1048c20829711ddcfe363e2d21812a450a522;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index c0bdf0ba61..7684acbc57 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2000, 2006-2007, 2009-2016, 2021 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -200,8 +200,8 @@ struct sfm_reader enum integer_format integer_format; /* On-disk integer format. */ enum float_format float_format; /* On-disk floating point format. */ struct sfm_var *sfm_vars; /* Variables. */ - size_t sfm_var_cnt; /* Number of variables. */ - int case_cnt; /* Number of cases */ + size_t sfm_n_vars; /* Number of variables. */ + int n_cases; /* Number of cases */ const char *encoding; /* String encoding. */ bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */ @@ -331,7 +331,7 @@ static bool parse_variable_records (struct sfm_reader *, struct dictionary *, struct sfm_var_record *, size_t n); static void parse_format_spec (struct sfm_reader *, off_t pos, unsigned int format, enum which_format, - struct variable *, int *format_warning_cnt); + struct variable *, int *format_n_warnings); static void parse_document (struct dictionary *, struct sfm_document_record *); static void parse_display_parameters (struct sfm_reader *, const struct sfm_extension_record *, @@ -391,10 +391,9 @@ static struct any_reader * sfm_open (struct file_handle *fh) { size_t allocated_mrsets = 0; - struct sfm_reader *r; /* Create and initialize reader. */ - r = xzalloc (sizeof *r); + struct sfm_reader *r = XZALLOC (struct sfm_reader); r->any_reader.klass = &sys_file_reader_class; r->pool = pool_create (); pool_register (r->pool, free, r); @@ -493,10 +492,7 @@ read_record (struct sfm_reader *r, int type, case 6: if (r->document != NULL) - { - sys_error (r, r->pos, _("Duplicate type 6 (document) record.")); - return false; - } + sys_warn (r, r->pos, _("Duplicate type 6 (document) record.")); return read_document_record (r); case 7: @@ -643,6 +639,13 @@ add_id (struct get_strings_aux *aux, const char *id, const char *title, ...) va_end (args); } +static const char * +skip_prefix (const char *s, const char *prefix) +{ + size_t prefix_len = strlen (prefix); + return !strncmp (s, prefix, prefix_len) ? s + prefix_len : s; +} + /* Retrieves significant string data from R in its raw format, to allow the caller to try to detect the encoding in use. @@ -692,7 +695,7 @@ sfm_get_strings (const struct any_reader *r_, struct pool *pool, add_string (&aux, r->header.creation_date, _("Creation Date")); add_string (&aux, r->header.creation_time, _("Creation Time")); - add_string (&aux, r->header.eye_catcher, _("Product")); + add_string (&aux, skip_prefix (r->header.eye_catcher, "@(#) "), _("Product")); add_string (&aux, r->header.file_label, _("File Label")); if (r->extensions[EXT_PRODUCT_INFO]) @@ -853,7 +856,7 @@ sfm_decode (struct any_reader *r_, const char *encoding, sfm_read_case to use. We cannot use the `struct variable's from the dictionary we created, because the caller owns the dictionary and may destroy or modify its variables. */ - sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt); + sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_n_vars); pool_register (r->pool, free, r->sfm_vars); r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool); @@ -865,9 +868,8 @@ sfm_decode (struct any_reader *r_, const char *encoding, } return casereader_create_sequential - (NULL, r->proto, - r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, - &sys_file_casereader_class, r); + (NULL, r->proto, r->n_cases == -1 ? CASENUMBER_MAX : r->n_cases, + &sys_file_casereader_class, r); error: sfm_close (r_); @@ -991,7 +993,7 @@ read_header (struct sfm_reader *r, struct any_read_info *info, r->compression = ANY_COMP_NONE; else if (compressed == 1) r->compression = ANY_COMP_SIMPLE; - else if (compressed != 0) + else { sys_error (r, 0, "System file header has invalid compression " "value %d.", compressed); @@ -1013,10 +1015,10 @@ read_header (struct sfm_reader *r, struct any_read_info *info, if (!read_int (r, &header->weight_idx)) return false; - if (!read_int (r, &r->case_cnt)) + if (!read_int (r, &r->n_cases)) return false; - if (r->case_cnt > INT_MAX / 2) - r->case_cnt = -1; + if (r->n_cases > INT_MAX / 2) + r->n_cases = -1; /* Identify floating-point format and obtain compression bias. */ if (!read_bytes (r, raw_bias, sizeof raw_bias)) @@ -1055,7 +1057,7 @@ read_header (struct sfm_reader *r, struct any_read_info *info, info->integer_format = r->integer_format; info->float_format = r->float_format; info->compression = r->compression; - info->case_cnt = r->case_cnt; + info->n_cases = r->n_cases; return true; } @@ -1392,6 +1394,15 @@ parse_header (struct sfm_reader *r, const struct sfm_header_record *header, info->product = ss_xstrdup (product); } +static struct variable * +add_var_with_generated_name (struct dictionary *dict, int width) +{ + char *name = dict_make_unique_var_name (dict, NULL, NULL); + struct variable *var = dict_create_var_assert (dict, name, width); + free (name); + return var; +} + /* Reads a variable (type 2) record from R and adds the corresponding variable to DICT. Also skips past additional variable records for long string @@ -1406,7 +1417,6 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, for (rec = var_recs; rec < &var_recs[n_var_recs];) { - struct variable *var; size_t n_values; char *name; size_t i; @@ -1415,13 +1425,6 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, rec->name, -1, r->pool); name[strcspn (name, " ")] = '\0'; - if (!dict_id_is_valid (dict, name, false) - || name[0] == '$' || name[0] == '#') - { - sys_error (r, rec->pos, _("Invalid variable name `%s'."), name); - return false; - } - if (rec->width < 0 || rec->width > 255) { sys_error (r, rec->pos, @@ -1429,17 +1432,26 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, return false; } - var = rec->var = dict_create_var (dict, name, rec->width); - if (var == NULL) + struct variable *var; + if (!dict_id_is_valid (dict, name, false) + || name[0] == '$' || name[0] == '#') { - char *new_name = dict_make_unique_var_name (dict, NULL, NULL); - sys_warn (r, rec->pos, _("Renaming variable with duplicate name " - "`%s' to `%s'."), - name, new_name); - var = rec->var = dict_create_var_assert (dict, new_name, rec->width); - var_set_short_name (var, 0, new_name); - free (new_name); + var = add_var_with_generated_name (dict, rec->width); + sys_warn (r, rec->pos, _("Renaming variable with invalid name " + "`%s' to `%s'."), name, var_get_name (var)); } + else + { + var = dict_create_var (dict, name, rec->width); + if (var == NULL) + { + var = add_var_with_generated_name (dict, rec->width); + sys_warn (r, rec->pos, _("Renaming variable with duplicate name " + "`%s' to `%s'."), + name, var_get_name (var)); + } + } + rec->var = var; /* Set the short name the same as the long name (even if we renamed it). */ @@ -1680,9 +1692,9 @@ parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record, text = open_text_record (r, record, false); for (;;) { - struct sfm_mrset *mrset; - size_t allocated_vars; - char delimiter; + struct sfm_mrset *mrset = NULL; + size_t allocated_vars = 0; + char delimiter = '4'; /* Skip extra line feeds if present. */ while (text_match (text, '\n')) @@ -1905,7 +1917,7 @@ parse_display_parameters (struct sfm_reader *r, size_t ofs; size_t i; - n_vars = dict_get_var_cnt (dict); + n_vars = dict_get_n_vars (dict); if (record->count == 3 * n_vars) includes_width = true; else if (record->count == 2 * n_vars) @@ -1980,12 +1992,12 @@ rename_var_and_save_short_names (struct sfm_reader *r, off_t pos, /* Renaming a variable may clear its short names, but we want to retain them, so we save them and re-set them afterward. */ - n_short_names = var_get_short_name_cnt (var); + n_short_names = var_get_n_short_names (var); short_names = xnmalloc (n_short_names, sizeof *short_names); for (i = 0; i < n_short_names; i++) { const char *s = var_get_short_name (var, i); - short_names[i] = s != NULL ? xstrdup (s) : NULL; + short_names[i] = xstrdup_if_nonnull (s); } /* Set long name. */ @@ -2018,7 +2030,7 @@ parse_long_var_name_map (struct sfm_reader *r, converted to lowercase, as the long variable names. */ size_t i; - for (i = 0; i < dict_get_var_cnt (dict); i++) + for (i = 0; i < dict_get_n_vars (dict); i++) { struct variable *var = dict_get_var (dict, i); char *new_name; @@ -2070,7 +2082,6 @@ parse_long_string_map (struct sfm_reader *r, { size_t idx = var_get_dict_index (var); long int length; - int segment_cnt; int i; /* Get length. */ @@ -2085,8 +2096,8 @@ parse_long_string_map (struct sfm_reader *r, } /* Check segments. */ - segment_cnt = sfm_width_to_segments (length); - if (segment_cnt == 1) + int n_segments = sfm_width_to_segments (length); + if (n_segments == 1) { sys_warn (r, record->pos, _("%s listed in very long string record with width %s, " @@ -2094,7 +2105,7 @@ parse_long_string_map (struct sfm_reader *r, var_get_name (var), length_s); continue; } - if (idx + segment_cnt > dict_get_var_cnt (dict)) + if (idx + n_segments > dict_get_n_vars (dict)) { sys_error (r, record->pos, _("Very long string %s overflows dictionary."), @@ -2104,7 +2115,7 @@ parse_long_string_map (struct sfm_reader *r, /* Get the short names from the segments and check their lengths. */ - for (i = 0; i < segment_cnt; i++) + for (i = 0; i < n_segments; i++) { struct variable *seg = dict_get_var (dict, idx + i); int alloc_width = sfm_segment_alloc_width (length, i); @@ -2121,7 +2132,7 @@ parse_long_string_map (struct sfm_reader *r, return false; } } - dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); + dict_delete_consecutive_vars (dict, idx + 1, n_segments - 1); var_set_width (var, length); } close_text_record (r, text); @@ -2414,7 +2425,7 @@ assign_variable_roles (struct sfm_reader *r, struct dictionary *dict) size_t n_warnings = 0; size_t i; - for (i = 0; i < dict_get_var_cnt (dict); i++) + for (i = 0; i < dict_get_n_vars (dict); i++) { struct variable *var = dict_get_var (dict, i); struct attrset *attrs = var_get_attributes (var); @@ -2711,12 +2722,12 @@ sys_file_casereader_read (struct casereader *reader, void *r_) int retval; int i; - if (r->error || !r->sfm_var_cnt) + if (r->error || !r->sfm_n_vars) return NULL; c = case_create (r->proto); - for (i = 0; i < r->sfm_var_cnt; i++) + for (i = 0; i < r->sfm_n_vars; i++) { struct sfm_var *sv = &r->sfm_vars[i]; union value *v = case_data_rw_idx (c, sv->case_index); @@ -2742,7 +2753,7 @@ sys_file_casereader_read (struct casereader *reader, void *r_) eof: if (i != 0) partial_record (r); - if (r->case_cnt != -1) + if (r->n_cases != -1) read_error (reader, r); case_unref (c); return NULL; @@ -3112,7 +3123,11 @@ text_get_token (struct text_record *text, struct substring delimiters, char *end; if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) - return NULL; + { + if (delimiter != NULL) + *delimiter = ss_data (text->buffer)[text->pos-1]; + return NULL; + } end = &ss_data (token)[ss_length (token)]; if (delimiter != NULL) @@ -3226,12 +3241,13 @@ sys_msg (struct sfm_reader *r, off_t offset, ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh)); ds_put_vformat (&text, format, args); - struct msg m = { + struct msg *m = xmalloc (sizeof *m); + *m = (struct msg) { .category = msg_class_to_category (class), .severity = msg_class_to_severity (class), - .text = ds_cstr (&text), + .text = ds_steal_cstr (&text), }; - msg_emit (&m); + msg_emit (m); } /* Displays a warning for offset OFFSET in the file. */ @@ -3266,11 +3282,11 @@ sys_error (struct sfm_reader *r, off_t offset, const char *format, ...) an error. */ static inline int read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, - void *buf, size_t byte_cnt) + void *buf, size_t n_bytes) { - size_t bytes_read = fread (buf, 1, byte_cnt, r->file); + size_t bytes_read = fread (buf, 1, n_bytes, r->file); r->pos += bytes_read; - if (bytes_read == byte_cnt) + if (bytes_read == n_bytes) return 1; else if (ferror (r->file)) { @@ -3290,9 +3306,9 @@ read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, Returns true if successful. Returns false upon I/O error or if end-of-file is encountered. */ static bool -read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { - return read_bytes_internal (r, false, buf, byte_cnt) == 1; + return read_bytes_internal (r, false, buf, n_bytes) == 1; } /* Reads BYTE_CNT bytes into BUF. @@ -3300,9 +3316,9 @@ read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) Returns 0 if an immediate end-of-file is encountered. Returns -1 if an I/O error or a partial read occurs. */ static int -try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +try_read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { - return read_bytes_internal (r, true, buf, byte_cnt); + return read_bytes_internal (r, true, buf, n_bytes); } /* Reads a 32-bit signed integer from R and stores its value in host format in @@ -3693,11 +3709,11 @@ close_zstream (struct sfm_reader *r) } static int -read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt) +read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t n_bytes) { uint8_t *buf = buf_; - if (byte_cnt == 0) + if (n_bytes == 0) return 1; for (;;) @@ -3707,13 +3723,13 @@ read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt) /* Use already inflated data if there is any. */ if (r->zout_pos < r->zout_end) { - unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos); + unsigned int n = MIN (n_bytes, r->zout_end - r->zout_pos); memcpy (buf, &r->zout_buf[r->zout_pos], n); r->zout_pos += n; - byte_cnt -= n; + n_bytes -= n; buf += n; - if (byte_cnt == 0) + if (n_bytes == 0) return 1; } @@ -3760,13 +3776,13 @@ read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt) } static int -read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +read_compressed_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { if (r->compression == ANY_COMP_SIMPLE) - return read_bytes (r, buf, byte_cnt); + return read_bytes (r, buf, n_bytes); else { - int retval = read_bytes_zlib (r, buf, byte_cnt); + int retval = read_bytes_zlib (r, buf, n_bytes); if (retval == 0) sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data.")); return retval; @@ -3774,12 +3790,12 @@ read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) } static int -try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { if (r->compression == ANY_COMP_SIMPLE) - return try_read_bytes (r, buf, byte_cnt); + return try_read_bytes (r, buf, n_bytes); else - return read_bytes_zlib (r, buf, byte_cnt); + return read_bytes_zlib (r, buf, n_bytes); } /* Reads a 64-bit floating-point number from R and returns its