X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=2ad146e88025699879c4aa536252bd8f3bd0ddf6;hb=f790dbda9d498eef9c9c0a49078adbeecf768d56;hp=7cd658ba818493a3c6ac58f7c5f1a561f73747df;hpb=fe7682b3c3d36cf9ba3e867588e5b808af833262;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 7cd658ba81..2ad146e880 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-2000, 2006-2007, 2009-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,6 +45,7 @@ #include "libpspp/assertion.h" #include "libpspp/compiler.h" #include "libpspp/i18n.h" +#include "libpspp/ll.h" #include "libpspp/message.h" #include "libpspp/misc.h" #include "libpspp/pool.h" @@ -158,6 +159,7 @@ struct sfm_mrset struct sfm_extension_record { + struct ll ll; /* In struct sfm_reader 'var_attrs' list. */ int subtype; /* Record subtype. */ off_t pos; /* Starting offset in file. */ unsigned int size; /* Size of data elements. */ @@ -184,6 +186,7 @@ struct sfm_reader struct sfm_mrset *mrsets; size_t n_mrsets; struct sfm_extension_record *extensions[32]; + struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */ /* File state. */ struct file_handle *fh; /* File handle. */ @@ -200,6 +203,7 @@ struct sfm_reader size_t sfm_var_cnt; /* Number of variables. */ int case_cnt; /* Number of cases */ const char *encoding; /* String encoding. */ + bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */ /* Decompression. */ enum any_compression compression; @@ -279,7 +283,7 @@ static bool read_variable_record (struct sfm_reader *, struct sfm_var_record *); static bool read_value_label_record (struct sfm_reader *, struct sfm_value_label_record *); -static struct sfm_document_record *read_document_record (struct sfm_reader *); +static bool read_document_record (struct sfm_reader *); static bool read_extension_record (struct sfm_reader *, int subtype, struct sfm_extension_record **); static bool skip_extension_record (struct sfm_reader *, int subtype); @@ -294,8 +298,7 @@ static bool read_variable_to_value_pair (struct sfm_reader *, struct text_record *, struct variable **var, char **value); static void text_warn (struct sfm_reader *r, struct text_record *text, - const char *format, ...) - PRINTF_FORMAT (3, 4); + const char *format, ...) PRINTF_FORMAT (3, 4); static char *text_get_token (struct text_record *, struct substring delimiters, char *delimiter); static bool text_match (struct text_record *, char c); @@ -401,6 +404,7 @@ sfm_open (struct file_handle *fh) pool_register (r->pool, free, r); r->fh = fh_ref (fh); r->opcode_idx = sizeof r->opcodes; + ll_init (&r->var_attrs); /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock() that identify types of files. */ @@ -408,7 +412,7 @@ sfm_open (struct file_handle *fh) if (r->lock == NULL) goto error; - r->file = fn_open (fh_get_file_name (fh), "rb"); + r->file = fn_open (fh, "rb"); if (r->file == NULL) { msg (ME, _("Error opening `%s' for reading as a system file: %s."), @@ -497,8 +501,7 @@ read_record (struct sfm_reader *r, int type, sys_error (r, r->pos, _("Duplicate type 6 (document) record.")); return false; } - r->document = read_document_record (r); - return r->document != NULL; + return read_document_record (r); case 7: if (!read_int (r, &subtype)) @@ -507,21 +510,32 @@ read_record (struct sfm_reader *r, int type, || subtype >= sizeof r->extensions / sizeof *r->extensions) { sys_warn (r, r->pos, - _("Unrecognized record type 7, subtype %d. Please " - "send a copy of this file, and the syntax which " - "created it to %s."), - subtype, PACKAGE_BUGREPORT); + _("Unrecognized record type 7, subtype %d. For help, " + "please send this file to %s and mention that you were " + "using %s."), + subtype, PACKAGE_BUGREPORT, PACKAGE_STRING); return skip_extension_record (r, subtype); } + else if (subtype == 18) + { + /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin" + put each variable attribute into a separate record with subtype + 18. I'm surprised that SPSS puts up with this. */ + struct sfm_extension_record *ext; + bool ok = read_extension_record (r, subtype, &ext); + if (ok && ext) + ll_push_tail (&r->var_attrs, &ext->ll); + return ok; + } else if (r->extensions[subtype] != NULL) { sys_warn (r, r->pos, _("Record type 7, subtype %d found here has the same " - "type as the record found near offset 0x%llx. " - "Please send a copy of this file, and the syntax " - "which created it to %s."), + "type as the record found near offset 0x%llx. For " + "help, please send this file to %s and mention that " + "you were using %s."), subtype, (long long int) r->extensions[subtype]->pos, - PACKAGE_BUGREPORT); + PACKAGE_BUGREPORT, PACKAGE_STRING); return skip_extension_record (r, subtype); } else @@ -537,7 +551,7 @@ read_record (struct sfm_reader *r, int type, /* Returns the character encoding obtained from R, or a null pointer if R doesn't have an indication of its character encoding. */ -const char * +static const char * sfm_get_encoding (const struct sfm_reader *r) { /* The EXT_ENCODING record is the best way to determine dictionary @@ -719,7 +733,6 @@ sfm_get_strings (const struct any_reader *r_, struct pool *pool, mrset_idx); } - /* */ /* data file attributes */ /* variable attributes */ /* long var map */ @@ -830,14 +843,15 @@ sfm_decode (struct any_reader *r_, const char *encoding, parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict); /* The following records use long names, so they need to follow renaming. */ - if (r->extensions[EXT_VAR_ATTRS] != NULL) + if (!ll_is_empty (&r->var_attrs)) { - parse_variable_attributes (r, r->extensions[EXT_VAR_ATTRS], dict); + struct sfm_extension_record *ext; + ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs) + parse_variable_attributes (r, ext, dict); /* Roles use the $@Role attribute. */ assign_variable_roles (r, dict); } - if (r->extensions[EXT_LONG_LABELS] != NULL) parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict); if (r->extensions[EXT_LONG_MISSING] != NULL) @@ -848,7 +862,7 @@ sfm_decode (struct any_reader *r_, const char *encoding, amount that the header claims. SPSS version 13 gets this wrong when very long strings are involved, so don't warn in that case. */ - if (r->header.nominal_case_size != -1 + if (r->header.nominal_case_size > 0 && r->header.nominal_case_size != r->n_vars && r->info.version_major != 13) sys_warn (r, -1, _("File header claims %d variable positions but " @@ -894,7 +908,7 @@ sfm_close (struct any_reader *r_) if (r->file) { - if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) + if (fn_close (r->fh, r->file) == EOF) { msg (ME, _("Error closing system file `%s': %s."), fh_get_file_name (r->fh), strerror (errno)); @@ -921,9 +935,8 @@ sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) sfm_close (&r->any_reader); } -/* Returns 1 if FILE is an SPSS system file, - 0 if it is not, - otherwise a negative errno value. */ +/* Detects whether FILE is an SPSS system file. Returns 1 if so, 0 if not, and + a negative errno value if there is an error reading FILE. */ static int sfm_detect (FILE *file) { @@ -932,7 +945,7 @@ sfm_detect (FILE *file) if (fseek (file, 0, SEEK_SET) != 0) return -errno; if (fread (magic, 4, 1, file) != 1) - return feof (file) ? 0 : -errno; + return ferror (file) ? -errno : 0; magic[4] = '\0'; return (!strcmp (ASCII_MAGIC, magic) @@ -955,6 +968,8 @@ read_header (struct sfm_reader *r, struct any_read_info *info, if (!read_string (r, header->magic, sizeof header->magic) || !read_string (r, header->eye_catcher, sizeof header->eye_catcher)) return false; + r->written_by_readstat = strstr (header->eye_catcher, + "https://github.com/WizardMac/ReadStat"); if (!strcmp (ASCII_MAGIC, header->magic) || !strcmp (EBCDIC_MAGIC, header->magic)) @@ -1216,33 +1231,35 @@ read_value_label_record (struct sfm_reader *r, return true; } -/* Reads a document record from R and returns it. */ -static struct sfm_document_record * +/* Reads a document record from R. Returns true if successful, false on + error. */ +static bool read_document_record (struct sfm_reader *r) { - struct sfm_document_record *record; int n_lines; - - record = pool_malloc (r->pool, sizeof *record); - record->pos = r->pos; - if (!read_int (r, &n_lines)) - return NULL; - if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) + return false; + else if (n_lines == 0) + return true; + else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) { - sys_error (r, record->pos, + sys_error (r, r->pos, _("Number of document lines (%d) " "must be greater than 0 and less than %d."), n_lines, INT_MAX / DOC_LINE_LENGTH); - return NULL; + return false; } + struct sfm_document_record *record; + record = pool_malloc (r->pool, sizeof *record); + record->pos = r->pos; record->n_lines = n_lines; record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines); if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines)) - return NULL; + return false; - return record; + r->document = record; + return true; } static bool @@ -1346,9 +1363,9 @@ read_extension_record (struct sfm_reader *r, int subtype, } sys_warn (r, record->pos, - _("Unrecognized record type 7, subtype %d. Please send a " - "copy of this file, and the syntax which created it to %s."), - subtype, PACKAGE_BUGREPORT); + _("Unrecognized record type 7, subtype %d. For help, please " + "send this file to %s and mention that you were using %s."), + subtype, PACKAGE_BUGREPORT, PACKAGE_STRING); skip: return skip_bytes (r, n_bytes); @@ -2047,7 +2064,8 @@ parse_long_var_name_map (struct sfm_reader *r, while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { /* Validate long name. */ - if (!dict_id_is_valid (dict, long_name, false)) + if (!dict_id_is_valid (dict, long_name, false) + || long_name[0] == '$' || long_name[0] == '#') { sys_warn (r, record->pos, _("Long variable mapping from %s to invalid " @@ -2213,7 +2231,15 @@ parse_value_labels (struct sfm_reader *r, struct dictionary *dict, if (!var_add_value_label (var, &value, utf8_labels[j])) { - if (var_is_numeric (var)) + if (r->written_by_readstat) + { + /* Ignore the problem. ReadStat is buggy and emits value + labels whose values are longer than string variables' + widths, that are identical in the actual width of the + variable, e.g. both values "ABC123" and "ABC456" for a + string variable with width 3. */ + } + else if (var_is_numeric (var)) sys_warn (r, record->pos, _("Duplicate value label for %g on %s."), value.f, var_get_name (var)); @@ -2294,20 +2320,20 @@ parse_attributes (struct sfm_reader *r, struct text_record *text, text_warn (r, text, _("Error parsing attribute value %s[%d]."), key, index); break; - } + } length = strlen (value); - if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') + if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') { value[length - 1] = '\0'; - attribute_add_value (attr, value + 1); + attribute_add_value (attr, value + 1); } - else + else { text_warn (r, text, _("Attribute value %s[%d] is not quoted: %s."), key, index, value); - attribute_add_value (attr, value); + attribute_add_value (attr, value); } /* Was this the last value for this attribute? */ @@ -2449,7 +2475,8 @@ parse_long_string_value_labels (struct sfm_reader *r, ofs += 4; /* Parse variable name, width, and number of labels. */ - if (!check_overflow (r, record, ofs, var_name_len + 8)) + if (!check_overflow (r, record, ofs, var_name_len) + || !check_overflow (r, record, ofs, var_name_len + 8)) return; var_name = recode_string_pool ("UTF-8", dict_encoding, (const char *) record->data + ofs, @@ -2567,7 +2594,8 @@ parse_long_string_missing_values (struct sfm_reader *r, ofs += 4; /* Parse variable name. */ - if (!check_overflow (r, record, ofs, var_name_len + 1)) + if (!check_overflow (r, record, ofs, var_name_len) + || !check_overflow (r, record, ofs, var_name_len + 1)) return; var_name = recode_string_pool ("UTF-8", dict_encoding, (const char *) record->data + ofs, @@ -2958,7 +2986,7 @@ open_text_record (struct sfm_reader *r, } /* Closes TEXT, frees its storage, and issues a final warning - about suppressed warnings if necesary. */ + about suppressed warnings if necessary. */ static void close_text_record (struct sfm_reader *r, struct text_record *text) { @@ -2981,7 +3009,7 @@ read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, { if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) return false; - + *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL); if (*value == NULL) return false; @@ -3037,7 +3065,7 @@ static void text_warn (struct sfm_reader *r, struct text_record *text, const char *format, ...) { - if (text->n_warnings++ < MAX_TEXT_WARNINGS) + if (text->n_warnings++ < MAX_TEXT_WARNINGS) { va_list args; @@ -3126,7 +3154,10 @@ text_parse_counted_string (struct sfm_reader *r, struct text_record *text) static bool text_match (struct text_record *text, char c) { - if (text->buffer.string[text->pos] == c) + if (text->pos >= text->buffer.length) + return false; + + if (text->buffer.string[text->pos] == c) { text->pos++; return true;