X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=8d973e4dbc103d09860e4c14cc12a006fda0bfcd;hb=e624e2da6ea68d22e6d4fba4eaa96d37d07a6730;hp=1d172adefd46a65d9751832ffc10a7df3cf90efc;hpb=a9acce47d67e0ab35ce1690e4f1b1ac0121c2d78;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 1d172ade..8d973e4d 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -86,9 +86,10 @@ struct sfm_reader double bias; /* Compression bias, usually 100.0. */ uint8_t opcodes[8]; /* Current block of opcodes. */ size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */ + bool corruption_warning; /* Warned about possible corruption? */ }; -static struct casereader_class sys_file_casereader_class; +static const struct casereader_class sys_file_casereader_class; static bool close_reader (struct sfm_reader *); @@ -192,6 +193,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->oct_cnt = 0; r->has_long_var_names = false; r->opcode_idx = sizeof r->opcodes; + r->corruption_warning = false; /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock() that identify types of files. */ @@ -425,10 +427,21 @@ read_header (struct sfm_reader *r, struct dictionary *dict, read_bytes (r, raw_bias, sizeof raw_bias); if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) { - sys_warn (r, _("Compression bias (%g) is not the usual " - "value of 100, or system file uses unrecognized " - "floating-point format."), - r->bias); + uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (memcmp (raw_bias, zero_bias, 8)) + sys_warn (r, _("Compression bias is not the usual " + "value of 100, or system file uses unrecognized " + "floating-point format.")); + else + { + /* Some software is known to write all-zeros to this + field. Such software also writes floating-point + numbers in the format that we expect by default + (it seems that all software most likely does, in + reality), so don't warn in this case. */ + } + if (r->integer_format == INTEGER_MSB_FIRST) r->float_format = FLOAT_IEEE_DOUBLE_BE; else @@ -549,11 +562,14 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, for (i = 0; i < missing_value_code; i++) mv_add_num (&mv, read_float (r)); } - else if (var_get_width (var) <= MAX_SHORT_STRING) + else { if (missing_value_code < 1 || missing_value_code > 3) sys_error (r, _("String missing value indicator field is not " "0, 1, 2, or 3.")); + if (var_is_long_string (var)) + sys_warn (r, _("Ignoring missing values on long string variable " + "%s, which PSPP does not yet support."), name); for (i = 0; i < missing_value_code; i++) { char string[9]; @@ -561,10 +577,8 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, mv_add_str (&mv, string); } } - else - sys_error (r, _("Long string variable %s may not have missing " - "values."), name); - var_set_missing_values (var, &mv); + if (!var_is_long_string (var)) + var_set_missing_values (var, &mv); } /* Set formats. */ @@ -728,7 +742,11 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, break; case 7: - /* Unknown purpose. */ + /* Used by the MRSETS command. */ + break; + + case 8: + /* Used by the SPSS Data Entry software. */ break; case 11: @@ -752,8 +770,21 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, SPSS 14. */ break; + case 20: + /* New in SPSS 16. Contains a single string that describes + the character encoding, e.g. "windows-1252". */ + break; + + case 21: + /* New in SPSS 16. Encodes value labels for long string + variables. */ + sys_warn (r, _("Ignoring value labels for long string variables, " + "which PSPP does not yet support.")); + break; + default: - sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); + sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"), + subtype, PACKAGE_BUGREPORT); break; } @@ -811,7 +842,7 @@ read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, NOT_REACHED (); if (integer_representation != expected_integer_format) { - static const char *endian[] = {N_("little-endian"), N_("big-endian")}; + static const char *const endian[] = {N_("little-endian"), N_("big-endian")}; sys_warn (r, _("Integer format indicated by system file (%s) " "differs from expected (%s)."), gettext (endian[integer_representation == 1]), @@ -845,19 +876,36 @@ static void read_display_parameters (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - const size_t n_vars = count / 3 ; + size_t n_vars; + bool includes_width; bool warned = false; - int i; + size_t i; - if (count % 3 || n_vars != dict_get_var_cnt (dict)) - sys_error (r, _("Bad size (%zu) or count (%zu) on extension 11."), - size, count); + if (size != 4) + { + sys_warn (r, _("Bad size %zu on extension 11."), size); + skip_bytes (r, size * count); + return; + } + + n_vars = dict_get_var_cnt (dict); + if (count == 3 * n_vars) + includes_width = true; + else if (count == 2 * n_vars) + includes_width = false; + else + { + sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."), + count, n_vars); + skip_bytes (r, size * count); + return; + } for (i = 0; i < n_vars; ++i) { struct variable *v = dict_get_var (dict, i); int measure = read_int (r); - int width = read_int (r); + int width = includes_width ? read_int (r) : 0; int align = read_int (r); /* SPSS 14 sometimes seems to set string variables' measure @@ -865,16 +913,13 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count, if (0 == measure && var_is_alpha (v)) measure = 1; - /* Older versions (SPSS 9.0) sometimes set the display width - to zero. This causes confusion especially in the GUI */ - if (0 == width) - width = 8; - if (measure < 1 || measure > 3 || align < 0 || align > 2) { if (!warned) - sys_warn (r, _("Invalid variable display parameters. " - "Default parameters substituted.")); + sys_warn (r, _("Invalid variable display parameters " + "for variable %zu (%s). " + "Default parameters substituted."), + i, var_get_name (v)); warned = true; continue; } @@ -882,10 +927,15 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count, var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL : measure == 2 ? MEASURE_ORDINAL : MEASURE_SCALE)); - var_set_display_width (v, width); var_set_alignment (v, (align == 0 ? ALIGN_LEFT : align == 1 ? ALIGN_RIGHT : ALIGN_CENTRE)); + + /* Older versions (SPSS 9.0) sometimes set the display + width to zero. This causes confusion in the GUI, so + only set the width if it is nonzero. */ + if (width > 0) + var_set_display_width (v, width); } } @@ -1326,7 +1376,14 @@ read_compressed_number (struct sfm_reader *r, double *d) break; case 254: - sys_error (r, _("Compressed data is corrupt.")); + float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d); + if (!r->corruption_warning) + { + r->corruption_warning = true; + sys_warn (r, _("Possible compressed data corruption: " + "compressed spaces appear in numeric field.")); + } + break; case 255: *d = SYSMIS; @@ -1347,7 +1404,8 @@ read_compressed_number (struct sfm_reader *r, double *d) static bool read_compressed_string (struct sfm_reader *r, char *dst) { - switch (read_opcode (r)) + int opcode = read_opcode (r); + switch (opcode) { case -1: case 252: @@ -1362,7 +1420,25 @@ read_compressed_string (struct sfm_reader *r, char *dst) break; default: - sys_error (r, _("Compressed data is corrupt.")); + { + double value = opcode - r->bias; + float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst); + if (value == 0.0) + { + /* This has actually been seen "in the wild". The submitter of the + file that showed that the contents decoded as spaces, but they + were at the end of the field so it's possible that the null + bytes just acted as null terminators. */ + } + else if (!r->corruption_warning) + { + r->corruption_warning = true; + sys_warn (r, _("Possible compressed data corruption: " + "string contains compressed integer (opcode %d)"), + opcode); + } + } + break; } return true; @@ -1697,7 +1773,7 @@ skip_bytes (struct sfm_reader *r, size_t bytes) } } -static struct casereader_class sys_file_casereader_class = +static const struct casereader_class sys_file_casereader_class = { sys_file_casereader_read, sys_file_casereader_destroy,