X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=8d973e4dbc103d09860e4c14cc12a006fda0bfcd;hb=e624e2da6ea68d22e6d4fba4eaa96d37d07a6730;hp=8dd95ad5c02724d95f977293b7b01c4f7d7420dc;hpb=614184c65fba060932911bbf2b8c6736d9e1452f;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 8dd95ad5..8d973e4d 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,11 +25,9 @@ #include #include -#include #include #include #include -#include #include #include #include @@ -44,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +51,7 @@ #include "inttostr.h" #include "minmax.h" #include "unlocked-io.h" +#include "xalloc.h" #include "xsize.h" #include "gettext.h" @@ -67,6 +67,7 @@ struct sfm_reader /* File state. */ struct file_handle *fh; /* File handle. */ + struct fh_lock *lock; /* Mutual exclusion for file handle. */ FILE *file; /* File stream. */ bool error; /* I/O or corruption error? */ size_t value_cnt; /* Number of "union value"s in struct case. */ @@ -85,9 +86,10 @@ struct sfm_reader double bias; /* Compression bias, usually 100.0. */ uint8_t opcodes[8]; /* Current block of opcodes. */ size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */ + bool corruption_warning; /* Warned about possible corruption? */ }; -static struct casereader_class sys_file_casereader_class; +static const struct casereader_class sys_file_casereader_class; static bool close_reader (struct sfm_reader *); @@ -180,19 +182,32 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, int claimed_oct_cnt; int rec_type; - if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) - return NULL; - *dict = dict_create (); /* Create and initialize reader. */ r = pool_create_container (struct sfm_reader, pool); - r->fh = fh; - r->file = fn_open (fh_get_file_name (fh), "rb"); + r->fh = fh_ref (fh); + r->lock = NULL; + r->file = NULL; r->error = false; r->oct_cnt = 0; r->has_long_var_names = false; r->opcode_idx = sizeof r->opcodes; + r->corruption_warning = false; + + /* TRANSLATORS: this fragment will be interpolated into + messages in fh_lock() that identify types of files. */ + r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false); + if (r->lock == NULL) + goto error; + + r->file = fn_open (fh_get_file_name (fh), "rb"); + if (r->file == NULL) + { + msg (ME, _("Error opening \"%s\" for reading as a system file: %s."), + fh_get_file_name (r->fh), strerror (errno)); + goto error; + } /* Initialize info. */ if (info == NULL) @@ -200,19 +215,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, memset (info, 0, sizeof *info); if (setjmp (r->bail_out)) - { - close_reader (r); - dict_destroy (*dict); - *dict = NULL; - return NULL; - } + goto error; - if (r->file == NULL) - { - msg (ME, _("Error opening \"%s\" for reading as a system file: %s."), - fh_get_file_name (r->fh), strerror (errno)); - longjmp (r->bail_out, 1); - } /* Read header. */ read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info); @@ -262,8 +266,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, for (i = 0; i < dict_get_var_cnt (*dict); i++) { struct variable *var = dict_get_var (*dict, i); - char short_name [SHORT_NAME_LEN + 1]; - char long_name [SHORT_NAME_LEN + 1]; + char short_name[SHORT_NAME_LEN + 1]; + char long_name[SHORT_NAME_LEN + 1]; strcpy (short_name, var_get_name (var)); @@ -306,6 +310,12 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, (NULL, r->value_cnt, r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, &sys_file_casereader_class, r); + +error: + close_reader (r); + dict_destroy (*dict); + *dict = NULL; + return NULL; } /* Closes a system file after we're done with it. @@ -330,8 +340,8 @@ close_reader (struct sfm_reader *r) r->file = NULL; } - if (r->fh != NULL) - fh_close (r->fh, "system file", "rs"); + fh_unlock (r->lock); + fh_unref (r->fh); error = r->error; pool_destroy (r->pool); @@ -417,10 +427,21 @@ read_header (struct sfm_reader *r, struct dictionary *dict, read_bytes (r, raw_bias, sizeof raw_bias); if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) { - sys_warn (r, _("Compression bias (%g) is not the usual " - "value of 100, or system file uses unrecognized " - "floating-point format."), - r->bias); + uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (memcmp (raw_bias, zero_bias, 8)) + sys_warn (r, _("Compression bias is not the usual " + "value of 100, or system file uses unrecognized " + "floating-point format.")); + else + { + /* Some software is known to write all-zeros to this + field. Such software also writes floating-point + numbers in the format that we expect by default + (it seems that all software most likely does, in + reality), so don't warn in this case. */ + } + if (r->integer_format == INTEGER_MSB_FIRST) r->float_format = FLOAT_IEEE_DOUBLE_BE; else @@ -510,8 +531,8 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, len = read_int (r); if (len >= sizeof label) - sys_error (r, _("Variable %s has label of invalid length %u."), - name, (unsigned int) len); + sys_error (r, _("Variable %s has label of invalid length %zu."), + name, len); read_string (r, label, len + 1); var_set_label (var, label); @@ -535,17 +556,20 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, { double low = read_float (r); double high = read_float (r); - mv_add_num_range (&mv, low, high); + mv_add_range (&mv, low, high); missing_value_code = -missing_value_code - 2; } for (i = 0; i < missing_value_code; i++) mv_add_num (&mv, read_float (r)); } - else if (var_get_width (var) <= MAX_SHORT_STRING) + else { if (missing_value_code < 1 || missing_value_code > 3) sys_error (r, _("String missing value indicator field is not " "0, 1, 2, or 3.")); + if (var_is_long_string (var)) + sys_warn (r, _("Ignoring missing values on long string variable " + "%s, which PSPP does not yet support."), name); for (i = 0; i < missing_value_code; i++) { char string[9]; @@ -553,10 +577,8 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, mv_add_str (&mv, string); } } - else - sys_error (r, _("Long string variable %s may not have missing " - "values."), name); - var_set_missing_values (var, &mv); + if (!var_is_long_string (var)) + var_set_missing_values (var, &mv); } /* Set formats. */ @@ -609,7 +631,7 @@ parse_format_spec (struct sfm_reader *r, unsigned int s, bool ok; if (!fmt_from_io (raw_type, &f.type)) - sys_error (r, _("Unknown variable format %d."), (int) raw_type); + sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type); f.w = w; f.d = d; @@ -720,7 +742,11 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, break; case 7: - /* Unknown purpose. */ + /* Used by the MRSETS command. */ + break; + + case 8: + /* Used by the SPSS Data Entry software. */ break; case 11: @@ -744,8 +770,21 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, SPSS 14. */ break; + case 20: + /* New in SPSS 16. Contains a single string that describes + the character encoding, e.g. "windows-1252". */ + break; + + case 21: + /* New in SPSS 16. Encodes value labels for long string + variables. */ + sys_warn (r, _("Ignoring value labels for long string variables, " + "which PSPP does not yet support.")); + break; + default: - sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); + sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"), + subtype, PACKAGE_BUGREPORT); break; } @@ -770,9 +809,9 @@ read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, int expected_integer_format; if (size != 4 || count != 8) - sys_error (r, _("Bad size (%u) or count (%u) field on record type 7, " + sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, " "subtype 3."), - (unsigned int) size, (unsigned int) count); + size, count); /* Save version info. */ info->version_major = version_major; @@ -803,7 +842,7 @@ read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, NOT_REACHED (); if (integer_representation != expected_integer_format) { - static const char *endian[] = {N_("little-endian"), N_("big-endian")}; + static const char *const endian[] = {N_("little-endian"), N_("big-endian")}; sys_warn (r, _("Integer format indicated by system file (%s) " "differs from expected (%s)."), gettext (endian[integer_representation == 1]), @@ -820,8 +859,8 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) double lowest = read_float (r); if (size != 8 || count != 3) - sys_error (r, _("Bad size (%u) or count (%u) on extension 4."), - (unsigned int) size, (unsigned int) count); + sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."), + size, count); if (sysmis != SYSMIS) sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis); @@ -837,19 +876,36 @@ static void read_display_parameters (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - const size_t n_vars = count / 3 ; + size_t n_vars; + bool includes_width; bool warned = false; - int i; + size_t i; - if (count % 3 || n_vars != dict_get_var_cnt (dict)) - sys_error (r, _("Bad size (%u) or count (%u) on extension 11."), - (unsigned int) size, (unsigned int) count); + if (size != 4) + { + sys_warn (r, _("Bad size %zu on extension 11."), size); + skip_bytes (r, size * count); + return; + } + + n_vars = dict_get_var_cnt (dict); + if (count == 3 * n_vars) + includes_width = true; + else if (count == 2 * n_vars) + includes_width = false; + else + { + sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."), + count, n_vars); + skip_bytes (r, size * count); + return; + } for (i = 0; i < n_vars; ++i) { struct variable *v = dict_get_var (dict, i); int measure = read_int (r); - int width = read_int (r); + int width = includes_width ? read_int (r) : 0; int align = read_int (r); /* SPSS 14 sometimes seems to set string variables' measure @@ -857,16 +913,13 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count, if (0 == measure && var_is_alpha (v)) measure = 1; - /* Older versions (SPSS 9.0) sometimes set the display width - to zero. This causes confusion especially in the GUI */ - if (0 == width) - width = 8; - if (measure < 1 || measure > 3 || align < 0 || align > 2) { if (!warned) - sys_warn (r, _("Invalid variable display parameters. " - "Default parameters substituted.")); + sys_warn (r, _("Invalid variable display parameters " + "for variable %zu (%s). " + "Default parameters substituted."), + i, var_get_name (v)); warned = true; continue; } @@ -874,10 +927,15 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count, var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL : measure == 2 ? MEASURE_ORDINAL : MEASURE_SCALE)); - var_set_display_width (v, width); var_set_alignment (v, (align == 0 ? ALIGN_LEFT : align == 1 ? ALIGN_RIGHT : ALIGN_CENTRE)); + + /* Older versions (SPSS 9.0) sometimes set the display + width to zero. This causes confusion in the GUI, so + only set the width if it is nonzero. */ + if (width > 0) + var_set_display_width (v, width); } } @@ -1083,8 +1141,8 @@ read_value_labels (struct sfm_reader *r, var_cnt = read_int (r); if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict)) sys_error (r, _("Number of variables associated with a value label (%d) " - "is not between 1 and the number of variables (%u)."), - var_cnt, (unsigned int) dict_get_var_cnt (dict)); + "is not between 1 and the number of variables (%zu)."), + var_cnt, dict_get_var_cnt (dict)); /* Read the list of variables. */ var = pool_nalloc (subpool, var_cnt, sizeof *var); @@ -1318,7 +1376,14 @@ read_compressed_number (struct sfm_reader *r, double *d) break; case 254: - sys_error (r, _("Compressed data is corrupt.")); + float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d); + if (!r->corruption_warning) + { + r->corruption_warning = true; + sys_warn (r, _("Possible compressed data corruption: " + "compressed spaces appear in numeric field.")); + } + break; case 255: *d = SYSMIS; @@ -1339,7 +1404,8 @@ read_compressed_number (struct sfm_reader *r, double *d) static bool read_compressed_string (struct sfm_reader *r, char *dst) { - switch (read_opcode (r)) + int opcode = read_opcode (r); + switch (opcode) { case -1: case 252: @@ -1354,7 +1420,25 @@ read_compressed_string (struct sfm_reader *r, char *dst) break; default: - sys_error (r, _("Compressed data is corrupt.")); + { + double value = opcode - r->bias; + float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst); + if (value == 0.0) + { + /* This has actually been seen "in the wild". The submitter of the + file that showed that the contents decoded as spaces, but they + were at the end of the field so it's possible that the null + bytes just acted as null terminators. */ + } + else if (!r->corruption_warning) + { + r->corruption_warning = true; + sys_warn (r, _("Possible compressed data corruption: " + "string contains compressed integer (opcode %d)"), + opcode); + } + } + break; } return true; @@ -1689,7 +1773,7 @@ skip_bytes (struct sfm_reader *r, size_t bytes) } } -static struct casereader_class sys_file_casereader_class = +static const struct casereader_class sys_file_casereader_class = { sys_file_casereader_read, sys_file_casereader_destroy,