X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=7dfba894cba210aa1dc4e1dbbe3ff9adf6467a56;hb=8c3d1da71a10a31270d669d7410e52a7c66ed396;hp=281edcbcadee23794b517cc6d888b6dfb72e2425;hpb=9d6969686924cea36f9fd90e2d2c51309190a67f;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 281edcbc..7dfba894 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,25 +1,23 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include "sys-file-reader.h" -#include "sys-file-private.h" +#include +#include #include #include @@ -38,15 +36,17 @@ #include #include -#include "case.h" -#include "dictionary.h" -#include "file-handle-def.h" -#include "file-name.h" -#include "format.h" -#include "missing-values.h" -#include "value-labels.h" -#include "variable.h" -#include "value.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "c-ctype.h" #include "inttostr.h" @@ -69,13 +69,16 @@ struct sfm_reader struct file_handle *fh; /* File handle. */ FILE *file; /* File stream. */ bool error; /* I/O or corruption error? */ + size_t value_cnt; /* Number of "union value"s in struct case. */ /* File format. */ enum integer_format integer_format; /* On-disk integer format. */ enum float_format float_format; /* On-disk floating point format. */ - int value_cnt; /* Number of 8-byte units per case. */ + int flt64_cnt; /* Number of 8-byte units per case. */ struct sfm_var *vars; /* Variables. */ size_t var_cnt; /* Number of variables. */ + int32_t case_cnt; /* Number of cases */ + bool has_long_var_names; /* File has a long variable name map */ bool has_vls; /* File has one or more very long strings? */ /* Decompression. */ @@ -86,12 +89,16 @@ struct sfm_reader }; /* A variable in a system file. */ -struct sfm_var +struct sfm_var { int width; /* 0=numeric, otherwise string width. */ int case_index; /* Index into case. */ }; +static struct casereader_class sys_file_casereader_class; + +static bool close_reader (struct sfm_reader *); + static struct variable **make_var_by_value_idx (struct sfm_reader *, struct dictionary *); static struct variable *lookup_var_by_value_idx (struct sfm_reader *, @@ -124,17 +131,19 @@ static bool read_variable_to_value_map (struct sfm_reader *, struct variable_to_value_map *, struct variable **var, char **value, int *warning_cnt); + +static bool close_reader (struct sfm_reader *r); /* Dictionary reader. */ -enum which_format +enum which_format { PRINT_FORMAT, WRITE_FORMAT }; static void read_header (struct sfm_reader *, struct dictionary *, - int *weight_idx, int *claimed_value_cnt, + int *weight_idx, int *claimed_flt64_cnt, struct sfm_read_info *); static void read_variable_record (struct sfm_reader *, struct dictionary *, int *format_warning_cnt); @@ -168,7 +177,7 @@ static void read_long_string_map (struct sfm_reader *, reading. Reads the system file's dictionary into *DICT. If INFO is non-null, then it receives additional info about the system file. */ -struct sfm_reader * +struct casereader * sfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct sfm_read_info *info) { @@ -176,7 +185,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct variable **var_by_value_idx; int format_warning_cnt = 0; int weight_idx; - int claimed_value_cnt; + int claimed_flt64_cnt; int rec_type; size_t i; @@ -190,13 +199,14 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->fh = fh; r->file = fn_open (fh_get_file_name (fh), "rb"); r->error = false; - r->value_cnt = 0; + r->flt64_cnt = 0; r->has_vls = false; + r->has_long_var_names = false; r->opcode_idx = sizeof r->opcodes; - if (setjmp (r->bail_out)) + if (setjmp (r->bail_out)) { - sfm_close_reader (r); + close_reader (r); dict_destroy (*dict); *dict = NULL; return NULL; @@ -210,13 +220,13 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } /* Read header. */ - read_header (r, *dict, &weight_idx, &claimed_value_cnt, info); + read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info); /* Read all the variable definition records. */ rec_type = read_int32 (r); while (rec_type == 2) { - read_variable_record (r, *dict, &format_warning_cnt); + read_variable_record (r, *dict, &format_warning_cnt); rec_type = read_int32 (r); } @@ -225,7 +235,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, setup_weight (r, weight_idx, var_by_value_idx, *dict); /* Read all the rest of the dictionary records. */ - while (rec_type != 999) + while (rec_type != 999) { switch (rec_type) { @@ -250,13 +260,38 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, rec_type = read_int32 (r); } + + if ( ! r->has_long_var_names ) + { + int i; + for (i = 0; i < dict_get_var_cnt (*dict); i++) + { + struct variable *var = dict_get_var (*dict, i); + char short_name [SHORT_NAME_LEN + 1]; + char long_name [SHORT_NAME_LEN + 1]; + + strcpy (short_name, var_get_name (var)); + + strcpy (long_name, short_name); + str_lowercase (long_name); + + /* Set long name. Renaming a variable may clear the short + name, but we want to retain it, so re-set it + explicitly. */ + dict_rename_var (*dict, var, long_name); + var_set_short_name (var, 0, short_name); + } + + r->has_long_var_names = true; + } + /* Read record 999 data, which is just filler. */ read_int32 (r); - if (claimed_value_cnt != -1 && claimed_value_cnt != r->value_cnt) + if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt) sys_warn (r, _("File header claims %d variable positions but " "%d were read from file."), - claimed_value_cnt, r->value_cnt); + claimed_flt64_cnt, r->flt64_cnt); /* Create an index of dictionary variable widths for sfm_read_case to use. We cannot use the `struct variable's @@ -264,58 +299,72 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, dictionary and may destroy or modify its variables. */ r->var_cnt = dict_get_var_cnt (*dict); r->vars = pool_nalloc (r->pool, r->var_cnt, sizeof *r->vars); - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->var_cnt; i++) { struct variable *v = dict_get_var (*dict, i); struct sfm_var *sv = &r->vars[i]; sv->width = var_get_width (v); - sv->case_index = var_get_case_index (v); + sv->case_index = var_get_case_index (v); } pool_free (r->pool, var_by_value_idx); - return r; + r->value_cnt = dict_get_next_value_idx (*dict); + return casereader_create_sequential + (NULL, r->value_cnt, + r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, + &sys_file_casereader_class, r); } -/* Closes a system file after we're done with it. */ -void -sfm_close_reader (struct sfm_reader *r) +/* Closes a system file after we're done with it. + Returns true if an I/O error has occurred on READER, false + otherwise. */ +static bool +close_reader (struct sfm_reader *r) { + bool error; + if (r == NULL) - return; + return true; if (r->file) { if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) - msg (ME, _("Error closing system file \"%s\": %s."), - fh_get_file_name (r->fh), strerror (errno)); + { + msg (ME, _("Error closing system file \"%s\": %s."), + fh_get_file_name (r->fh), strerror (errno)); + r->error = true; + } r->file = NULL; } if (r->fh != NULL) fh_close (r->fh, "system file", "rs"); + error = r->error; pool_destroy (r->pool); + + return !error; } -/* Returns true if an I/O error has occurred on READER, false - otherwise. */ -bool -sfm_read_error (const struct sfm_reader *reader) +/* Destroys READER. */ +static void +sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { - return reader->error; + struct sfm_reader *r = r_; + close_reader (r); } /* Returns true if FILE is an SPSS system file, false otherwise. */ bool -sfm_detect (FILE *file) +sfm_detect (FILE *file) { char rec_type[5]; if (fread (rec_type, 4, 1, file) != 1) return false; rec_type[4] = '\0'; - + return !strcmp ("$FL2", rec_type); } @@ -323,19 +372,18 @@ sfm_detect (FILE *file) Sets DICT's file label to the system file's label. Sets *WEIGHT_IDX to 0 if the system file is unweighted, or to the value index of the weight variable otherwise. - Sets *CLAIMED_VALUE_CNT to the number of values that the file + Sets *CLAIMED_FLT64_CNT to the number of values that the file claims to have (although it is not always correct). If INFO is non-null, initializes *INFO with header - information. */ + information. */ static void read_header (struct sfm_reader *r, struct dictionary *dict, - int *weight_idx, int *claimed_value_cnt, + int *weight_idx, int *claimed_flt64_cnt, struct sfm_read_info *info) { char rec_type[5]; char eye_catcher[61]; uint8_t raw_layout_code[4]; - int case_cnt; uint8_t raw_bias[8]; char creation_date[10]; char creation_time[9]; @@ -344,7 +392,7 @@ read_header (struct sfm_reader *r, struct dictionary *dict, read_string (r, rec_type, sizeof rec_type); read_string (r, eye_catcher, sizeof eye_catcher); - + if (strcmp ("$FL2", rec_type) != 0) sys_error (r, _("This is not an SPSS system file.")); @@ -358,17 +406,18 @@ read_header (struct sfm_reader *r, struct dictionary *dict, && r->integer_format != INTEGER_LSB_FIRST)) sys_error (r, _("This is not an SPSS system file.")); - *claimed_value_cnt = read_int32 (r); - if (*claimed_value_cnt < 0 || *claimed_value_cnt > INT_MAX / 16) - *claimed_value_cnt = -1; + *claimed_flt64_cnt = read_int32 (r); + if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16) + *claimed_flt64_cnt = -1; r->compressed = read_int32 (r) != 0; *weight_idx = read_int32 (r); - case_cnt = read_int32 (r); - if (case_cnt < -1 || case_cnt > INT_MAX / 2) - case_cnt = -1; + r->case_cnt = read_int32 (r); + if ( r->case_cnt > INT_MAX / 2) + r->case_cnt = -1; + /* Identify floating-point format and obtain compression bias. */ read_bytes (r, raw_bias, sizeof raw_bias); @@ -389,10 +438,10 @@ read_header (struct sfm_reader *r, struct dictionary *dict, read_string (r, creation_time, sizeof creation_time); read_string (r, file_label, sizeof file_label); skip_bytes (r, 3); - + file_label_ss = ss_cstr (file_label); ss_trim (&file_label_ss, ss_cstr (" ")); - if (!ss_is_empty (file_label_ss)) + if (!ss_is_empty (file_label_ss)) { ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0'; dict_set_label (dict, ss_data (file_label_ss)); @@ -407,7 +456,7 @@ read_header (struct sfm_reader *r, struct dictionary *dict, info->integer_format = r->integer_format; info->float_format = r->float_format; info->compressed = r->compressed; - info->case_cnt = case_cnt; + info->case_cnt = r->case_cnt; product = ss_cstr (eye_catcher); ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE")); @@ -454,13 +503,13 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, if (width < 0 || width > 255) sys_error (r, _("Bad variable width %d."), width); var = dict_create_var (dict, name, width); - if (var == NULL) + if (var == NULL) sys_error (r, _("Duplicate variable name `%s' within system file."), name); - /* Set the short name the same as the long name */ - var_set_short_name (var, var_get_name (var)); + /* Set the short name the same as the long name. */ + var_set_short_name (var, 0, var_get_name (var)); /* Get variable label, if any. */ if (has_variable_label != 0 && has_variable_label != 1) @@ -476,7 +525,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, name, (unsigned int) len); read_string (r, label, len + 1); var_set_label (var, label); - + skip_bytes (r, ROUND_UP (len, 4) - len); } @@ -489,7 +538,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, { struct missing_values mv; mv_init (&mv, var_get_width (var)); - if (var_is_numeric (var)) + if (var_is_numeric (var)) { if (missing_value_code > 0) { @@ -515,10 +564,10 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, { char string[9]; read_string (r, string, sizeof string); - mv_add_str (&mv, string); + mv_add_str (&mv, string); } } - else + else sys_error (r, _("String variable %s may not have missing " "values specified as a range."), name); @@ -537,7 +586,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, /* Account for values. Skip long string continuation records, if any. */ nv = width == 0 ? 1 : DIV_RND_UP (width, 8); - r->value_cnt += nv; + r->flt64_cnt += nv; if (width > 8) { int i; @@ -558,7 +607,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, /* Variable label fields on continuation records have been spotted in system files created by "SPSS Power Macintosh Release 6.1". */ - if (has_variable_label) + if (has_variable_label) skip_bytes (r, ROUND_UP (read_int32 (r), 4)); } } @@ -576,9 +625,9 @@ parse_format_spec (struct sfm_reader *r, uint32_t s, uint8_t raw_type = s >> 16; uint8_t w = s >> 8; uint8_t d = s; - + bool ok; - + if (!fmt_from_io (raw_type, &f.type)) sys_error (r, _("Unknown variable format %d."), (int) raw_type); f.w = w; @@ -587,8 +636,8 @@ parse_format_spec (struct sfm_reader *r, uint32_t s, msg_disable (); ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v)); msg_enable (); - - if (ok) + + if (ok) { if (which == PRINT_FORMAT) var_set_print_format (v, &f); @@ -614,7 +663,7 @@ parse_format_spec (struct sfm_reader *r, uint32_t s, nonzero. */ static void setup_weight (struct sfm_reader *r, int weight_idx, - struct variable **var_by_value_idx, struct dictionary *dict) + struct variable **var_by_value_idx, struct dictionary *dict) { if (weight_idx != 0) { @@ -644,9 +693,12 @@ read_documents (struct sfm_reader *r, struct dictionary *dict) sys_error (r, _("Number of document lines (%d) " "must be greater than 0."), line_cnt); - documents = pool_nmalloc (r->pool, line_cnt + 1, 80); - read_string (r, documents, 80 * line_cnt + 1); - dict_set_documents (dict, documents); + documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH); + read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1); + if (strlen (documents) == DOC_LINE_LENGTH * line_cnt) + dict_set_documents (dict, documents); + else + sys_error (r, _("Document line contains null byte.")); pool_free (r->pool, documents); } @@ -685,11 +737,11 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict) /* DATE variable information. We don't use it yet, but we should. */ break; - + case 7: /* Unknown purpose. */ break; - + case 11: read_display_parameters (r, size, count, dict); return; @@ -710,7 +762,7 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict) /* Text field that defines variable attributes. New in SPSS 14. */ break; - + default: sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); break; @@ -802,11 +854,11 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count, bool warned = false; int i; - if (count % 3 || n_vars != dict_get_var_cnt (dict)) + if (count % 3 || n_vars != dict_get_var_cnt (dict)) sys_error (r, _("Bad size (%u) or count (%u) on extension 11."), (unsigned int) size, (unsigned int) count); - for (i = 0; i < n_vars; ++i) + for (i = 0; i < n_vars; ++i) { int measure = read_int32 (r); int width = read_int32 (r); @@ -847,13 +899,14 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, struct variable *var; char *long_name; int warning_cnt = 0; - + map = open_variable_to_value_map (r, size * count); while (read_variable_to_value_map (r, dict, map, &var, &long_name, &warning_cnt)) { - char short_name[SHORT_NAME_LEN + 1]; - strcpy (short_name, var_get_short_name (var)); + char **short_names; + size_t short_name_cnt; + size_t i; /* Validate long name. */ if (!var_is_valid_name (long_name, false)) @@ -863,9 +916,9 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, var_get_name (var), long_name); continue; } - + /* Identify any duplicates. */ - if (strcasecmp (short_name, long_name) + if (strcasecmp (var_get_short_name (var, 0), long_name) && dict_lookup_var (dict, long_name) != NULL) { sys_warn (r, _("Duplicate long variable name `%s' " @@ -873,13 +926,29 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, continue; } - /* Set long name. Renaming a variable may clear the short - name, but we want to retain it, so re-set it - explicitly. */ + /* Renaming a variable may clear its short names, but we + want to retain them, so we save them and re-set them + afterward. */ + short_name_cnt = var_get_short_name_cnt (var); + short_names = xnmalloc (short_name_cnt, sizeof *short_names); + for (i = 0; i < short_name_cnt; i++) + { + const char *s = var_get_short_name (var, i); + short_names[i] = s != NULL ? xstrdup (s) : NULL; + } + + /* Set long name. */ dict_rename_var (dict, var, long_name); - var_set_short_name (var, short_name); + + /* Restore short names. */ + for (i = 0; i < short_name_cnt; i++) + { + var_set_short_name (var, i, short_names[i]); + free (short_names[i]); + } } close_variable_to_value_map (r, map); + r->has_long_var_names = true; } /* Reads record type 7, subtype 14, which gives the real length @@ -904,7 +973,7 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count, /* Get length. */ length = strtol (length_s, NULL, 10); - if (length < MIN_VERY_LONG_STRING || length == LONG_MAX) + if (length < MIN_VERY_LONG_STRING || length == LONG_MAX) { sys_warn (r, _("%s listed as string of length %s " "in length table."), @@ -916,7 +985,7 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count, and delete all but the first. */ remaining_length = length; for (idx = var_get_dict_index (var); remaining_length > 0; idx++) - if (idx < dict_get_var_cnt (dict)) + if (idx < dict_get_var_cnt (dict)) remaining_length -= MIN (var_get_width (dict_get_var (dict, idx)), EFFECTIVE_LONG_STRING_LENGTH); else @@ -940,8 +1009,8 @@ read_value_labels (struct sfm_reader *r, struct dictionary *dict, struct variable **var_by_value_idx) { struct pool *subpool; - - struct label + + struct label { char raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ @@ -966,7 +1035,7 @@ read_value_labels (struct sfm_reader *r, label_cnt = read_int32 (r); if (label_cnt >= INT32_MAX / sizeof *labels) - { + { sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."), label_cnt); label_cnt = 0; @@ -1031,17 +1100,17 @@ read_value_labels (struct sfm_reader *r, var_is_numeric (var[i]) ? _("numeric") : _("string")); /* Fill in labels[].value, now that we know the desired type. */ - for (i = 0; i < label_cnt; i++) + for (i = 0; i < label_cnt; i++) { struct label *label = labels + i; - + if (var_is_alpha (var[0])) buf_copy_rpad (label->value.s, sizeof label->value.s, label->raw_value, sizeof label->raw_value); else label->value.f = flt64_to_double (r, (uint8_t *) label->raw_value); } - + /* Assign the `value_label's to each variable. */ for (i = 0; i < var_cnt; i++) { @@ -1052,7 +1121,7 @@ read_value_labels (struct sfm_reader *r, for (j = 0; j < label_cnt; j++) { struct label *label = &labels[j]; - if (!var_add_value_label (v, &label->value, label->label)) + if (!var_add_value_label (v, &label->value, label->label)) { if (var_is_numeric (var[0])) sys_warn (r, _("Duplicate value label for %g on %s."), @@ -1060,7 +1129,7 @@ read_value_labels (struct sfm_reader *r, else sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), var_get_width (v), label->value.s, - var_get_name (v)); + var_get_name (v)); } } } @@ -1072,6 +1141,10 @@ read_value_labels (struct sfm_reader *r, static void partial_record (struct sfm_reader *r) NO_RETURN; + +static void read_error (struct casereader *, const struct sfm_reader *); + + static bool read_case_number (struct sfm_reader *, double *); static bool read_case_string (struct sfm_reader *, char *, size_t); static int read_opcode (struct sfm_reader *); @@ -1079,39 +1152,51 @@ static bool read_compressed_number (struct sfm_reader *, double *); static bool read_compressed_string (struct sfm_reader *, char *); static bool read_whole_strings (struct sfm_reader *, char *, size_t); -/* Reads one case from READER's file into C. Returns nonzero - only if successful. */ -int -sfm_read_case (struct sfm_reader *r, struct ccase *c) +/* Reads one case from READER's file into C. Returns true only + if successful. */ +static bool +sys_file_casereader_read (struct casereader *reader, void *r_, + struct ccase *c) { + struct sfm_reader *r = r_; if (r->error) - return 0; + return false; + case_create (c, r->value_cnt); if (setjmp (r->bail_out)) - return 0; + { + casereader_force_error (reader); + case_destroy (c); + return false; + } - if (!r->compressed && sizeof (double) == 8 && !r->has_vls) + if (!r->compressed && sizeof (double) == 8 && !r->has_vls) { /* Fast path. Read the whole case directly. */ if (!try_read_bytes (r, case_data_all_rw (c), - sizeof (union value) * r->value_cnt)) - return 0; + sizeof (union value) * r->flt64_cnt)) + { + case_destroy (c); + if ( r->case_cnt != -1 ) + read_error (reader, r); + return false; + } /* Convert floating point numbers to native format if needed. */ - if (r->float_format != FLOAT_NATIVE_DOUBLE) + if (r->float_format != FLOAT_NATIVE_DOUBLE) { int i; - - for (i = 0; i < r->var_cnt; i++) - if (r->vars[i].width == 0) + + for (i = 0; i < r->var_cnt; i++) + if (r->vars[i].width == 0) { double *d = &case_data_rw_idx (c, r->vars[i].case_index)->f; - float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d); + float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d); } } - return 1; + return true; } - else + else { /* Slow path. Convert from external to internal format. */ int i; @@ -1121,10 +1206,10 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) struct sfm_var *sv = &r->vars[i]; union value *v = case_data_rw_idx (c, sv->case_index); - if (sv->width == 0) + if (sv->width == 0) { if (!read_case_number (r, &v->f)) - goto eof; + goto eof; } else { @@ -1135,24 +1220,24 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) for (ofs = 0; ofs < sv->width; ofs += chunk_size) { chunk_size = MIN (max_chunk, sv->width - ofs); - if (!read_case_string (r, v->s + ofs, chunk_size)) + if (!read_case_string (r, v->s + ofs, chunk_size)) { if (ofs) partial_record (r); - goto eof; + goto eof; } } /* Very long strings have trailing wasted space that we must skip. */ - if (sv->width >= MIN_VERY_LONG_STRING) + if (sv->width >= MIN_VERY_LONG_STRING) { int bytes_read = (sv->width / max_chunk * 256 + ROUND_UP (sv->width % max_chunk, 8)); int total_bytes = sfm_width_to_bytes (sv->width); int excess_bytes = total_bytes - bytes_read; - while (excess_bytes > 0) + while (excess_bytes > 0) { char buffer[1024]; size_t chunk = MIN (sizeof buffer, excess_bytes); @@ -1163,12 +1248,15 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) } } } - return 1; + return true; eof: + case_destroy (c); if (i != 0) partial_record (r); - return 0; + if ( r->case_cnt != -1 ) + read_error (reader, r); + return false; } } @@ -1179,13 +1267,20 @@ partial_record (struct sfm_reader *r) sys_error (r, _("File ends in partial case.")); } +static void +read_error (struct casereader *r, const struct sfm_reader *sfm) +{ + msg (ME, _("Error reading case from file %s"), fh_get_name (sfm->fh)); + casereader_force_error (r); +} + /* Reads a number from R and stores its value in *D. If R is compressed, reads a compressed number; otherwise, reads a number in the regular way. Returns true if successful, false if end of file is reached immediately. */ static bool -read_case_number (struct sfm_reader *r, double *d) +read_case_number (struct sfm_reader *r, double *d) { if (!r->compressed) { @@ -1207,12 +1302,12 @@ read_case_number (struct sfm_reader *r, double *d) Returns true if successful, false if end of file is reached immediately. */ static bool -read_case_string (struct sfm_reader *r, char *s, size_t length) +read_case_string (struct sfm_reader *r, char *s, size_t length) { size_t whole = ROUND_DOWN (length, 8); size_t partial = length % 8; - - if (whole) + + if (whole) { if (!read_whole_strings (r, s, whole)) return false; @@ -1225,7 +1320,7 @@ read_case_string (struct sfm_reader *r, char *s, size_t length) { if (whole) partial_record (r); - return false; + return false; } memcpy (s + whole, bounce, partial); } @@ -1235,13 +1330,13 @@ read_case_string (struct sfm_reader *r, char *s, size_t length) /* Reads and returns the next compression opcode from R. */ static int -read_opcode (struct sfm_reader *r) +read_opcode (struct sfm_reader *r) { assert (r->compressed); for (;;) { int opcode; - if (r->opcode_idx >= sizeof r->opcodes) + if (r->opcode_idx >= sizeof r->opcodes) { if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes)) return -1; @@ -1260,7 +1355,7 @@ read_opcode (struct sfm_reader *r) static bool read_compressed_number (struct sfm_reader *r, double *d) { - int opcode = read_opcode (r); + int opcode = read_opcode (r); switch (opcode) { case -1: @@ -1270,7 +1365,7 @@ read_compressed_number (struct sfm_reader *r, double *d) case 253: *d = read_flt64 (r); break; - + case 254: sys_error (r, _("Compressed data is corrupt.")); @@ -1329,7 +1424,7 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length) { size_t ofs; for (ofs = 0; ofs < length; ofs += 8) - if (!read_compressed_string (r, s + ofs)) + if (!read_compressed_string (r, s + ofs)) { if (ofs != 0) partial_record (r); @@ -1348,15 +1443,15 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length) values to be deleted from the case and the dictionary to be compacted. */ static struct variable ** -make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) +make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) { struct variable **var_by_value_idx; int value_idx = 0; int i; var_by_value_idx = pool_nmalloc (r->pool, - r->value_cnt, sizeof *var_by_value_idx); - for (i = 0; i < dict_get_var_cnt (dict); i++) + r->flt64_cnt, sizeof *var_by_value_idx); + for (i = 0; i < dict_get_var_cnt (dict); i++) { struct variable *v = dict_get_var (dict, i); int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8); @@ -1366,7 +1461,7 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) for (j = 1; j < nv; j++) var_by_value_idx[value_idx++] = NULL; } - assert (value_idx == r->value_cnt); + assert (value_idx == r->flt64_cnt); return var_by_value_idx; } @@ -1376,13 +1471,13 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) is valid. */ static struct variable * lookup_var_by_value_idx (struct sfm_reader *r, - struct variable **var_by_value_idx, int value_idx) + struct variable **var_by_value_idx, int value_idx) { struct variable *var; - - if (value_idx < 1 || value_idx > r->value_cnt) + + if (value_idx < 1 || value_idx > r->flt64_cnt) sys_error (r, _("Variable index %d not in valid range 1...%d."), - value_idx, r->value_cnt); + value_idx, r->flt64_cnt); var = var_by_value_idx[value_idx - 1]; if (var == NULL) @@ -1404,15 +1499,15 @@ lookup_var_by_short_name (struct dictionary *d, const char *short_name) /* First try looking up by full name. This often succeeds. */ var = dict_lookup_var (d, short_name); - if (var != NULL && !strcasecmp (var_get_short_name (var), short_name)) + if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name)) return var; /* Iterate through the whole dictionary as a fallback. */ var_cnt = dict_get_var_cnt (d); - for (i = 0; i < var_cnt; i++) + for (i = 0; i < var_cnt; i++) { var = dict_get_var (d, i); - if (!strcasecmp (var_get_short_name (var), short_name)) + if (!strcasecmp (var_get_short_name (var, 0), short_name)) return var; } @@ -1423,7 +1518,7 @@ lookup_var_by_short_name (struct dictionary *d, const char *short_name) pairs. */ /* State. */ -struct variable_to_value_map +struct variable_to_value_map { struct substring buffer; /* Record contents. */ size_t pos; /* Current position in buffer. */ @@ -1432,7 +1527,7 @@ struct variable_to_value_map /* Reads SIZE bytes into a "variable=value" map for R, and returns the map. */ static struct variable_to_value_map * -open_variable_to_value_map (struct sfm_reader *r, size_t size) +open_variable_to_value_map (struct sfm_reader *r, size_t size) { struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map); char *buffer = pool_malloc (r->pool, size + 1); @@ -1447,7 +1542,7 @@ open_variable_to_value_map (struct sfm_reader *r, size_t size) but can be used to free it earlier. */ static void close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *map) + struct variable_to_value_map *map) { pool_free (r->pool, ss_data (map->buffer)); } @@ -1459,24 +1554,24 @@ static bool read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict, struct variable_to_value_map *map, struct variable **var, char **value, - int *warning_cnt) + int *warning_cnt) { int max_warnings = 5; - - for (;;) + + for (;;) { struct substring short_name_ss, value_ss; if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss) || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos, - &value_ss)) + &value_ss)) { if (*warning_cnt > max_warnings) sys_warn (r, _("Suppressed %d additional variable map warnings."), *warning_cnt - max_warnings); - return false; + return false; } - + map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX), ss_buffer ("\t\0", 2)); @@ -1484,7 +1579,7 @@ read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict, *var = lookup_var_by_short_name (dict, ss_data (short_name_ss)); if (*var == NULL) { - if (++*warning_cnt <= 5) + if (++*warning_cnt <= max_warnings) sys_warn (r, _("Variable map refers to unknown variable %s."), ss_data (short_name_ss)); continue; @@ -1522,10 +1617,10 @@ sys_msg (struct sfm_reader *r, int class, const char *format, va_list args) /* Displays a warning for the current file position. */ static void -sys_warn (struct sfm_reader *r, const char *format, ...) +sys_warn (struct sfm_reader *r, const char *format, ...) { va_list args; - + va_start (args, format); sys_msg (r, MW, format, args); va_end (args); @@ -1535,10 +1630,10 @@ sys_warn (struct sfm_reader *r, const char *format, ...) marks it as in an error state, and aborts reading it using longjmp. */ static void -sys_error (struct sfm_reader *r, const char *format, ...) +sys_error (struct sfm_reader *r, const char *format, ...) { va_list args; - + va_start (args, format); sys_msg (r, ME, format, args); va_end (args); @@ -1589,7 +1684,7 @@ try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) /* Reads a 32-bit signed integer from R and returns its value in host format. */ static int32_t -read_int32 (struct sfm_reader *r) +read_int32 (struct sfm_reader *r) { uint8_t int32[4]; read_bytes (r, int32, sizeof int32); @@ -1599,7 +1694,7 @@ read_int32 (struct sfm_reader *r) /* Reads a 64-bit floating-point number from R and returns its value in host format. */ static double -read_flt64 (struct sfm_reader *r) +read_flt64 (struct sfm_reader *r) { uint8_t flt64[8]; read_bytes (r, flt64, sizeof flt64); @@ -1609,7 +1704,7 @@ read_flt64 (struct sfm_reader *r) /* Reads exactly SIZE - 1 bytes into BUFFER and stores a null byte into BUFFER[SIZE - 1]. */ static void -read_string (struct sfm_reader *r, char *buffer, size_t size) +read_string (struct sfm_reader *r, char *buffer, size_t size) { assert (size > 0); read_bytes (r, buffer, size - 1); @@ -1620,7 +1715,7 @@ read_string (struct sfm_reader *r, char *buffer, size_t size) static void skip_bytes (struct sfm_reader *r, size_t bytes) { - while (bytes > 0) + while (bytes > 0) { char buffer[1024]; size_t chunk = MIN (sizeof buffer, bytes); @@ -1632,7 +1727,7 @@ skip_bytes (struct sfm_reader *r, size_t bytes) /* Returns the value of the 32-bit signed integer at INT32, converted from the format used by R to the host format. */ static int32_t -int32_to_native (const struct sfm_reader *r, const uint8_t int32[4]) +int32_to_native (const struct sfm_reader *r, const uint8_t int32[4]) { int32_t x; if (r->integer_format == INTEGER_NATIVE) @@ -1655,4 +1750,11 @@ flt64_to_double (const struct sfm_reader *r, const uint8_t flt64[8]) float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x); return x; } - + +static struct casereader_class sys_file_casereader_class = + { + sys_file_casereader_read, + sys_file_casereader_destroy, + NULL, + NULL, + };