X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fsfm-read.c;h=986dede94fd2947a62d8a944d1777cf319c688b3;hb=dcf9b154cbcaa35c3d8459a201b77eec8bcb30bd;hp=01f6330f19ca0403ebf9623eecf279d856284416;hpb=74a57f26f1458b28a0fddbb9f46004ac8f4d9c30;p=pspp-builds.git diff --git a/src/sfm-read.c b/src/sfm-read.c index 01f6330f..986dede9 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -14,23 +14,26 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include -#include "sfm.h" +#include "sfm-read.h" #include "sfmP.h" -#include +#include "error.h" #include #include #include #include +#include #include "alloc.h" +#include "case.h" +#include "dictionary.h" #include "error.h" #include "file-handle.h" #include "filename.h" #include "format.h" -#include "getline.h" +#include "getl.h" #include "hash.h" #include "magic.h" #include "misc.h" @@ -38,90 +41,79 @@ #include "str.h" #include "var.h" -#include "debug-print.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) -/* PORTME: This file may require substantial revision for those - systems that don't meet the typical 32-bit integer/64-bit double - model. It's kinda hard to tell without having one of them on my - desk. */ +#include "debug-print.h" -/* sfm's file_handle extension. */ -struct sfm_fhuser_ext +/* System file reader. */ +struct sfm_reader { - FILE *file; /* Actual file. */ - int opened; /* Reference count. */ - - struct dictionary *dict; /* File's dictionary. */ + struct file_handle *fh; /* File handle. */ + FILE *file; /* File stream. */ int reverse_endian; /* 1=file has endianness opposite us. */ - int case_size; /* Number of `values's per case. */ - long ncases; /* Number of cases, -1 if unknown. */ + int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */ + int value_cnt; /* Number of `union values's per case. */ + long case_cnt; /* Number of cases, -1 if unknown. */ int compressed; /* 1=compressed, 0=not compressed. */ double bias; /* Compression bias, usually 100.0. */ - int weight_index; /* 0-based index of weighting variable, or -1. */ + int weight_idx; /* 0-based index of weighting variable, or -1. */ + + /* Variables. */ + struct sfm_var *vars; /* Variables. */ /* File's special constants. */ flt64 sysmis; flt64 highest; flt64 lowest; - /* Uncompression buffer. */ + /* Decompression buffer. */ flt64 *buf; /* Buffer data. */ flt64 *ptr; /* Current location in buffer. */ flt64 *end; /* End of buffer data. */ /* Compression instruction octet. */ - unsigned char x[sizeof (flt64)]; - /* Current instruction octet. */ + unsigned char x[8]; /* Current instruction octet. */ unsigned char *y; /* Location in current instruction octet. */ }; -static struct fh_ext_class sfm_r_class; - -#if GLOBAL_DEBUGGING -void dump_dictionary (struct dictionary * dict); -#endif +/* A variable in a system file. */ +struct sfm_var + { + int width; /* 0=numeric, otherwise string width. */ + int fv; /* Index into case. */ + }; /* Utilities. */ -/* bswap_int32(): Reverse the byte order of 32-bit integer *X. */ +/* Swap bytes *A and *B. */ static inline void -bswap_int32 (int32 *x) +bswap (char *a, char *b) { - unsigned char *y = (unsigned char *) x; - unsigned char t; - - t = y[0]; - y[0] = y[3]; - y[3] = t; + char t = *a; + *a = *b; + *b = t; +} - t = y[1]; - y[1] = y[2]; - y[2] = t; +/* Reverse the byte order of 32-bit integer *X. */ +static inline void +bswap_int32 (int32 *x_) +{ + char *x = (char *) x_; + bswap (x + 0, x + 3); + bswap (x + 1, x + 2); } /* Reverse the byte order of 64-bit floating point *X. */ static inline void -bswap_flt64 (flt64 *x) +bswap_flt64 (flt64 *x_) { - unsigned char *y = (unsigned char *) x; - unsigned char t; - - t = y[0]; - y[0] = y[7]; - y[7] = t; - - t = y[1]; - y[1] = y[6]; - y[6] = t; - - t = y[2]; - y[2] = y[5]; - y[5] = t; - - t = y[3]; - y[3] = y[4]; - y[4] = t; + char *x = (char *) x_; + bswap (x + 0, x + 7); + bswap (x + 1, x + 6); + bswap (x + 2, x + 5); + bswap (x + 3, x + 4); } static void @@ -132,195 +124,181 @@ corrupt_msg (int class, const char *format,...) static void corrupt_msg (int class, const char *format,...) { - char buf[1024]; - - { - va_list args; - - va_start (args, format); - vsnprintf (buf, 1024, format, args); - va_end (args); - } - - { - struct error e; + struct error e; + va_list args; - e.class = class; - getl_location (&e.where.filename, &e.where.line_number); - e.title = _("corrupt system file: "); - e.text = buf; + e.class = class; + getl_location (&e.where.filename, &e.where.line_number); + e.title = _("corrupt system file: "); - err_vmsg (&e); - } + va_start (args, format); + err_vmsg (&e, format, args); + va_end (args); } /* Closes a system file after we're done with it. */ -static void -sfm_close (struct file_handle *h) -{ - struct sfm_fhuser_ext *ext = h->ext; - - ext->opened--; - assert (ext->opened == 0); - if (EOF == fn_close (handle_get_filename (h), ext->file)) - msg (ME, _("%s: Closing system file: %s."), - handle_get_filename (h), strerror (errno)); - free (ext->buf); - free (h->ext); -} - -/* Closes a system file if we're done with it. */ void -sfm_maybe_close (struct file_handle *h) +sfm_close_reader (struct sfm_reader *r) { - struct sfm_fhuser_ext *ext = h->ext; + if (r == NULL) + return; - if (ext->opened == 1) - fh_close_handle (h); - else - ext->opened--; + if (r->file) + { + if (fn_close (fh_get_filename (r->fh), r->file) == EOF) + msg (ME, _("%s: Closing system file: %s."), + fh_get_filename (r->fh), strerror (errno)); + r->file = NULL; + } + + if (r->fh != NULL) + fh_close (r->fh, "system file", "rs"); + + free (r->vars); + free (r->buf); + free (r); } /* Dictionary reader. */ -static void *bufread (struct file_handle * handle, void *buf, size_t nbytes, - size_t minalloc); +static void buf_unread(struct sfm_reader *r, size_t byte_cnt); -static int read_header (struct file_handle * h, struct sfm_read_info * inf); -static int parse_format_spec (struct file_handle * h, int32 s, - struct fmt_spec * v, struct variable *vv); -static int read_value_labels (struct file_handle * h, struct variable ** var_by_index); -static int read_variables (struct file_handle * h, struct variable *** var_by_index); -static int read_machine_int32_info (struct file_handle * h, int size, int count); -static int read_machine_flt64_info (struct file_handle * h, int size, int count); -static int read_documents (struct file_handle * h); +static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt, + size_t min_alloc); -/* Displays the message X with corrupt_msg, then jumps to the lossage +static int read_header (struct sfm_reader *, + struct dictionary *, struct sfm_read_info *); +static int parse_format_spec (struct sfm_reader *, int32, + struct fmt_spec *, struct variable *); +static int read_value_labels (struct sfm_reader *, struct dictionary *, + struct variable **var_by_idx); +static int read_variables (struct sfm_reader *, + struct dictionary *, struct variable ***var_by_idx); +static int read_machine_int32_info (struct sfm_reader *, int size, int count); +static int read_machine_flt64_info (struct sfm_reader *, int size, int count); +static int read_documents (struct sfm_reader *, struct dictionary *); + +static int fread_ok (struct sfm_reader *, void *, size_t); + +/* Displays the message X with corrupt_msg, then jumps to the error label. */ -#define lose(X) \ - do \ - { \ - corrupt_msg X; \ - goto lossage; \ - } \ - while (0) - -/* Calls bufread with the specified arguments, and jumps to lossage if - the read fails. */ -#define assertive_bufread(a,b,c,d) \ - do \ - { \ - if (!bufread (a,b,c,d)) \ - goto lossage; \ - } \ - while (0) - -/* Reads the dictionary from file with handle H, and returns it in a - dictionary structure. This dictionary may be modified in order to - rename, reorder, and delete variables, etc. */ -struct dictionary * -sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) +#define lose(X) \ + do { \ + corrupt_msg X; \ + goto error; \ + } while (0) + +/* Calls buf_read with the specified arguments, and jumps to + error if the read fails. */ +#define assertive_buf_read(a,b,c,d) \ + do { \ + if (!buf_read (a,b,c,d)) \ + goto error; \ + } while (0) + +/* Opens the system file designated by file handle FH for + reading. Reads the system file's dictionary into *DICT. + If INFO is non-null, then it receives additional info about the + system file. */ +struct sfm_reader * +sfm_open_reader (struct file_handle *fh, struct dictionary **dict, + struct sfm_read_info *info) { - /* The file handle extension record. */ - struct sfm_fhuser_ext *ext; + struct sfm_reader *r = NULL; + struct variable **var_by_idx = NULL; - /* Allows for quick reference to variables according to indexes - relative to position within a case. */ - struct variable **var_by_index = NULL; + *dict = dict_create (); + if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) + goto error; - /* Check whether the file is already open. */ - if (h->class == &sfm_r_class) - { - ext = h->ext; - ext->opened++; - return ext->dict; - } - else if (h->class != NULL) - { - msg (ME, _("Cannot read file %s as system file: already opened for %s."), - handle_get_name (h), h->class->name); - return NULL; - } + /* Create and initialize reader. */ + r = xmalloc (sizeof *r); + r->fh = fh; + r->file = fn_open (fh_get_filename (fh), "rb"); - msg (VM (1), _("%s: Opening system-file handle %s for reading."), - handle_get_filename (h), handle_get_name (h)); - - /* Open the physical disk file. */ - ext = xmalloc (sizeof (struct sfm_fhuser_ext)); - ext->file = fn_open (handle_get_filename (h), "rb"); - if (ext->file == NULL) + r->reverse_endian = 0; + r->fix_specials = 0; + r->value_cnt = 0; + r->case_cnt = 0; + r->compressed = 0; + r->bias = 100.0; + r->weight_idx = -1; + + r->vars = NULL; + + r->sysmis = -FLT64_MAX; + r->highest = FLT64_MAX; + r->lowest = second_lowest_flt64; + + r->buf = r->ptr = r->end = NULL; + r->y = r->x + sizeof r->x; + + /* Check that file open succeeded. */ + if (r->file == NULL) { msg (ME, _("An error occurred while opening \"%s\" for reading " "as a system file: %s."), - handle_get_filename (h), strerror (errno)); + fh_get_filename (r->fh), strerror (errno)); err_cond_fail (); - free (ext); - return NULL; + goto error; } - /* Initialize the sfm_fhuser_ext structure. */ - h->class = &sfm_r_class; - h->ext = ext; - ext->dict = NULL; - ext->buf = ext->ptr = ext->end = NULL; - ext->y = ext->x + sizeof ext->x; - ext->opened = 1; - - /* Default special constants. */ - ext->sysmis = -FLT64_MAX; - ext->highest = FLT64_MAX; - ext->lowest = second_lowest_flt64; + /* Read header and variables. */ + if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx)) + goto error; - /* Read the header. */ - if (!read_header (h, inf)) - goto lossage; - - /* Read about the variables. */ - if (!read_variables (h, &var_by_index)) - goto lossage; /* Handle weighting. */ - if (ext->weight_index != -1) + if (r->weight_idx != -1) { - struct variable *wv = var_by_index[ext->weight_index]; + struct variable *weight_var; + + if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt) + lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " + "and number of elements per case (%d)."), + fh_get_filename (r->fh), r->weight_idx, r->value_cnt)); + - if (wv == NULL) - lose ((ME, _("%s: Weighting variable may not be a continuation of " - "a long string variable."), handle_get_filename (h))); - else if (wv->type == ALPHA) + weight_var = var_by_idx[r->weight_idx]; + + if (weight_var == NULL) + lose ((ME, + _("%s: Weighting variable may not be a continuation of " + "a long string variable."), fh_get_filename (fh))); + else if (weight_var->type == ALPHA) lose ((ME, _("%s: Weighting variable may not be a string variable."), - handle_get_filename (h))); + fh_get_filename (fh))); - dict_set_weight (ext->dict, wv); + dict_set_weight (*dict, weight_var); } else - dict_set_weight (ext->dict, NULL); + dict_set_weight (*dict, NULL); /* Read records of types 3, 4, 6, and 7. */ for (;;) { int32 rec_type; - assertive_bufread (h, &rec_type, sizeof rec_type, 0); - if (ext->reverse_endian) + assertive_buf_read (r, &rec_type, sizeof rec_type, 0); + if (r->reverse_endian) bswap_int32 (&rec_type); switch (rec_type) { case 3: - if (!read_value_labels (h, var_by_index)) - goto lossage; + if (!read_value_labels (r, *dict, var_by_idx)) + goto error; break; case 4: lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 " "records must always immediately follow type 3 " "records."), - handle_get_filename (h))); + fh_get_filename (r->fh))); case 6: - if (!read_documents (h)) - goto lossage; + if (!read_documents (r, *dict)) + goto error; break; case 7: @@ -332,47 +310,165 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) int32 count P; } data; + unsigned long bytes; int skip = 0; - assertive_bufread (h, &data, sizeof data, 0); - if (ext->reverse_endian) + assertive_buf_read (r, &data, sizeof data, 0); + if (r->reverse_endian) { bswap_int32 (&data.subtype); bswap_int32 (&data.size); bswap_int32 (&data.count); } + bytes = data.size * data.count; + if (bytes < data.size || bytes < data.count) + lose ((ME, "%s: Record type %d subtype %d too large.", + fh_get_filename (r->fh), rec_type, data.subtype)); switch (data.subtype) { case 3: - if (!read_machine_int32_info (h, data.size, data.count)) - goto lossage; + if (!read_machine_int32_info (r, data.size, data.count)) + goto error; break; case 4: - if (!read_machine_flt64_info (h, data.size, data.count)) - goto lossage; + if (!read_machine_flt64_info (r, data.size, data.count)) + goto error; break; case 5: - case 6: - case 11: /* ?? Used by SPSS 8.0. */ + case 6: /* ?? Used by SPSS 8.0. */ skip = 1; break; + + case 11: /* Variable display parameters */ + { + const int n_vars = data.count / 3 ; + int i; + if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) ) + { + msg (MW, _("%s: Invalid subrecord length. " + "Record: 7; Subrecord: 11"), + fh_get_filename (r->fh)); + skip = 1; + } + + for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i ) + { + struct + { + int32 measure P; + int32 width P; + int32 align P; + } + params; + + struct variable *v; + + assertive_buf_read (r, ¶ms, sizeof(params), 0); + + v = dict_get_var(*dict, i); + + v->measure = params.measure; + v->display_width = params.width; + v->alignment = params.align; + } + } + break; + + case 13: /* SPSS 12.0 Long variable name map */ + { + char *buf, *short_name, *save_ptr; + int idx; + + /* Read data. */ + buf = xmalloc (bytes + 1); + if (!buf_read (r, buf, bytes, 0)) + { + free (buf); + goto error; + } + buf[bytes] = '\0'; + + /* Parse data. */ + for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0; + short_name != NULL; + short_name = strtok_r (NULL, "=", &save_ptr), idx++) + { + char *long_name = strtok_r (NULL, "\t", &save_ptr); + struct variable *v; + + /* Validate long name. */ + if (long_name == NULL) + { + msg (MW, _("%s: Trailing garbage in long variable " + "name map."), + fh_get_filename (r->fh)); + break; + } + if (!var_is_valid_name (long_name, false)) + { + msg (MW, _("%s: Long variable mapping to invalid " + "variable name `%s'."), + fh_get_filename (r->fh), long_name); + break; + } + + /* Find variable using short name. */ + v = dict_lookup_var (*dict, short_name); + if (v == NULL) + { + msg (MW, _("%s: Long variable mapping for " + "nonexistent variable %s."), + fh_get_filename (r->fh), short_name); + break; + } + + /* Identify any duplicates. */ + if ( compare_var_names(short_name, long_name, 0) && + NULL != dict_lookup_var (*dict, long_name)) + { + lose ((ME, _("%s: Duplicate long variable name `%s' " + "within system file."), + fh_get_filename (r->fh), long_name)); + break; + } + + /* Set long name. + Renaming a variable may clear the short + name, but we want to retain it, so + re-set it explicitly. */ + dict_rename_var (*dict, v, long_name); + var_set_short_name (v, short_name); + + /* For compatability, make sure dictionary + is in long variable name map order. In + the common case, this has no effect, + because the dictionary and the long + variable name map are already in the + same order. */ + dict_reorder_var (*dict, v, idx); + } + + /* Free data. */ + free (buf); + } + break; default: msg (MW, _("%s: Unrecognized record type 7, subtype %d " "encountered in system file."), - handle_get_filename (h), data.subtype); + fh_get_filename (r->fh), data.subtype); skip = 1; } if (skip) { - void *x = bufread (h, NULL, data.size * data.count, 0); + void *x = buf_read (r, NULL, data.size * data.count, 0); if (x == NULL) - goto lossage; + goto error; free (x); } } @@ -382,46 +478,37 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) { int32 filler; - assertive_bufread (h, &filler, sizeof filler, 0); - goto break_out_of_loop; + assertive_buf_read (r, &filler, sizeof filler, 0); + goto success; } default: - lose ((ME, _("%s: Unrecognized record type %d."), - handle_get_filename (h), rec_type)); + corrupt_msg(MW, _("%s: Unrecognized record type %d."), + fh_get_filename (r->fh), rec_type); } } -break_out_of_loop: +success: /* Come here on successful completion. */ - msg (VM (2), _("Read system-file dictionary successfully.")); - -#if DEBUGGING - dump_dictionary (ext->dict); -#endif - free (var_by_index); - return ext->dict; + free (var_by_idx); + return r; -lossage: +error: /* Come here on unsuccessful completion. */ - msg (VM (1), _("Error reading system-file header.")); - - free (var_by_index); - fn_close (handle_get_filename (h), ext->file); - if (ext && ext->dict) - dict_destroy (ext->dict); - free (ext); - h->class = NULL; - h->ext = NULL; + sfm_close_reader (r); + free (var_by_idx); + if (*dict != NULL) + { + dict_destroy (*dict); + *dict = NULL; + } return NULL; } /* Read record type 7, subtype 3. */ static int -read_machine_int32_info (struct file_handle * h, int size, int count) +read_machine_int32_info (struct sfm_reader *r, int size, int count) { - struct sfm_fhuser_ext *ext = h->ext; - int32 data[8]; int file_bigendian; @@ -430,113 +517,105 @@ read_machine_int32_info (struct file_handle * h, int size, int count) if (size != sizeof (int32) || count != 8) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " "subtype 3. Expected size %d, count 8."), - handle_get_filename (h), size, count, sizeof (int32))); + fh_get_filename (r->fh), size, count, sizeof (int32))); - assertive_bufread (h, data, sizeof data, 0); - if (ext->reverse_endian) + assertive_buf_read (r, data, sizeof data, 0); + if (r->reverse_endian) for (i = 0; i < 8; i++) bswap_int32 (&data[i]); - /* PORTME: Check floating-point representation. */ #ifdef FPREP_IEEE754 if (data[4] != 1) lose ((ME, _("%s: Floating-point representation in system file is not " "IEEE-754. PSPP cannot convert between floating-point " "formats."), - handle_get_filename (h))); + fh_get_filename (r->fh))); +#else +#error Add support for your floating-point format. #endif - /* PORTME: Check recorded file endianness against intuited file - endianness. */ #ifdef WORDS_BIGENDIAN file_bigendian = 1; #else file_bigendian = 0; #endif - if (ext->reverse_endian) + if (r->reverse_endian) file_bigendian ^= 1; if (file_bigendian ^ (data[6] == 1)) - lose ((ME, _("%s: File-indicated endianness (%s) does not match endianness " - "intuited from file header (%s)."), - handle_get_filename (h), + lose ((ME, _("%s: File-indicated endianness (%s) does not match " + "endianness intuited from file header (%s)."), + fh_get_filename (r->fh), file_bigendian ? _("big-endian") : _("little-endian"), data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian") : _("unknown")))); /* PORTME: Character representation code. */ - if (data[7] != 2 && data[7] != 3) - lose ((ME, _("%s: File-indicated character representation code (%s) is not " - "ASCII."), - handle_get_filename (h), + if (data[7] != 2 && data[7] != 3) + lose ((ME, _("%s: File-indicated character representation code (%s) is " + "not ASCII."), + fh_get_filename (r->fh), (data[7] == 1 ? "EBCDIC" : (data[7] == 4 ? _("DEC Kanji") : _("Unknown"))))); return 1; -lossage: +error: return 0; } /* Read record type 7, subtype 4. */ static int -read_machine_flt64_info (struct file_handle * h, int size, int count) +read_machine_flt64_info (struct sfm_reader *r, int size, int count) { - struct sfm_fhuser_ext *ext = h->ext; - flt64 data[3]; - int i; if (size != sizeof (flt64) || count != 3) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " "subtype 4. Expected size %d, count 8."), - handle_get_filename (h), size, count, sizeof (flt64))); + fh_get_filename (r->fh), size, count, sizeof (flt64))); - assertive_bufread (h, data, sizeof data, 0); - if (ext->reverse_endian) + assertive_buf_read (r, data, sizeof data, 0); + if (r->reverse_endian) for (i = 0; i < 3; i++) bswap_flt64 (&data[i]); if (data[0] != SYSMIS || data[1] != FLT64_MAX || data[2] != second_lowest_flt64) { - ext->sysmis = data[0]; - ext->highest = data[1]; - ext->lowest = data[2]; + r->sysmis = data[0]; + r->highest = data[1]; + r->lowest = data[2]; msg (MW, _("%s: File-indicated value is different from internal value " "for at least one of the three system values. SYSMIS: " "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: " "%g, %g."), - handle_get_filename (h), (double) data[0], (double) SYSMIS, + fh_get_filename (r->fh), (double) data[0], (double) SYSMIS, (double) data[1], (double) FLT64_MAX, (double) data[2], (double) second_lowest_flt64); } return 1; -lossage: +error: return 0; } static int -read_header (struct file_handle * h, struct sfm_read_info * inf) +read_header (struct sfm_reader *r, + struct dictionary *dict, struct sfm_read_info *info) { - struct sfm_fhuser_ext *ext = h->ext; /* File extension strcut. */ struct sysfile_header hdr; /* Disk buffer. */ - struct dictionary *dict; /* File dictionary. */ char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */ int skip_amt = 0; /* Amount of product name to omit. */ int i; - /* Create the dictionary. */ - dict = ext->dict = dict_create (); - /* Read header, check magic. */ - assertive_bufread (h, &hdr, sizeof hdr, 0); - if (0 != strncmp ("$FL2", hdr.rec_type, 4)) + assertive_buf_read (r, &hdr, sizeof hdr, 0); + if (strncmp ("$FL2", hdr.rec_type, 4) != 0) lose ((ME, _("%s: Bad magic. Proper system files begin with " "the four characters `$FL2'. This file will not be read."), - handle_get_filename (h))); + fh_get_filename (r->fh))); /* Check eye-catcher string. */ memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name); @@ -570,52 +649,48 @@ read_header (struct file_handle * h, struct sfm_read_info * inf) } /* Check endianness. */ - /* PORTME: endianness. */ if (hdr.layout_code == 2) - ext->reverse_endian = 0; + r->reverse_endian = 0; else { bswap_int32 (&hdr.layout_code); if (hdr.layout_code != 2) lose ((ME, _("%s: File layout code has unexpected value %d. Value " "should be 2, in big-endian or little-endian format."), - handle_get_filename (h), hdr.layout_code)); + fh_get_filename (r->fh), hdr.layout_code)); - ext->reverse_endian = 1; + r->reverse_endian = 1; bswap_int32 (&hdr.case_size); - bswap_int32 (&hdr.compressed); - bswap_int32 (&hdr.weight_index); - bswap_int32 (&hdr.ncases); + bswap_int32 (&hdr.compress); + bswap_int32 (&hdr.weight_idx); + bswap_int32 (&hdr.case_cnt); bswap_flt64 (&hdr.bias); } + /* Copy basic info and verify correctness. */ - ext->case_size = hdr.case_size; - if (hdr.case_size <= 0 || ext->case_size > (INT_MAX - / (int) sizeof (union value) / 2)) - lose ((ME, _("%s: Number of elements per case (%d) is not between 1 " - "and %d."), - handle_get_filename (h), hdr.case_size, - INT_MAX / sizeof (union value) / 2)); - - ext->compressed = hdr.compressed; - - ext->weight_index = hdr.weight_index - 1; - if (hdr.weight_index < 0 || hdr.weight_index > hdr.case_size) - lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " - "and number of elements per case (%d)."), - handle_get_filename (h), hdr.weight_index, ext->case_size)); - - ext->ncases = hdr.ncases; - if (ext->ncases < -1 || ext->ncases > INT_MAX / 2) - lose ((ME, _("%s: Number of cases in file (%ld) is not between -1 and " - "%d."), handle_get_filename (h), (long) ext->ncases, INT_MAX / 2)); - - ext->bias = hdr.bias; - if (ext->bias != 100.0) + r->value_cnt = hdr.case_size; + + /* If value count is rediculous, then force it to -1 (a sentinel value) */ + if ( r->value_cnt < 0 || + r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2)) + r->value_cnt = -1; + + r->compressed = hdr.compress; + + r->weight_idx = hdr.weight_idx - 1; + + r->case_cnt = hdr.case_cnt; + if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2) + lose ((ME, + _("%s: Number of cases in file (%ld) is not between -1 and %d."), + fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2)); + + r->bias = hdr.bias; + if (r->bias != 100.0) corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual " "value of 100."), - handle_get_filename (h), ext->bias); + fh_get_filename (r->fh), r->bias); /* Make a file label only on the condition that the given label is not all spaces or nulls. */ @@ -635,68 +710,78 @@ read_header (struct file_handle * h, struct sfm_read_info * inf) } } - if (inf) + if (info) { char *cp; - memcpy (inf->creation_date, hdr.creation_date, 9); - inf->creation_date[9] = 0; + memcpy (info->creation_date, hdr.creation_date, 9); + info->creation_date[9] = 0; - memcpy (inf->creation_time, hdr.creation_time, 8); - inf->creation_time[8] = 0; + memcpy (info->creation_time, hdr.creation_time, 8); + info->creation_time[8] = 0; #ifdef WORDS_BIGENDIAN - inf->bigendian = !ext->reverse_endian; + info->big_endian = !r->reverse_endian; #else - inf->bigendian = ext->reverse_endian; + info->big_endian = r->reverse_endian; #endif - inf->compressed = hdr.compressed; + info->compressed = hdr.compress; - inf->ncases = hdr.ncases; + info->case_cnt = hdr.case_cnt; for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++) if (isgraph ((unsigned char) *cp)) break; - strcpy (inf->product, cp); + strcpy (info->product, cp); } return 1; -lossage: +error: return 0; } /* Reads most of the dictionary from file H; also fills in the - associated VAR_BY_INDEX array. The get.* elements in the - created dictionary are set to appropriate values to allow the - file to be read. */ + associated VAR_BY_IDX array. */ static int -read_variables (struct file_handle * h, struct variable *** var_by_index) +read_variables (struct sfm_reader *r, + struct dictionary *dict, struct variable ***var_by_idx) { int i; - struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */ - struct dictionary *dict = ext->dict; /* Dictionary being constructed. */ struct sysfile_variable sv; /* Disk buffer. */ int long_string_count = 0; /* # of long string continuation records still expected. */ int next_value = 0; /* Index to next `value' structure. */ - /* Allocate variables. */ - *var_by_index = xmalloc (sizeof **var_by_index * ext->case_size); + assert(r); + + *var_by_idx = 0; + + /* Pre-allocate variables. */ + if (r->value_cnt != -1) + { + *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx); + r->vars = xnmalloc (r->value_cnt, sizeof *r->vars); + } + /* Read in the entry for each variable and use the info to initialize the dictionary. */ - for (i = 0; i < ext->case_size; i++) + for (i = 0; ; ++i) { struct variable *vv; - char name[9]; + char name[SHORT_NAME_LEN + 1]; + int nv; int j; - assertive_bufread (h, &sv, sizeof sv, 0); + if ( r->value_cnt != -1 && i >= r->value_cnt ) + break; + + assertive_buf_read (r, &sv, sizeof sv, 0); - if (ext->reverse_endian) + if (r->reverse_endian) { bswap_int32 (&sv.rec_type); bswap_int32 (&sv.type); @@ -706,10 +791,19 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) bswap_int32 (&sv.write); } + /* We've come to the end of the variable entries */ if (sv.rec_type != 2) - lose ((ME, _("%s: position %d: Bad record type (%d); " - "the expected value was 2."), - handle_get_filename (h), i, sv.rec_type)); + { + buf_unread(r, sizeof sv); + r->value_cnt = i; + break; + } + + if ( -1 == r->value_cnt ) + { + *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx); + r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars); + } /* If there was a long string previously, make sure that the continuations are present; otherwise make sure there aren't @@ -719,48 +813,50 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) if (sv.type != -1) lose ((ME, _("%s: position %d: String variable does not have " "proper number of continuation records."), - handle_get_filename (h), i)); + fh_get_filename (r->fh), i)); + - (*var_by_index)[i] = NULL; + r->vars[i].width = -1; + (*var_by_idx)[i] = NULL; long_string_count--; continue; } else if (sv.type == -1) lose ((ME, _("%s: position %d: Superfluous long string continuation " "record."), - handle_get_filename (h), i)); + fh_get_filename (r->fh), i)); /* Check fields for validity. */ if (sv.type < 0 || sv.type > 255) lose ((ME, _("%s: position %d: Bad variable type code %d."), - handle_get_filename (h), i, sv.type)); + fh_get_filename (r->fh), i, sv.type)); if (sv.has_var_label != 0 && sv.has_var_label != 1) lose ((ME, _("%s: position %d: Variable label indicator field is not " - "0 or 1."), handle_get_filename (h), i)); + "0 or 1."), fh_get_filename (r->fh), i)); if (sv.n_missing_values < -3 || sv.n_missing_values > 3 || sv.n_missing_values == -1) lose ((ME, _("%s: position %d: Missing value indicator field is not " - "-3, -2, 0, 1, 2, or 3."), handle_get_filename (h), i)); + "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i)); /* Copy first character of variable name. */ if (!isalpha ((unsigned char) sv.name[0]) && sv.name[0] != '@' && sv.name[0] != '#') lose ((ME, _("%s: position %d: Variable name begins with invalid " "character."), - handle_get_filename (h), i)); + fh_get_filename (r->fh), i)); if (islower ((unsigned char) sv.name[0])) msg (MW, _("%s: position %d: Variable name begins with lowercase letter " "%c."), - handle_get_filename (h), i, sv.name[0]); + fh_get_filename (r->fh), i, sv.name[0]); if (sv.name[0] == '#') msg (MW, _("%s: position %d: Variable name begins with octothorpe " "(`#'). Scratch variables should not appear in system " "files."), - handle_get_filename (h), i); + fh_get_filename (r->fh), i); name[0] = toupper ((unsigned char) (sv.name[0])); /* Copy remaining characters of variable name. */ - for (j = 1; j < 8; j++) + for (j = 1; j < SHORT_NAME_LEN; j++) { int c = (unsigned char) sv.name[j]; @@ -770,7 +866,7 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) { msg (MW, _("%s: position %d: Variable name character %d is " "lowercase letter %c."), - handle_get_filename (h), i, j + 1, sv.name[j]); + fh_get_filename (r->fh), i, j + 1, sv.name[j]); name[j] = toupper ((unsigned char) (c)); } else if (isalnum (c) || c == '.' || c == '@' @@ -779,24 +875,27 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) else lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a " "variable name."), - handle_get_filename (h), i, c, c)); + fh_get_filename (r->fh), i, c, c)); } name[j] = 0; + if ( ! var_is_valid_name(name, false) ) + lose ((ME, _("%s: Invalid variable name `%s' within system file."), + fh_get_filename (r->fh), name)); + /* Create variable. */ - vv = (*var_by_index)[i] = dict_create_var (dict, name, sv.type); + + vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type); if (vv == NULL) lose ((ME, _("%s: Duplicate variable name `%s' within system file."), - handle_get_filename (h), name)); + fh_get_filename (r->fh), name)); + + var_set_short_name (vv, vv->name); /* Case reading data. */ - vv->get.fv = next_value; - if (sv.type == 0) - vv->get.nv = 1; - else - vv->get.nv = DIV_RND_UP (sv.type, sizeof (flt64)); - long_string_count = vv->get.nv - 1; - next_value += vv->get.nv; + nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64)); + long_string_count = nv - 1; + next_value += nv; /* Get variable label, if any. */ if (sv.has_var_label == 1) @@ -805,149 +904,141 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) int32 len; /* Read length of label. */ - assertive_bufread (h, &len, sizeof len, 0); - if (ext->reverse_endian) + assertive_buf_read (r, &len, sizeof len, 0); + if (r->reverse_endian) bswap_int32 (&len); /* Check len. */ if (len < 0 || len > 255) lose ((ME, _("%s: Variable %s indicates variable label of invalid " "length %d."), - handle_get_filename (h), vv->name, len)); + fh_get_filename (r->fh), vv->name, len)); - /* Read label into variable structure. */ - vv->label = bufread (h, NULL, ROUND_UP (len, sizeof (int32)), len + 1); - if (vv->label == NULL) - goto lossage; - vv->label[len] = '\0'; + if ( len != 0 ) + { + /* Read label into variable structure. */ + vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1); + if (vv->label == NULL) + goto error; + vv->label[len] = '\0'; + } } /* Set missing values. */ if (sv.n_missing_values != 0) { flt64 mv[3]; + int mv_cnt = abs (sv.n_missing_values); if (vv->width > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " "values."), - handle_get_filename (h), vv->name)); + fh_get_filename (r->fh), vv->name)); - assertive_bufread (h, mv, sizeof *mv * abs (sv.n_missing_values), 0); + assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); - if (ext->reverse_endian && vv->type == NUMERIC) - for (j = 0; j < abs (sv.n_missing_values); j++) + if (r->reverse_endian && vv->type == NUMERIC) + for (j = 0; j < mv_cnt; j++) bswap_flt64 (&mv[j]); if (sv.n_missing_values > 0) { - vv->miss_type = sv.n_missing_values; - if (vv->type == NUMERIC) - for (j = 0; j < sv.n_missing_values; j++) - vv->missing[j].f = mv[j]; - else - for (j = 0; j < sv.n_missing_values; j++) - memcpy (vv->missing[j].s, &mv[j], vv->width); + for (j = 0; j < sv.n_missing_values; j++) + if (vv->type == NUMERIC) + mv_add_num (&vv->miss, mv[j]); + else + mv_add_str (&vv->miss, (char *) &mv[j]); } else { - int x = 0; - if (vv->type == ALPHA) lose ((ME, _("%s: String variable %s may not have missing " "values specified as a range."), - handle_get_filename (h), vv->name)); + fh_get_filename (r->fh), vv->name)); - if (mv[0] == ext->lowest) - { - vv->miss_type = MISSING_LOW; - vv->missing[x++].f = mv[1]; - } - else if (mv[1] == ext->highest) - { - vv->miss_type = MISSING_HIGH; - vv->missing[x++].f = mv[0]; - } + if (mv[0] == r->lowest) + mv_add_num_range (&vv->miss, LOWEST, mv[1]); + else if (mv[1] == r->highest) + mv_add_num_range (&vv->miss, mv[0], HIGHEST); else - { - vv->miss_type = MISSING_RANGE; - vv->missing[x++].f = mv[0]; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, mv[0], mv[1]); if (sv.n_missing_values == -3) - { - vv->miss_type += 3; - vv->missing[x++].f = mv[2]; - } + mv_add_num (&vv->miss, mv[2]); } } - else - vv->miss_type = MISSING_NONE; - if (!parse_format_spec (h, sv.print, &vv->print, vv) - || !parse_format_spec (h, sv.write, &vv->write, vv)) - goto lossage; + if (!parse_format_spec (r, sv.print, &vv->print, vv) + || !parse_format_spec (r, sv.write, &vv->write, vv)) + goto error; + + r->vars[i].width = vv->width; + r->vars[i].fv = vv->fv; + } /* Some consistency checks. */ if (long_string_count != 0) lose ((ME, _("%s: Long string continuation records omitted at end of " "dictionary."), - handle_get_filename (h))); - if (next_value != ext->case_size) - lose ((ME, _("%s: System file header indicates %d variable positions but " + fh_get_filename (r->fh))); + + if (next_value != r->value_cnt) + corrupt_msg(MW, _("%s: System file header indicates %d variable positions but " "%d were read from file."), - handle_get_filename (h), ext->case_size, next_value)); + fh_get_filename (r->fh), r->value_cnt, next_value); - return 1; -lossage: - dict_destroy (dict); - ext->dict = NULL; + return 1; +error: return 0; } /* Translates the format spec from sysfile format to internal format. */ static int -parse_format_spec (struct file_handle *h, int32 s, struct fmt_spec *v, struct variable *vv) +parse_format_spec (struct sfm_reader *r, int32 s, + struct fmt_spec *f, struct variable *v) { - v->type = translate_fmt ((s >> 16) & 0xff); - if (v->type == -1) + f->type = translate_fmt ((s >> 16) & 0xff); + if (f->type == -1) lose ((ME, _("%s: Bad format specifier byte (%d)."), - handle_get_filename (h), (s >> 16) & 0xff)); - v->w = (s >> 8) & 0xff; - v->d = s & 0xff; - - /* FIXME? Should verify the resulting specifier more thoroughly. */ + fh_get_filename (r->fh), (s >> 16) & 0xff)); + f->w = (s >> 8) & 0xff; + f->d = s & 0xff; - if (v->type == -1) - lose ((ME, _("%s: Bad format specifier byte (%d)."), - handle_get_filename (h), (s >> 16) & 0xff)); - if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0)) + if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0)) lose ((ME, _("%s: %s variable %s has %s format specifier %s."), - handle_get_filename (h), - vv->type == ALPHA ? _("String") : _("Numeric"), - vv->name, - formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"), - formats[v->type].name)); + fh_get_filename (r->fh), + v->type == ALPHA ? _("String") : _("Numeric"), + v->name, + formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"), + formats[f->type].name)); + + if (!check_output_specifier (f, false) + || !check_specifier_width (f, v->width, false)) + { + msg (ME, _("%s variable %s has invalid format specifier %s."), + v->type == NUMERIC ? _("Numeric") : _("String"), + v->name, fmt_to_string (f)); + *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0); + } return 1; -lossage: +error: return 0; } /* Reads value labels from sysfile H and inserts them into the associated dictionary. */ int -read_value_labels (struct file_handle * h, struct variable ** var_by_index) +read_value_labels (struct sfm_reader *r, + struct dictionary *dict, struct variable **var_by_idx) { - struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */ - struct label { - unsigned char raw_value[8]; /* Value as uninterpreted bytes. */ + char raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ char *label; /* Null-terminated label string. */ }; @@ -965,12 +1056,19 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) don't know yet whether it is of numeric or string type. */ /* Read number of labels. */ - assertive_bufread (h, &n_labels, sizeof n_labels, 0); - if (ext->reverse_endian) + assertive_buf_read (r, &n_labels, sizeof n_labels, 0); + if (r->reverse_endian) bswap_int32 (&n_labels); + if ( n_labels >= ((int32) ~0) / sizeof *labels) + { + corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."), + fh_get_filename (r->fh), n_labels); + n_labels = 0; + } + /* Allocate memory. */ - labels = xmalloc (n_labels * sizeof *labels); + labels = xcalloc (n_labels, sizeof *labels); for (i = 0; i < n_labels; i++) labels[i].label = NULL; @@ -982,15 +1080,15 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) size_t padded_len; /* Read value. */ - assertive_bufread (h, label->raw_value, sizeof label->raw_value, 0); + assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0); /* Read label length. */ - assertive_bufread (h, &label_len, sizeof label_len, 0); + assertive_buf_read (r, &label_len, sizeof label_len, 0); padded_len = ROUND_UP (label_len + 1, sizeof (flt64)); /* Read label, padding. */ label->label = xmalloc (padded_len + 1); - assertive_bufread (h, label->label, padded_len - 1, 0); + assertive_buf_read (r, label->label, padded_len - 1, 0); label->label[label_len] = 0; } @@ -1001,53 +1099,53 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) { int32 rec_type; - assertive_bufread (h, &rec_type, sizeof rec_type, 0); - if (ext->reverse_endian) + assertive_buf_read (r, &rec_type, sizeof rec_type, 0); + if (r->reverse_endian) bswap_int32 (&rec_type); if (rec_type != 4) lose ((ME, _("%s: Variable index record (type 4) does not immediately " "follow value label record (type 3) as it should."), - handle_get_filename (h))); + fh_get_filename (r->fh))); } /* Read number of variables associated with value label from type 4 record. */ - assertive_bufread (h, &n_vars, sizeof n_vars, 0); - if (ext->reverse_endian) + assertive_buf_read (r, &n_vars, sizeof n_vars, 0); + if (r->reverse_endian) bswap_int32 (&n_vars); - if (n_vars < 1 || n_vars > dict_get_var_cnt (ext->dict)) + if (n_vars < 1 || n_vars > dict_get_var_cnt (dict)) lose ((ME, _("%s: Number of variables associated with a value label (%d) " "is not between 1 and the number of variables (%d)."), - handle_get_filename (h), n_vars, dict_get_var_cnt (ext->dict))); + fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict))); /* Read the list of variables. */ - var = xmalloc (n_vars * sizeof *var); + var = xnmalloc (n_vars, sizeof *var); for (i = 0; i < n_vars; i++) { - int32 var_index; + int32 var_idx; struct variable *v; /* Read variable index, check range. */ - assertive_bufread (h, &var_index, sizeof var_index, 0); - if (ext->reverse_endian) - bswap_int32 (&var_index); - if (var_index < 1 || var_index > ext->case_size) + assertive_buf_read (r, &var_idx, sizeof var_idx, 0); + if (r->reverse_endian) + bswap_int32 (&var_idx); + if (var_idx < 1 || var_idx > r->value_cnt) lose ((ME, _("%s: Variable index associated with value label (%d) is " "not between 1 and the number of values (%d)."), - handle_get_filename (h), var_index, ext->case_size)); + fh_get_filename (r->fh), var_idx, r->value_cnt)); /* Make sure it's a real variable. */ - v = var_by_index[var_index - 1]; + v = var_by_idx[var_idx - 1]; if (v == NULL) lose ((ME, _("%s: Variable index associated with value label (%d) " "refers to a continuation of a string variable, not to " "an actual variable."), - handle_get_filename (h), var_index)); + fh_get_filename (r->fh), var_idx)); if (v->type == ALPHA && v->width > MAX_SHORT_STRING) lose ((ME, _("%s: Value labels are not allowed on long string " "variables (%s)."), - handle_get_filename (h), v->name)); + fh_get_filename (r->fh), v->name)); /* Add it to the list of variables. */ var[i] = v; @@ -1059,7 +1157,7 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) lose ((ME, _("%s: Variables associated with value label are not all of " "identical type. Variable %s has %s type, but variable " "%s has %s type."), - handle_get_filename (h), + fh_get_filename (r->fh), var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"), var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric"))); @@ -1070,14 +1168,14 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) if (var[0]->type == ALPHA) { - const int copy_len = min (sizeof (label->raw_value), - sizeof (label->label)); + const int copy_len = min (sizeof label->raw_value, + sizeof label->label); memcpy (label->value.s, label->raw_value, copy_len); } else { flt64 f; assert (sizeof f == sizeof label->raw_value); memcpy (&f, label->raw_value, sizeof f); - if (ext->reverse_endian) + if (r->reverse_endian) bswap_flt64 (&f); label->value.f = f; } @@ -1099,11 +1197,11 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) if (var[0]->type == NUMERIC) msg (MW, _("%s: File contains duplicate label for value %g for " "variable %s."), - handle_get_filename (h), label->value.f, v->name); + fh_get_filename (r->fh), label->value.f, v->name); else msg (MW, _("%s: File contains duplicate label for value `%.*s' " "for variable %s."), - handle_get_filename (h), v->width, label->value.s, v->name); + fh_get_filename (r->fh), v->width, label->value.s, v->name); } } @@ -1113,7 +1211,7 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) free (var); return 1; -lossage: +error: if (labels) { for (i = 0; i < n_labels; i++) @@ -1124,144 +1222,80 @@ lossage: return 0; } -/* Reads NBYTES bytes from the file represented by H. If BUF is +/* Reads BYTE_CNT bytes from the file represented by H. If BUF is non-NULL, uses that as the buffer; otherwise allocates at least - MINALLOC bytes. Returns a pointer to the buffer on success, NULL + MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL on failure. */ static void * -bufread (struct file_handle * h, void *buf, size_t nbytes, size_t minalloc) +buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc) { - struct sfm_fhuser_ext *ext = h->ext; + assert (r); + + if (buf == NULL && byte_cnt > 0 ) + buf = xmalloc (max (byte_cnt, min_alloc)); - if (buf == NULL) - buf = xmalloc (max (nbytes, minalloc)); - if (1 != fread (buf, nbytes, 1, ext->file)) + if ( byte_cnt == 0 ) + return buf; + + + if (1 != fread (buf, byte_cnt, 1, r->file)) { - if (ferror (ext->file)) + if (ferror (r->file)) msg (ME, _("%s: Reading system file: %s."), - handle_get_filename (h), strerror (errno)); + fh_get_filename (r->fh), strerror (errno)); else corrupt_msg (ME, _("%s: Unexpected end of file."), - handle_get_filename (h)); + fh_get_filename (r->fh)); return NULL; } return buf; } -/* Reads a document record, type 6, from system file H, and sets up +/* Winds the reader BYTE_CNT bytes back in the reader stream. */ +void +buf_unread(struct sfm_reader *r, size_t byte_cnt) +{ + assert(byte_cnt > 0); + + if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR)) + { + msg (ME, _("%s: Seeking system file: %s."), + fh_get_filename (r->fh), strerror (errno)); + } +} + +/* Reads a document record, type 6, from system file R, and sets up the documents and n_documents fields in the associated dictionary. */ static int -read_documents (struct file_handle * h) +read_documents (struct sfm_reader *r, struct dictionary *dict) { - struct sfm_fhuser_ext *ext = h->ext; - struct dictionary *dict = ext->dict; - int32 n_lines; + int32 line_cnt; char *documents; if (dict_get_documents (dict) != NULL) lose ((ME, _("%s: System file contains multiple " "type 6 (document) records."), - handle_get_filename (h))); + fh_get_filename (r->fh))); - assertive_bufread (h, &n_lines, sizeof n_lines, 0); - if (n_lines <= 0) + assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0); + if (line_cnt <= 0) lose ((ME, _("%s: Number of document lines (%ld) " "must be greater than 0."), - handle_get_filename (h), (long) n_lines)); + fh_get_filename (r->fh), (long) line_cnt)); - documents = bufread (h, NULL, 80 * n_lines, n_lines * 80 + 1); + documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1); /* FIXME? Run through asciify. */ if (documents == NULL) return 0; - documents[80 * n_lines] = '\0'; + documents[80 * line_cnt] = '\0'; dict_set_documents (dict, documents); free (documents); return 1; -lossage: +error: return 0; } - -#if GLOBAL_DEBUGGING -#include "debug-print.h" -/* Displays dictionary DICT on stdout. */ -void -dump_dictionary (struct dictionary * dict) -{ - int i; - - debug_printf ((_("dictionary:\n"))); - for (i = 0; i < dict->nvar; i++) - { - char print[32]; - struct variable *v = dict->var[i]; - int n, j; - - debug_printf ((" var %s", v->name)); - debug_printf (("(type:%s,%d)", (v->type == NUMERIC ? _("num") - : (v->type == ALPHA ? _("str") : "!!!")), - v->width)); - debug_printf (("(fv:%d,%d)", v->fv, v->nv)); - debug_printf (("(left:%s)(miss:", v->left ? _("left") : _("right"))); - - switch (v->miss_type) - { - case MISSING_NONE: - n = 0; - debug_printf ((_("none"))); - break; - case MISSING_1: - n = 1; - debug_printf ((_("one"))); - break; - case MISSING_2: - n = 2; - debug_printf ((_("two"))); - break; - case MISSING_3: - n = 3; - debug_printf ((_("three"))); - break; - case MISSING_RANGE: - n = 2; - debug_printf ((_("range"))); - break; - case MISSING_LOW: - n = 1; - debug_printf ((_("low"))); - break; - case MISSING_HIGH: - n = 1; - debug_printf ((_("high"))); - break; - case MISSING_RANGE_1: - n = 3; - debug_printf ((_("range+1"))); - break; - case MISSING_LOW_1: - n = 2; - debug_printf ((_("low+1"))); - break; - case MISSING_HIGH_1: - n = 2; - debug_printf ((_("high+1"))); - break; - default: - assert (0); - } - for (j = 0; j < n; j++) - if (v->type == NUMERIC) - debug_printf ((",%g", v->missing[j].f)); - else - debug_printf ((",\"%.*s\"", v->width, v->missing[j].s)); - strcpy (print, fmt_to_string (&v->print)); - debug_printf ((")(fmt:%s,%s)(lbl:%s)\n", - print, fmt_to_string (&v->write), - v->label ? v->label : "nolabel")); - } -} -#endif /* Data reader. */ @@ -1269,47 +1303,44 @@ dump_dictionary (struct dictionary * dict) appropriately. Returns nonzero only if both no errors occur and data was read. */ static int -buffer_input (struct file_handle * h) +buffer_input (struct sfm_reader *r) { - struct sfm_fhuser_ext *ext = h->ext; size_t amt; - if (ext->buf == NULL) - ext->buf = xmalloc (sizeof *ext->buf * 128); - amt = fread (ext->buf, sizeof *ext->buf, 128, ext->file); - if (ferror (ext->file)) + if (r->buf == NULL) + r->buf = xnmalloc (128, sizeof *r->buf); + amt = fread (r->buf, sizeof *r->buf, 128, r->file); + if (ferror (r->file)) { msg (ME, _("%s: Error reading file: %s."), - handle_get_filename (h), strerror (errno)); + fh_get_filename (r->fh), strerror (errno)); return 0; } - ext->ptr = ext->buf; - ext->end = &ext->buf[amt]; + r->ptr = r->buf; + r->end = &r->buf[amt]; return amt; } -/* Reads a single case consisting of compressed data from system file - H into the array TEMP[] according to dictionary DICT, and returns - nonzero only if successful. */ -/* Data in system files is compressed in the following manner: - data values are grouped into sets of eight; each of the eight has - one instruction byte, which are output together in an octet; each - byte gives a value for that byte or indicates that the value can be - found following the instructions. */ +/* Reads a single case consisting of compressed data from system + file H into the array BUF[] according to reader R, and + returns nonzero only if successful. */ +/* Data in system files is compressed in this manner. Data + values are grouped into sets of eight ("octets"). Each value + in an octet has one instruction byte that are output together. + Each instruction byte gives a value for that byte or indicates + that the value can be found following the instructions. */ static int -read_compressed_data (struct file_handle * h, flt64 * temp) +read_compressed_data (struct sfm_reader *r, flt64 *buf) { - struct sfm_fhuser_ext *ext = h->ext; + const unsigned char *p_end = r->x + sizeof (flt64); + unsigned char *p = r->y; - const unsigned char *p_end = ext->x + sizeof (flt64); - unsigned char *p = ext->y; - - const flt64 *temp_beg = temp; - const flt64 *temp_end = &temp[ext->case_size]; + const flt64 *buf_beg = buf; + const flt64 *buf_end = &buf[r->value_cnt]; for (;;) { - for (; p < p_end; p++) + for (; p < p_end; p++){ switch (*p) { case 0: @@ -1317,152 +1348,195 @@ read_compressed_data (struct file_handle * h, flt64 * temp) continue; case 252: /* Code 252 is end of file. */ - if (temp_beg != temp) + if (buf_beg != buf) lose ((ME, _("%s: Compressed data is corrupted. Data ends " "in partial case."), - handle_get_filename (h))); - goto lossage; + fh_get_filename (r->fh))); + goto error; case 253: /* Code 253 indicates that the value is stored explicitly following the instruction bytes. */ - if (ext->ptr == NULL || ext->ptr >= ext->end) - if (!buffer_input (h)) + if (r->ptr == NULL || r->ptr >= r->end) + if (!buffer_input (r)) { lose ((ME, _("%s: Unexpected end of file."), - handle_get_filename (h))); - goto lossage; + fh_get_filename (r->fh))); + goto error; } - memcpy (temp++, ext->ptr++, sizeof *temp); - if (temp >= temp_end) - goto winnage; + memcpy (buf++, r->ptr++, sizeof *buf); + if (buf >= buf_end) + goto success; break; case 254: /* Code 254 indicates a string that is all blanks. */ - memset (temp++, ' ', sizeof *temp); - if (temp >= temp_end) - goto winnage; + memset (buf++, ' ', sizeof *buf); + if (buf >= buf_end) + goto success; break; case 255: /* Code 255 indicates the system-missing value. */ - *temp = ext->sysmis; - if (ext->reverse_endian) - bswap_flt64 (temp); - temp++; - if (temp >= temp_end) - goto winnage; + *buf = r->sysmis; + if (r->reverse_endian) + bswap_flt64 (buf); + buf++; + if (buf >= buf_end) + goto success; break; default: /* Codes 1 through 251 inclusive are taken to indicate a value of (BYTE - BIAS), where BYTE is the byte's value and BIAS is the compression bias (generally 100.0). */ - *temp = *p - ext->bias; - if (ext->reverse_endian) - bswap_flt64 (temp); - temp++; - if (temp >= temp_end) - goto winnage; + *buf = *p - r->bias; + if (r->reverse_endian) + bswap_flt64 (buf); + buf++; + if (buf >= buf_end) + goto success; break; } - + } /* We have reached the end of this instruction octet. Read another. */ - if (ext->ptr == NULL || ext->ptr >= ext->end) - if (!buffer_input (h)) + if (r->ptr == NULL || r->ptr >= r->end) + if (!buffer_input (r)) { - if (temp_beg != temp) + if (buf_beg != buf) lose ((ME, _("%s: Unexpected end of file."), - handle_get_filename (h))); - goto lossage; + fh_get_filename (r->fh))); + goto error; } - memcpy (ext->x, ext->ptr++, sizeof *temp); - p = ext->x; + memcpy (r->x, r->ptr++, sizeof *buf); + p = r->x; } /* Not reached. */ assert (0); -winnage: +success: /* We have filled up an entire record. Update state and return successfully. */ - ext->y = ++p; + r->y = ++p; return 1; -lossage: +error: /* We have been unsuccessful at filling a record, either through i/o error or through an end-of-file indication. Update state and return unsuccessfully. */ return 0; } -/* Reads one case from system file H into the value array PERM - according to the instructions given in associated dictionary DICT, - which must have the get.* elements appropriately set. Returns - nonzero only if successful. */ +/* Reads one case from READER's file into C. Returns nonzero + only if successful. */ int -sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * dict) +sfm_read_case (struct sfm_reader *r, struct ccase *c) { - struct sfm_fhuser_ext *ext = h->ext; - - size_t nbytes; - flt64 *temp; - - int i; - - /* The first concern is to obtain a full case relative to the data - file. (Cases in the data file have no particular relationship to - cases in the active file.) */ - nbytes = sizeof *temp * ext->case_size; - temp = local_alloc (nbytes); - - if (ext->compressed == 0) + if (!r->compressed && sizeof (flt64) == sizeof (double)) { - size_t amt = fread (temp, 1, nbytes, ext->file); + /* Fast path: external and internal representations are the + same, except possibly for endianness or SYSMIS. Read + directly into the case's buffer, then fix up any minor + details as needed. */ + if (!fread_ok (r, case_data_all_rw (c), + sizeof (union value) * r->value_cnt)) + return 0; + + /* Fix up endianness if needed. */ + if (r->reverse_endian) + { + int i; + + for (i = 0; i < r->value_cnt; i++) + if (r->vars[i].width == 0) + bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f); + } - if (amt != nbytes) - { - if (ferror (ext->file)) - msg (ME, _("%s: Reading system file: %s."), - handle_get_filename (h), strerror (errno)); - else if (amt != 0) - msg (ME, _("%s: Partial record at end of system file."), - handle_get_filename (h)); - goto lossage; - } + /* Fix up SYSMIS values if needed. + I don't think this will ever actually kick in, but it + can't hurt. */ + if (r->sysmis != SYSMIS) + { + int i; + + for (i = 0; i < r->value_cnt; i++) + if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis) + case_data_rw (c, r->vars[i].fv)->f = SYSMIS; + } } - else if (!read_compressed_data (h, temp)) - goto lossage; - - /* Translate a case in data file format to a case in active file - format. */ - for (i = 0; i < dict_get_var_cnt (dict); i++) + else { - struct variable *v = dict_get_var (dict, i); - - if (v->get.fv == -1) - continue; - - if (v->type == NUMERIC) - { - flt64 src = temp[v->get.fv]; - if (ext->reverse_endian) - bswap_flt64 (&src); - perm[v->fv].f = src == ext->sysmis ? SYSMIS : src; - } + /* Slow path: internal and external representations differ. + Read into a bounce buffer, then copy to C. */ + flt64 *bounce; + flt64 *bounce_cur; + size_t bounce_size; + int read_ok; + int i; + + bounce_size = sizeof *bounce * r->value_cnt; + bounce = bounce_cur = local_alloc (bounce_size); + + if (!r->compressed) + read_ok = fread_ok (r, bounce, bounce_size); else - memcpy (&perm[v->fv].s, &temp[v->get.fv], v->width); - } + read_ok = read_compressed_data (r, bounce); + if (!read_ok) + { + local_free (bounce); + return 0; + } - local_free (temp); - return 1; + for (i = 0; i < r->value_cnt; i++) + { + struct sfm_var *v = &r->vars[i]; + + if (v->width == 0) + { + flt64 f = *bounce_cur++; + if (r->reverse_endian) + bswap_flt64 (&f); + case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f; + } + else if (v->width != -1) + { + memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width); + bounce_cur += DIV_RND_UP (v->width, sizeof (flt64)); + } + } -lossage: - local_free (temp); - return 0; + local_free (bounce); + } + return 1; } -static struct fh_ext_class sfm_r_class = +static int +fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt) { - 3, - N_("reading as a system file"), - sfm_close, -}; + size_t read_bytes = fread (buffer, 1, byte_cnt, r->file); + + if (read_bytes == byte_cnt) + return 1; + else + { + if (ferror (r->file)) + msg (ME, _("%s: Reading system file: %s."), + fh_get_filename (r->fh), strerror (errno)); + else if (read_bytes != 0) + msg (ME, _("%s: Partial record at end of system file."), + fh_get_filename (r->fh)); + return 0; + } +} + +/* Returns true if FILE is an SPSS system file, + false otherwise. */ +bool +sfm_detect (FILE *file) +{ + struct sysfile_header hdr; + + if (fread (&hdr, sizeof hdr, 1, file) != 1) + return false; + if (strncmp ("$FL2", hdr.rec_type, 4)) + return false; + return true; +}