X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=b0a41a83573b0e986fb35bfb3bce46379282662f;hb=b5c82cc9aabe7e641011130240ae1b2e84348e23;hp=7ccf969b775e9ea763d1b35cae40bc345d696ac3;hpb=3f2ed1c5fe6dc692ca00bb18a15e41617fa2d37d;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 7ccf969b..b0a41a83 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,1540 +1,2132 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include +#include +#include + #include #include -#include +#include +#include +#include -#include +#include +#include #include #include -#include #include +#include #include - -#include "sys-file-reader.h" -#include "sfm-private.h" -#include "case.h" -#include "dictionary.h" -#include "file-handle-def.h" -#include "file-name.h" -#include "format.h" -#include "value-labels.h" -#include "variable.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "c-ctype.h" +#include "inttostr.h" +#include "minmax.h" +#include "unlocked-io.h" +#include "xalloc.h" +#include "xsize.h" #include "gettext.h" #define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) /* System file reader. */ struct sfm_reader { - struct file_handle *fh; /* File handle. */ - FILE *file; /* File stream. */ + /* Resource tracking. */ + struct pool *pool; /* All system file state. */ + jmp_buf bail_out; /* longjmp() target for error handling. */ - int reverse_endian; /* 1=file has endianness opposite us. */ - int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */ - int value_cnt; /* Number of `union values's per case. */ - long case_cnt; /* Number of cases, -1 if unknown. */ - int compressed; /* 1=compressed, 0=not compressed. */ + /* File state. */ + struct file_handle *fh; /* File handle. */ + struct fh_lock *lock; /* Mutual exclusion for file handle. */ + FILE *file; /* File stream. */ + bool error; /* I/O or corruption error? */ + struct caseproto *proto; /* Format of output cases. */ + + /* File format. */ + enum integer_format integer_format; /* On-disk integer format. */ + enum float_format float_format; /* On-disk floating point format. */ + int oct_cnt; /* Number of 8-byte units per case. */ + struct sfm_var *sfm_vars; /* Variables. */ + size_t sfm_var_cnt; /* Number of variables. */ + casenumber case_cnt; /* Number of cases */ + bool has_long_var_names; /* File has a long variable name map */ + + /* Decompression. */ + bool compressed; /* File is compressed? */ double bias; /* Compression bias, usually 100.0. */ - int weight_idx; /* 0-based index of weighting variable, or -1. */ - bool ok; /* False after an I/O error or corrupt data. */ + uint8_t opcodes[8]; /* Current block of opcodes. */ + size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */ + }; - /* Variables. */ - struct sfm_var *vars; /* Variables. */ +static const struct casereader_class sys_file_casereader_class; - /* File's special constants. */ - flt64 sysmis; - flt64 highest; - flt64 lowest; +static bool close_reader (struct sfm_reader *); - /* Decompression buffer. */ - flt64 *buf; /* Buffer data. */ - flt64 *ptr; /* Current location in buffer. */ - flt64 *end; /* End of buffer data. */ +static struct variable **make_var_by_value_idx (struct sfm_reader *, + struct dictionary *); +static struct variable *lookup_var_by_value_idx (struct sfm_reader *, + struct variable **, + int value_idx); - /* Compression instruction octet. */ - unsigned char x[8]; /* Current instruction octet. */ - unsigned char *y; /* Location in current instruction octet. */ - }; +static void sys_msg (struct sfm_reader *r, int class, + const char *format, va_list args) + PRINTF_FORMAT (3, 0); +static void sys_warn (struct sfm_reader *, const char *, ...) + PRINTF_FORMAT (2, 3); +static void sys_error (struct sfm_reader *, const char *, ...) + PRINTF_FORMAT (2, 3) + NO_RETURN; + +static void read_bytes (struct sfm_reader *, void *, size_t); +static bool try_read_bytes (struct sfm_reader *, void *, size_t); +static int read_int (struct sfm_reader *); +static double read_float (struct sfm_reader *); +static void read_string (struct sfm_reader *, char *, size_t); +static void skip_bytes (struct sfm_reader *, size_t); + +static struct text_record *open_text_record (struct sfm_reader *, size_t size); +static void close_text_record (struct sfm_reader *r, + struct text_record *); +static bool read_variable_to_value_pair (struct sfm_reader *, + struct dictionary *, + struct text_record *, + struct variable **var, char **value); +static void text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) + PRINTF_FORMAT (3, 4); +static char *text_get_token (struct text_record *, + struct substring delimiters); +static bool text_match (struct text_record *, char c); +static bool text_read_short_name (struct sfm_reader *, struct dictionary *, + struct text_record *, + struct substring delimiters, + struct variable **); + +static bool close_reader (struct sfm_reader *r); + +/* Dictionary reader. */ -/* A variable in a system file. */ -struct sfm_var +enum which_format { - int width; /* 0=numeric, otherwise string width. */ - int fv; /* Index into case. */ + PRINT_FORMAT, + WRITE_FORMAT }; - -/* Utilities. */ -/* Swap bytes *A and *B. */ -static inline void -bswap (char *a, char *b) +static void read_header (struct sfm_reader *, struct dictionary *, + int *weight_idx, int *claimed_oct_cnt, + struct sfm_read_info *); +static void read_variable_record (struct sfm_reader *, struct dictionary *, + int *format_warning_cnt); +static void parse_format_spec (struct sfm_reader *, unsigned int, + enum which_format, struct variable *, + int *format_warning_cnt); +static void setup_weight (struct sfm_reader *, int weight_idx, + struct variable **var_by_value_idx, + struct dictionary *); +static void read_documents (struct sfm_reader *, struct dictionary *); +static void read_value_labels (struct sfm_reader *, struct dictionary *, + struct variable **var_by_value_idx); + +static void read_extension_record (struct sfm_reader *, struct dictionary *, + struct sfm_read_info *); +static void read_machine_integer_info (struct sfm_reader *, + size_t size, size_t count, + struct sfm_read_info *, + struct dictionary * + ); +static void read_machine_float_info (struct sfm_reader *, + size_t size, size_t count); +static void read_display_parameters (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_long_var_name_map (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_long_string_map (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_data_file_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_variable_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_long_string_value_labels (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); + +/* Convert all the strings in DICT from the dict encoding to UTF8 */ +static void +recode_strings (struct dictionary *dict) { - char t = *a; - *a = *b; - *b = t; -} + int i; -/* Reverse the byte order of 32-bit integer *X. */ -static inline void -bswap_int32 (int32_t *x_) -{ - char *x = (char *) x_; - bswap (x + 0, x + 3); - bswap (x + 1, x + 2); -} + const char *enc = dict_get_encoding (dict); -/* Reverse the byte order of 64-bit floating point *X. */ -static inline void -bswap_flt64 (flt64 *x_) -{ - char *x = (char *) x_; - bswap (x + 0, x + 7); - bswap (x + 1, x + 6); - bswap (x + 2, x + 5); - bswap (x + 3, x + 4); -} + if ( NULL == enc) + enc = get_default_encoding (); -static void -corrupt_msg (int class, const char *format,...) - PRINTF_FORMAT (2, 3); + for (i = 0 ; i < dict_get_var_cnt (dict); ++i) + { + /* Convert the long variable name */ + struct variable *var = dict_get_var (dict, i); + const char *native_name = var_get_name (var); + char *utf8_name = recode_string (UTF8, enc, native_name, -1); + if ( 0 != strcmp (utf8_name, native_name)) + { + if ( NULL == dict_lookup_var (dict, utf8_name)) + dict_rename_var (dict, var, utf8_name); + else + msg (MW, + _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name); + } -/* Displays a corrupt sysfile error. */ -static void -corrupt_msg (int class, const char *format,...) -{ - struct error e; - va_list args; + free (utf8_name); - e.class = class; - e.where.file_name = NULL; - e.where.line_number = 0; - e.title = _("corrupt system file: "); + /* Convert the variable label */ + if (var_has_label (var)) + { + char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1); + var_set_label (var, utf8_label); + free (utf8_label); + } - va_start (args, format); - err_vmsg (&e, format, args); - va_end (args); -} + if (var_has_value_labels (var)) + { + const struct val_lab *vl = NULL; + const struct val_labs *vlabs = var_get_value_labels (var); -/* Closes a system file after we're done with it. */ -void -sfm_close_reader (struct sfm_reader *r) -{ - if (r == NULL) - return; + for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl)) + { + const union value *val = val_lab_get_value (vl); + const char *label = val_lab_get_label (vl); + char *new_label = NULL; - if (r->file) - { - if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) - msg (ME, _("%s: Closing system file: %s."), - fh_get_file_name (r->fh), strerror (errno)); - r->file = NULL; - } + new_label = recode_string (UTF8, enc, label, -1); - if (r->fh != NULL) - fh_close (r->fh, "system file", "rs"); - - free (r->vars); - free (r->buf); - free (r); + var_replace_value_label (var, val, new_label); + free (new_label); + } + } + } } - -/* Dictionary reader. */ - -static void buf_unread(struct sfm_reader *r, size_t byte_cnt); - -static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt, - size_t min_alloc); - -static int read_header (struct sfm_reader *, - struct dictionary *, struct sfm_read_info *); -static int parse_format_spec (struct sfm_reader *, int32_t, - struct fmt_spec *, const struct variable *); -static int read_value_labels (struct sfm_reader *, struct dictionary *, - struct variable **var_by_idx); -static int read_variables (struct sfm_reader *, - struct dictionary *, struct variable ***var_by_idx); -static int read_machine_int32_info (struct sfm_reader *, int size, int count); -static int read_machine_flt64_info (struct sfm_reader *, int size, int count); -static int read_documents (struct sfm_reader *, struct dictionary *); - -static int fread_ok (struct sfm_reader *, void *, size_t); - -/* Displays the message X with corrupt_msg, then jumps to the error - label. */ -#define lose(X) \ - do { \ - corrupt_msg X; \ - goto error; \ - } while (0) - -/* Calls buf_read with the specified arguments, and jumps to - error if the read fails. */ -#define assertive_buf_read(a,b,c,d) \ - do { \ - if (!buf_read (a,b,c,d)) \ - goto error; \ - } while (0) /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. If INFO is non-null, then it receives additional info about the system file. */ -struct sfm_reader * +struct casereader * sfm_open_reader (struct file_handle *fh, struct dictionary **dict, - struct sfm_read_info *info) + struct sfm_read_info *volatile info) { - struct sfm_reader *r = NULL; - struct variable **var_by_idx = NULL; + struct sfm_reader *volatile r = NULL; + struct variable **var_by_value_idx; + struct sfm_read_info local_info; + int format_warning_cnt = 0; + int weight_idx; + int claimed_oct_cnt; + int rec_type; *dict = dict_create (); - if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) - goto error; /* Create and initialize reader. */ - r = xmalloc (sizeof *r); - r->fh = fh; - r->file = fn_open (fh_get_file_name (fh), "rb"); - - r->reverse_endian = 0; - r->fix_specials = 0; - r->value_cnt = 0; - r->case_cnt = 0; - r->compressed = 0; - r->bias = 100.0; - r->weight_idx = -1; - r->ok = true; - - r->vars = NULL; - - r->sysmis = -FLT64_MAX; - r->highest = FLT64_MAX; - r->lowest = second_lowest_flt64; - - r->buf = r->ptr = r->end = NULL; - r->y = r->x + sizeof r->x; + r = pool_create_container (struct sfm_reader, pool); + r->fh = fh_ref (fh); + r->lock = NULL; + r->file = NULL; + r->error = false; + r->oct_cnt = 0; + r->has_long_var_names = false; + r->opcode_idx = sizeof r->opcodes; + + /* TRANSLATORS: this fragment will be interpolated into + messages in fh_lock() that identify types of files. */ + r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false); + if (r->lock == NULL) + goto error; - /* Check that file open succeeded. */ + r->file = fn_open (fh_get_file_name (fh), "rb"); if (r->file == NULL) { - msg (ME, _("An error occurred while opening \"%s\" for reading " - "as a system file: %s."), + msg (ME, _("Error opening \"%s\" for reading as a system file: %s."), fh_get_file_name (r->fh), strerror (errno)); goto error; } - /* Read header and variables. */ - if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx)) + /* Initialize info. */ + if (info == NULL) + info = &local_info; + memset (info, 0, sizeof *info); + + if (setjmp (r->bail_out)) goto error; - /* Handle weighting. */ - if (r->weight_idx != -1) + /* Read header. */ + read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info); + + /* Read all the variable definition records. */ + rec_type = read_int (r); + while (rec_type == 2) { - struct variable *weight_var; + read_variable_record (r, *dict, &format_warning_cnt); + rec_type = read_int (r); + } - if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt) - lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " - "and number of elements per case (%d)."), - fh_get_file_name (r->fh), r->weight_idx, r->value_cnt)); + /* Figure out the case format. */ + var_by_value_idx = make_var_by_value_idx (r, *dict); + setup_weight (r, weight_idx, var_by_value_idx, *dict); + /* Read all the rest of the dictionary records. */ + while (rec_type != 999) + { + switch (rec_type) + { + case 3: + read_value_labels (r, *dict, var_by_value_idx); + break; - weight_var = var_by_idx[r->weight_idx]; + case 4: + sys_error (r, _("Misplaced type 4 record.")); - if (weight_var == NULL) - lose ((ME, - _("%s: Weighting variable may not be a continuation of " - "a long string variable."), fh_get_file_name (fh))); - else if (weight_var->type == ALPHA) - lose ((ME, _("%s: Weighting variable may not be a string variable."), - fh_get_file_name (fh))); + case 6: + read_documents (r, *dict); + break; - dict_set_weight (*dict, weight_var); - } - else - dict_set_weight (*dict, NULL); + case 7: + read_extension_record (r, *dict, info); + break; - /* Read records of types 3, 4, 6, and 7. */ - for (;;) - { - int32_t rec_type; + default: + sys_error (r, _("Unrecognized record type %d."), rec_type); + } + rec_type = read_int (r); + } - assertive_buf_read (r, &rec_type, sizeof rec_type, 0); - if (r->reverse_endian) - bswap_int32 (&rec_type); - switch (rec_type) + if ( ! r->has_long_var_names ) + { + int i; + for (i = 0; i < dict_get_var_cnt (*dict); i++) { - case 3: - if (!read_value_labels (r, *dict, var_by_idx)) - goto error; - break; - - case 4: - lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 " - "records must always immediately follow type 3 " - "records."), - fh_get_file_name (r->fh))); - - case 6: - if (!read_documents (r, *dict)) - goto error; - break; - - case 7: - { - struct - { - int32_t subtype P; - int32_t size P; - int32_t count P; - } - data; - unsigned long bytes; - - int skip = 0; - - assertive_buf_read (r, &data, sizeof data, 0); - if (r->reverse_endian) - { - bswap_int32 (&data.subtype); - bswap_int32 (&data.size); - bswap_int32 (&data.count); - } - bytes = data.size * data.count; - if (bytes < data.size || bytes < data.count) - lose ((ME, "%s: Record type %d subtype %d too large.", - fh_get_file_name (r->fh), rec_type, data.subtype)); - - switch (data.subtype) - { - case 3: - if (!read_machine_int32_info (r, data.size, data.count)) - goto error; - break; - - case 4: - if (!read_machine_flt64_info (r, data.size, data.count)) - goto error; - break; - - case 5: - case 6: /* ?? Used by SPSS 8.0. */ - skip = 1; - break; - - case 11: /* Variable display parameters */ - { - const int n_vars = data.count / 3 ; - int i; - if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) ) - { - msg (MW, _("%s: Invalid subrecord length. " - "Record: 7; Subrecord: 11"), - fh_get_file_name (r->fh)); - skip = 1; - } - - for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i ) - { - struct - { - int32_t measure P; - int32_t width P; - int32_t align P; - } - params; - - struct variable *v; - - assertive_buf_read (r, ¶ms, sizeof(params), 0); - - v = dict_get_var(*dict, i); - - v->measure = params.measure; - v->display_width = params.width; - v->alignment = params.align; - } - } - break; - - case 13: /* SPSS 12.0 Long variable name map */ - { - char *buf, *short_name, *save_ptr; - int idx; - - /* Read data. */ - buf = xmalloc (bytes + 1); - if (!buf_read (r, buf, bytes, 0)) - { - free (buf); - goto error; - } - buf[bytes] = '\0'; - - /* Parse data. */ - for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0; - short_name != NULL; - short_name = strtok_r (NULL, "=", &save_ptr), idx++) - { - char *long_name = strtok_r (NULL, "\t", &save_ptr); - struct variable *v; - - /* Validate long name. */ - if (long_name == NULL) - { - msg (MW, _("%s: Trailing garbage in long variable " - "name map."), - fh_get_file_name (r->fh)); - break; - } - if (!var_is_valid_name (long_name, false)) - { - msg (MW, _("%s: Long variable mapping to invalid " - "variable name `%s'."), - fh_get_file_name (r->fh), long_name); - break; - } - - /* Find variable using short name. */ - v = dict_lookup_var (*dict, short_name); - if (v == NULL) - { - msg (MW, _("%s: Long variable mapping for " - "nonexistent variable %s."), - fh_get_file_name (r->fh), short_name); - break; - } - - /* Identify any duplicates. */ - if ( compare_var_names(short_name, long_name, 0) && - NULL != dict_lookup_var (*dict, long_name)) - lose ((ME, _("%s: Duplicate long variable name `%s' " - "within system file."), - fh_get_file_name (r->fh), long_name)); - - - /* Set long name. - Renaming a variable may clear the short - name, but we want to retain it, so - re-set it explicitly. */ - dict_rename_var (*dict, v, long_name); - var_set_short_name (v, short_name); - - /* For compatability, make sure dictionary - is in long variable name map order. In - the common case, this has no effect, - because the dictionary and the long - variable name map are already in the - same order. */ - dict_reorder_var (*dict, v, idx); - } - - /* Free data. */ - free (buf); - } - break; - - default: - msg (MW, _("%s: Unrecognized record type 7, subtype %d " - "encountered in system file."), - fh_get_file_name (r->fh), data.subtype); - skip = 1; - } - - if (skip) - { - void *x = buf_read (r, NULL, data.size * data.count, 0); - if (x == NULL) - goto error; - free (x); - } - } - break; + struct variable *var = dict_get_var (*dict, i); + char short_name[SHORT_NAME_LEN + 1]; + char long_name[SHORT_NAME_LEN + 1]; - case 999: - { - int32_t filler; + strcpy (short_name, var_get_name (var)); - assertive_buf_read (r, &filler, sizeof filler, 0); - goto success; - } + strcpy (long_name, short_name); + str_lowercase (long_name); - default: - corrupt_msg(MW, _("%s: Unrecognized record type %d."), - fh_get_file_name (r->fh), rec_type); + /* Set long name. Renaming a variable may clear the short + name, but we want to retain it, so re-set it + explicitly. */ + dict_rename_var (*dict, var, long_name); + var_set_short_name (var, 0, short_name); } + + r->has_long_var_names = true; } -success: - /* Come here on successful completion. */ - free (var_by_idx); - return r; + recode_strings (*dict); + + /* Read record 999 data, which is just filler. */ + read_int (r); + + /* Warn if the actual amount of data per case differs from the + amount that the header claims. SPSS version 13 gets this + wrong when very long strings are involved, so don't warn in + that case. */ + if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt + && info->version_major != 13) + sys_warn (r, _("File header claims %d variable positions but " + "%d were read from file."), + claimed_oct_cnt, r->oct_cnt); + + /* Create an index of dictionary variable widths for + sfm_read_case to use. We cannot use the `struct variable's + from the dictionary we created, because the caller owns the + dictionary and may destroy or modify its variables. */ + sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt); + pool_register (r->pool, free, r->sfm_vars); + r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool); + + pool_free (r->pool, var_by_value_idx); + return casereader_create_sequential + (NULL, r->proto, + r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, + &sys_file_casereader_class, r); error: - /* Come here on unsuccessful completion. */ - sfm_close_reader (r); - free (var_by_idx); - if (*dict != NULL) - { - dict_destroy (*dict); - *dict = NULL; - } + close_reader (r); + dict_destroy (*dict); + *dict = NULL; return NULL; } -/* Read record type 7, subtype 3. */ -static int -read_machine_int32_info (struct sfm_reader *r, int size, int count) +/* Closes a system file after we're done with it. + Returns true if an I/O error has occurred on READER, false + otherwise. */ +static bool +close_reader (struct sfm_reader *r) { - int32_t data[8]; - int file_bigendian; + bool error; - int i; + if (r == NULL) + return true; + + if (r->file) + { + if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) + { + msg (ME, _("Error closing system file \"%s\": %s."), + fh_get_file_name (r->fh), strerror (errno)); + r->error = true; + } + r->file = NULL; + } - if (size != sizeof (int32_t) || count != 8) - lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " - "subtype 3. Expected size %d, count 8."), - fh_get_file_name (r->fh), size, count, sizeof (int32_t))); - - assertive_buf_read (r, data, sizeof data, 0); - if (r->reverse_endian) - for (i = 0; i < 8; i++) - bswap_int32 (&data[i]); - -#ifdef FPREP_IEEE754 - if (data[4] != 1) - lose ((ME, _("%s: Floating-point representation in system file is not " - "IEEE-754. PSPP cannot convert between floating-point " - "formats."), - fh_get_file_name (r->fh))); -#else -#error Add support for your floating-point format. -#endif - -#ifdef WORDS_BIGENDIAN - file_bigendian = 1; -#else - file_bigendian = 0; -#endif - if (r->reverse_endian) - file_bigendian ^= 1; - if (file_bigendian ^ (data[6] == 1)) - lose ((ME, _("%s: File-indicated endianness (%s) does not match " - "endianness intuited from file header (%s)."), - fh_get_file_name (r->fh), - file_bigendian ? _("big-endian") : _("little-endian"), - data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian") - : _("unknown")))); - - /* PORTME: Character representation code. */ - if (data[7] != 2 && data[7] != 3) - lose ((ME, _("%s: File-indicated character representation code (%s) is " - "not ASCII."), - fh_get_file_name (r->fh), - (data[7] == 1 ? "EBCDIC" - : (data[7] == 4 ? _("DEC Kanji") : _("Unknown"))))); - - return 1; + fh_unlock (r->lock); + fh_unref (r->fh); -error: - return 0; + error = r->error; + pool_destroy (r->pool); + + return !error; } -/* Read record type 7, subtype 4. */ -static int -read_machine_flt64_info (struct sfm_reader *r, int size, int count) +/* Destroys READER. */ +static void +sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { - flt64 data[3]; - int i; - - if (size != sizeof (flt64) || count != 3) - lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " - "subtype 4. Expected size %d, count 8."), - fh_get_file_name (r->fh), size, count, sizeof (flt64))); + struct sfm_reader *r = r_; + close_reader (r); +} - assertive_buf_read (r, data, sizeof data, 0); - if (r->reverse_endian) - for (i = 0; i < 3; i++) - bswap_flt64 (&data[i]); +/* Returns true if FILE is an SPSS system file, + false otherwise. */ +bool +sfm_detect (FILE *file) +{ + char rec_type[5]; - if (data[0] != SYSMIS || data[1] != FLT64_MAX - || data[2] != second_lowest_flt64) - { - r->sysmis = data[0]; - r->highest = data[1]; - r->lowest = data[2]; - msg (MW, _("%s: File-indicated value is different from internal value " - "for at least one of the three system values. SYSMIS: " - "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: " - "%g, %g."), - fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS, - (double) data[1], (double) FLT64_MAX, - (double) data[2], (double) second_lowest_flt64); - } - - return 1; + if (fread (rec_type, 4, 1, file) != 1) + return false; + rec_type[4] = '\0'; -error: - return 0; + return !strcmp ("$FL2", rec_type); } - -static int -read_header (struct sfm_reader *r, - struct dictionary *dict, struct sfm_read_info *info) + +/* Reads the global header of the system file. + Sets DICT's file label to the system file's label. + Sets *WEIGHT_IDX to 0 if the system file is unweighted, + or to the value index of the weight variable otherwise. + Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units) + per case that the file claims to have (although it is not + always correct). + Initializes INFO with header information. */ +static void +read_header (struct sfm_reader *r, struct dictionary *dict, + int *weight_idx, int *claimed_oct_cnt, + struct sfm_read_info *info) { - struct sysfile_header hdr; /* Disk buffer. */ - char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */ - int skip_amt = 0; /* Amount of product name to omit. */ - int i; + char rec_type[5]; + char eye_catcher[61]; + uint8_t raw_layout_code[4]; + uint8_t raw_bias[8]; + char creation_date[10]; + char creation_time[9]; + char file_label[65]; + struct substring file_label_ss; + struct substring product; + + read_string (r, rec_type, sizeof rec_type); + read_string (r, eye_catcher, sizeof eye_catcher); + + if (strcmp ("$FL2", rec_type) != 0) + sys_error (r, _("This is not an SPSS system file.")); + + /* Identify integer format. */ + read_bytes (r, raw_layout_code, sizeof raw_layout_code); + if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code, + &r->integer_format) + && !integer_identify (3, raw_layout_code, sizeof raw_layout_code, + &r->integer_format)) + || (r->integer_format != INTEGER_MSB_FIRST + && r->integer_format != INTEGER_LSB_FIRST)) + sys_error (r, _("This is not an SPSS system file.")); + + *claimed_oct_cnt = read_int (r); + if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16) + *claimed_oct_cnt = -1; + + r->compressed = read_int (r) != 0; + + *weight_idx = read_int (r); + + r->case_cnt = read_int (r); + if ( r->case_cnt > INT_MAX / 2) + r->case_cnt = -1; + + + /* Identify floating-point format and obtain compression bias. */ + read_bytes (r, raw_bias, sizeof raw_bias); + if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) + { + uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - /* Read header, check magic. */ - assertive_buf_read (r, &hdr, sizeof hdr, 0); - if (strncmp ("$FL2", hdr.rec_type, 4) != 0) - lose ((ME, _("%s: Bad magic. Proper system files begin with " - "the four characters `$FL2'. This file will not be read."), - fh_get_file_name (r->fh))); - - /* Check eye-category.her string. */ - memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name); - for (i = 0; i < 60; i++) - if (!c_isprint ((unsigned char) prod_name[i])) - prod_name[i] = ' '; - for (i = 59; i >= 0; i--) - if (!c_isgraph ((unsigned char) prod_name[i])) - { - prod_name[i] = '\0'; - break; - } - prod_name[60] = '\0'; - - { -#define N_PREFIXES 2 - static const char *prefix[N_PREFIXES] = - { - "@(#) SPSS DATA FILE", - "SPSS SYSTEM FILE.", - }; + if (memcmp (raw_bias, zero_bias, 8)) + sys_warn (r, _("Compression bias is not the usual " + "value of 100, or system file uses unrecognized " + "floating-point format.")); + else + { + /* Some software is known to write all-zeros to this + field. Such software also writes floating-point + numbers in the format that we expect by default + (it seems that all software most likely does, in + reality), so don't warn in this case. */ + } - int i; + if (r->integer_format == INTEGER_MSB_FIRST) + r->float_format = FLOAT_IEEE_DOUBLE_BE; + else + r->float_format = FLOAT_IEEE_DOUBLE_LE; + } + float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias); - for (i = 0; i < N_PREFIXES; i++) - if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i]))) - { - skip_amt = strlen (prefix[i]); - break; - } - } - - /* Check endianness. */ - if (hdr.layout_code == 2) - r->reverse_endian = 0; - else + read_string (r, creation_date, sizeof creation_date); + read_string (r, creation_time, sizeof creation_time); + read_string (r, file_label, sizeof file_label); + skip_bytes (r, 3); + + file_label_ss = ss_cstr (file_label); + ss_trim (&file_label_ss, ss_cstr (" ")); + if (!ss_is_empty (file_label_ss)) { - bswap_int32 (&hdr.layout_code); - if (hdr.layout_code != 2) - lose ((ME, _("%s: File layout code has unexpected value %d. Value " - "should be 2, in big-endian or little-endian format."), - fh_get_file_name (r->fh), hdr.layout_code)); - - r->reverse_endian = 1; - bswap_int32 (&hdr.case_size); - bswap_int32 (&hdr.compress); - bswap_int32 (&hdr.weight_idx); - bswap_int32 (&hdr.case_cnt); - bswap_flt64 (&hdr.bias); + ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0'; + dict_set_label (dict, ss_data (file_label_ss)); } + strcpy (info->creation_date, creation_date); + strcpy (info->creation_time, creation_time); + info->integer_format = r->integer_format; + info->float_format = r->float_format; + info->compressed = r->compressed; + info->case_cnt = r->case_cnt; + + product = ss_cstr (eye_catcher); + ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE")); + ss_trim (&product, ss_cstr (" ")); + str_copy_buf_trunc (info->product, sizeof info->product, + ss_data (product), ss_length (product)); +} - /* Copy basic info and verify correctness. */ - r->value_cnt = hdr.case_size; +/* Reads a variable (type 2) record from R and adds the + corresponding variable to DICT. + Also skips past additional variable records for long string + variables. */ +static void +read_variable_record (struct sfm_reader *r, struct dictionary *dict, + int *format_warning_cnt) +{ + int width; + int has_variable_label; + int missing_value_code; + int print_format; + int write_format; + char name[9]; + + struct variable *var; + int nv; + + width = read_int (r); + has_variable_label = read_int (r); + missing_value_code = read_int (r); + print_format = read_int (r); + write_format = read_int (r); + read_string (r, name, sizeof name); + name[strcspn (name, " ")] = '\0'; + + /* Check variable name. */ + if (name[0] == '$' || name[0] == '#') + sys_error (r, "Variable name begins with invalid character `%c'.", + name[0]); + if (!var_is_plausible_name (name, false)) + sys_error (r, _("Invalid variable name `%s'."), name); + + /* Create variable. */ + if (width < 0 || width > 255) + sys_error (r, _("Bad width %d for variable %s."), width, name); + var = dict_create_var (dict, name, width); + if (var == NULL) + sys_error (r, + _("Duplicate variable name `%s' within system file."), + name); + + /* Set the short name the same as the long name. */ + var_set_short_name (var, 0, var_get_name (var)); + + /* Get variable label, if any. */ + if (has_variable_label != 0 && has_variable_label != 1) + sys_error (r, _("Variable label indicator field is not 0 or 1.")); + if (has_variable_label == 1) + { + size_t len; + char label[255 + 1]; - /* If value count is rediculous, then force it to -1 (a sentinel value) */ - if ( r->value_cnt < 0 || - r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2)) - r->value_cnt = -1; + len = read_int (r); + if (len >= sizeof label) + sys_error (r, _("Variable %s has label of invalid length %zu."), + name, len); + read_string (r, label, len + 1); + var_set_label (var, label); - r->compressed = hdr.compress; + skip_bytes (r, ROUND_UP (len, 4) - len); + } - r->weight_idx = hdr.weight_idx - 1; + /* Set missing values. */ + if (missing_value_code != 0) + { + struct missing_values mv; + int i; - r->case_cnt = hdr.case_cnt; - if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2) - lose ((ME, - _("%s: Number of cases in file (%ld) is not between -1 and %d."), - fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2)); + mv_init_pool (r->pool, &mv, var_get_width (var)); + if (var_is_numeric (var)) + { + if (missing_value_code < -3 || missing_value_code > 3 + || missing_value_code == -1) + sys_error (r, _("Numeric missing value indicator field is not " + "-3, -2, 0, 1, 2, or 3.")); + if (missing_value_code < 0) + { + double low = read_float (r); + double high = read_float (r); + mv_add_range (&mv, low, high); + missing_value_code = -missing_value_code - 2; + } + for (i = 0; i < missing_value_code; i++) + mv_add_num (&mv, read_float (r)); + } + else + { + int mv_width = MAX (width, 8); + union value value; - r->bias = hdr.bias; - if (r->bias != 100.0) - corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual " - "value of 100."), - fh_get_file_name (r->fh), r->bias); + if (missing_value_code < 1 || missing_value_code > 3) + sys_error (r, _("String missing value indicator field is not " + "0, 1, 2, or 3.")); - /* Make a file label only on the condition that the given label is - not all spaces or nulls. */ - { - int i; + value_init (&value, mv_width); + value_set_missing (&value, mv_width); + for (i = 0; i < missing_value_code; i++) + { + uint8_t *s = value_str_rw (&value, mv_width); + read_bytes (r, s, 8); + mv_add_str (&mv, s); + } + value_destroy (&value, mv_width); + } + var_set_missing_values (var, &mv); + } - for (i = sizeof hdr.file_label - 1; i >= 0; i--) - { - if (!c_isspace ((unsigned char) hdr.file_label[i]) - && hdr.file_label[i] != 0) - { - char *label = xmalloc (i + 2); - memcpy (label, hdr.file_label, i + 1); - label[i + 1] = 0; - dict_set_label (dict, label); - free (label); - break; - } - } - } + /* Set formats. */ + parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt); + parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt); - if (info) + /* Account for values. + Skip long string continuation records, if any. */ + nv = width == 0 ? 1 : DIV_RND_UP (width, 8); + r->oct_cnt += nv; + if (width > 8) { - char *cp; + int i; - memcpy (info->creation_date, hdr.creation_date, 9); - info->creation_date[9] = 0; + for (i = 1; i < nv; i++) + { + /* Check for record type 2 and width -1. */ + if (read_int (r) != 2 || read_int (r) != -1) + sys_error (r, _("Missing string continuation record.")); + + /* Skip and ignore remaining continuation data. */ + has_variable_label = read_int (r); + missing_value_code = read_int (r); + print_format = read_int (r); + write_format = read_int (r); + read_string (r, name, sizeof name); + + /* Variable label fields on continuation records have + been spotted in system files created by "SPSS Power + Macintosh Release 6.1". */ + if (has_variable_label) + skip_bytes (r, ROUND_UP (read_int (r), 4)); + } + } +} - memcpy (info->creation_time, hdr.creation_time, 8); - info->creation_time[8] = 0; +/* Translates the format spec from sysfile format to internal + format. */ +static void +parse_format_spec (struct sfm_reader *r, unsigned int s, + enum which_format which, struct variable *v, + int *format_warning_cnt) +{ + const int max_format_warnings = 8; + struct fmt_spec f; + uint8_t raw_type = s >> 16; + uint8_t w = s >> 8; + uint8_t d = s; -#ifdef WORDS_BIGENDIAN - info->big_endian = !r->reverse_endian; -#else - info->big_endian = r->reverse_endian; -#endif + bool ok; - info->compressed = hdr.compress; + if (!fmt_from_io (raw_type, &f.type)) + sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type); + f.w = w; + f.d = d; - info->case_cnt = hdr.case_cnt; + msg_disable (); + ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v)); + msg_enable (); - for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++) - if (c_isgraph ((unsigned char) *cp)) - break; - strcpy (info->product, cp); + if (ok) + { + if (which == PRINT_FORMAT) + var_set_print_format (v, &f); + else + var_set_write_format (v, &f); } + else if (*++format_warning_cnt <= max_format_warnings) + { + char fmt_string[FMT_STRING_LEN_MAX + 1]; + sys_warn (r, _("%s variable %s has invalid %s format %s."), + var_is_numeric (v) ? _("Numeric") : _("String"), + var_get_name (v), + which == PRINT_FORMAT ? _("print") : _("write"), + fmt_to_string (&f, fmt_string)); + + if (*format_warning_cnt == max_format_warnings) + sys_warn (r, _("Suppressing further invalid format warnings.")); + } +} - return 1; - -error: - return 0; +/* Sets the weighting variable in DICT to the variable + corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is + nonzero. */ +static void +setup_weight (struct sfm_reader *r, int weight_idx, + struct variable **var_by_value_idx, struct dictionary *dict) +{ + if (weight_idx != 0) + { + struct variable *weight_var + = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx); + if (var_is_numeric (weight_var)) + dict_set_weight (dict, weight_var); + else + sys_error (r, _("Weighting variable must be numeric.")); + } } -/* Reads most of the dictionary from file H; also fills in the - associated VAR_BY_IDX array. */ -static int -read_variables (struct sfm_reader *r, - struct dictionary *dict, struct variable ***var_by_idx) +/* Reads a document record, type 6, from system file R, and sets up + the documents and n_documents fields in the associated + dictionary. */ +static void +read_documents (struct sfm_reader *r, struct dictionary *dict) { - int i; + int line_cnt; + char *documents; - struct sysfile_variable sv; /* Disk buffer. */ - int long_string_count = 0; /* # of long string continuation - records still expected. */ - int next_value = 0; /* Index to next `value' structure. */ + if (dict_get_documents (dict) != NULL) + sys_error (r, _("Multiple type 6 (document) records.")); - assert(r); + line_cnt = read_int (r); + if (line_cnt <= 0) + sys_error (r, _("Number of document lines (%d) " + "must be greater than 0."), line_cnt); - *var_by_idx = 0; + documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH); + read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1); + if (strlen (documents) == DOC_LINE_LENGTH * line_cnt) + dict_set_documents (dict, documents); + else + sys_error (r, _("Document line contains null byte.")); + pool_free (r->pool, documents); +} - /* Pre-allocate variables. */ - if (r->value_cnt != -1) +/* Read a type 7 extension record. */ +static void +read_extension_record (struct sfm_reader *r, struct dictionary *dict, + struct sfm_read_info *info) +{ + int subtype = read_int (r); + size_t size = read_int (r); + size_t count = read_int (r); + size_t bytes = size * count; + + /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1 + allows an extra byte for a null terminator, used by some + extension processing routines. */ + if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size)))) + sys_error (r, "Record type 7 subtype %d too large.", subtype); + + switch (subtype) { - *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx); - r->vars = xnmalloc (r->value_cnt, sizeof *r->vars); - } + case 3: + read_machine_integer_info (r, size, count, info, dict); + return; + + case 4: + read_machine_float_info (r, size, count); + return; + + case 5: + /* Variable sets information. We don't use these yet. + They only apply to GUIs; see VARSETS on the APPLY + DICTIONARY command in SPSS documentation. */ + break; + + case 6: + /* DATE variable information. We don't use it yet, but we + should. */ + break; + + case 7: + /* Used by the MRSETS command. */ + break; + + case 8: + /* Used by the SPSS Data Entry software. */ + break; + + case 11: + read_display_parameters (r, size, count, dict); + return; + + case 13: + read_long_var_name_map (r, size, count, dict); + return; + + case 14: + read_long_string_map (r, size, count, dict); + return; + + case 16: + /* New in SPSS v14? Unknown purpose. */ + break; + + case 17: + read_data_file_attributes (r, size, count, dict); + return; + + case 18: + read_variable_attributes (r, size, count, dict); + return; + + case 20: + /* New in SPSS 16. Contains a single string that describes + the character encoding, e.g. "windows-1252". */ + { + char *encoding = pool_calloc (r->pool, size, count + 1); + read_string (r, encoding, count + 1); + dict_set_encoding (dict, encoding); + return; + } + case 21: + /* New in SPSS 16. Encodes value labels for long string + variables. */ + read_long_string_value_labels (r, size, count, dict); + return; - /* Read in the entry for each variable and use the info to - initialize the dictionary. */ - for (i = 0; ; ++i) - { - struct variable *vv; - char name[SHORT_NAME_LEN + 1]; - int nv; - int j; + default: + sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"), + subtype, PACKAGE_BUGREPORT); + break; + } - if ( r->value_cnt != -1 && i >= r->value_cnt ) - break; + skip_bytes (r, bytes); +} - assertive_buf_read (r, &sv, sizeof sv, 0); +/* Read record type 7, subtype 3. */ +static void +read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, + struct sfm_read_info *info, + struct dictionary *dict) +{ + int version_major = read_int (r); + int version_minor = read_int (r); + int version_revision = read_int (r); + int machine_code UNUSED = read_int (r); + int float_representation = read_int (r); + int compression_code UNUSED = read_int (r); + int integer_representation = read_int (r); + int character_code = read_int (r); + + int expected_float_format; + int expected_integer_format; + + if (size != 4 || count != 8) + sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, " + "subtype 3."), + size, count); + + /* Save version info. */ + info->version_major = version_major; + info->version_minor = version_minor; + info->version_revision = version_revision; + + /* Check floating point format. */ + if (r->float_format == FLOAT_IEEE_DOUBLE_BE + || r->float_format == FLOAT_IEEE_DOUBLE_LE) + expected_float_format = 1; + else if (r->float_format == FLOAT_Z_LONG) + expected_float_format = 2; + else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D) + expected_float_format = 3; + else + NOT_REACHED (); + if (float_representation != expected_float_format) + sys_error (r, _("Floating-point representation indicated by " + "system file (%d) differs from expected (%d)."), + r->float_format, expected_float_format); + + /* Check integer format. */ + if (r->integer_format == INTEGER_MSB_FIRST) + expected_integer_format = 1; + else if (r->integer_format == INTEGER_LSB_FIRST) + expected_integer_format = 2; + else + NOT_REACHED (); + if (integer_representation != expected_integer_format) + { + static const char *const endian[] = {N_("little-endian"), N_("big-endian")}; + sys_warn (r, _("Integer format indicated by system file (%s) " + "differs from expected (%s)."), + gettext (endian[integer_representation == 1]), + gettext (endian[expected_integer_format == 1])); + } - if (r->reverse_endian) - { - bswap_int32 (&sv.rec_type); - bswap_int32 (&sv.type); - bswap_int32 (&sv.has_var_label); - bswap_int32 (&sv.n_missing_values); - bswap_int32 (&sv.print); - bswap_int32 (&sv.write); - } - /* We've come to the end of the variable entries */ - if (sv.rec_type != 2) + /* + Record 7 (20) provides a much more reliable way of + setting the encoding. + The character_code is used as a fallback only. + */ + if ( NULL == dict_get_encoding (dict)) + { + switch (character_code) { - buf_unread(r, sizeof sv); - r->value_cnt = i; + case 1: + dict_set_encoding (dict, "EBCDIC-US"); break; - } - - if ( -1 == r->value_cnt ) - { - *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx); - r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars); - } + case 2: + case 3: + /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic] + respectively. However, there are known to be many files + in the wild with character code 2, yet have data which are + clearly not ascii. + Therefore we ignore these values. + */ + return; + case 4: + dict_set_encoding (dict, "MS_KANJI"); + break; + case 65000: + dict_set_encoding (dict, "UTF-7"); + break; + case 65001: + dict_set_encoding (dict, "UTF-8"); + break; + default: + { + char enc[100]; + snprintf (enc, 100, "CP%d", character_code); + dict_set_encoding (dict, enc); + } + break; + }; + } +} - /* If there was a long string previously, make sure that the - continuations are present; otherwise make sure there aren't - any. */ - if (long_string_count) - { - if (sv.type != -1) - lose ((ME, _("%s: position %d: String variable does not have " - "proper number of continuation records."), - fh_get_file_name (r->fh), i)); +/* Read record type 7, subtype 4. */ +static void +read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) +{ + double sysmis = read_float (r); + double highest = read_float (r); + double lowest = read_float (r); + if (size != 8 || count != 3) + sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."), + size, count); - r->vars[i].width = -1; - (*var_by_idx)[i] = NULL; - long_string_count--; - continue; - } - else if (sv.type == -1) - lose ((ME, _("%s: position %d: Superfluous long string continuation " - "record."), - fh_get_file_name (r->fh), i)); - - /* Check fields for validity. */ - if (sv.type < 0 || sv.type > 255) - lose ((ME, _("%s: position %d: Bad variable type code %d."), - fh_get_file_name (r->fh), i, sv.type)); - if (sv.has_var_label != 0 && sv.has_var_label != 1) - lose ((ME, _("%s: position %d: Variable label indicator field is not " - "0 or 1."), fh_get_file_name (r->fh), i)); - if (sv.n_missing_values < -3 || sv.n_missing_values > 3 - || sv.n_missing_values == -1) - lose ((ME, _("%s: position %d: Missing value indicator field is not " - "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i)); - - /* Copy first character of variable name. */ - if (sv.name[0] == '@' || sv.name[0] == '#') - lose ((ME, _("%s: position %d: Variable name begins with invalid " - "character."), - fh_get_file_name (r->fh), i)); - - name[0] = sv.name[0]; - - /* Copy remaining characters of variable name. */ - for (j = 1; j < SHORT_NAME_LEN; j++) - { - int c = (unsigned char) sv.name[j]; + if (sysmis != SYSMIS) + sys_warn (r, _("File specifies unexpected value %g as %s."), + sysmis, "SYSMIS"); - if (c == ' ') - break; - else - name[j] = c; - } - name[j] = 0; + if (highest != HIGHEST) + sys_warn (r, _("File specifies unexpected value %g as %s."), + highest, "HIGHEST"); - if ( ! var_is_plausible_name(name, false) ) - lose ((ME, _("%s: Invalid variable name `%s' within system file."), - fh_get_file_name (r->fh), name)); + if (lowest != LOWEST) + sys_warn (r, _("File specifies unexpected value %g as %s."), + lowest, "LOWEST"); +} - /* Create variable. */ - vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type); - if (vv == NULL) - lose ((ME, _("%s: Duplicate variable name `%s' within system file."), - fh_get_file_name (r->fh), name)); +/* Read record type 7, subtype 11, which specifies how variables + should be displayed in GUI environments. */ +static void +read_display_parameters (struct sfm_reader *r, size_t size, size_t count, + struct dictionary *dict) +{ + size_t n_vars; + bool includes_width; + bool warned = false; + size_t i; - var_set_short_name (vv, vv->name); + if (size != 4) + { + sys_warn (r, _("Bad size %zu on extension 11."), size); + skip_bytes (r, size * count); + return; + } - /* Case reading data. */ - nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64)); - long_string_count = nv - 1; - next_value += nv; + n_vars = dict_get_var_cnt (dict); + if (count == 3 * n_vars) + includes_width = true; + else if (count == 2 * n_vars) + includes_width = false; + else + { + sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."), + count, n_vars); + skip_bytes (r, size * count); + return; + } - /* Get variable label, if any. */ - if (sv.has_var_label == 1) - { - /* Disk buffer. */ - int32_t len; + for (i = 0; i < n_vars; ++i) + { + struct variable *v = dict_get_var (dict, i); + int measure = read_int (r); + int width = includes_width ? read_int (r) : 0; + int align = read_int (r); - /* Read length of label. */ - assertive_buf_read (r, &len, sizeof len, 0); - if (r->reverse_endian) - bswap_int32 (&len); + /* SPSS 14 sometimes seems to set string variables' measure + to zero. */ + if (0 == measure && var_is_alpha (v)) + measure = 1; - /* Check len. */ - if (len < 0 || len > 255) - lose ((ME, _("%s: Variable %s indicates variable label of invalid " - "length %d."), - fh_get_file_name (r->fh), vv->name, len)); + if (measure < 1 || measure > 3 || align < 0 || align > 2) + { + if (!warned) + sys_warn (r, _("Invalid variable display parameters " + "for variable %zu (%s). " + "Default parameters substituted."), + i, var_get_name (v)); + warned = true; + continue; + } - if ( len != 0 ) - { - /* Read label into variable structure. */ - vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1); - if (vv->label == NULL) - goto error; - vv->label[len] = '\0'; - } - } + var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL + : measure == 2 ? MEASURE_ORDINAL + : MEASURE_SCALE)); + var_set_alignment (v, (align == 0 ? ALIGN_LEFT + : align == 1 ? ALIGN_RIGHT + : ALIGN_CENTRE)); + + /* Older versions (SPSS 9.0) sometimes set the display + width to zero. This causes confusion in the GUI, so + only set the width if it is nonzero. */ + if (width > 0) + var_set_display_width (v, width); + } +} - /* Set missing values. */ - if (sv.n_missing_values != 0) - { - flt64 mv[3]; - int mv_cnt = abs (sv.n_missing_values); +/* Reads record type 7, subtype 13, which gives the long name + that corresponds to each short name. Modifies variable names + in DICT accordingly. */ +static void +read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text; + struct variable *var; + char *long_name; - if (vv->width > MAX_SHORT_STRING) - lose ((ME, _("%s: Long string variable %s may not have missing " - "values."), - fh_get_file_name (r->fh), vv->name)); + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) + { + char **short_names; + size_t short_name_cnt; + size_t i; - assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); + /* Validate long name. */ + if (!var_is_valid_name (long_name, false)) + { + sys_warn (r, _("Long variable mapping from %s to invalid " + "variable name `%s'."), + var_get_name (var), long_name); + continue; + } - if (r->reverse_endian && vv->type == NUMERIC) - for (j = 0; j < mv_cnt; j++) - bswap_flt64 (&mv[j]); - - if (sv.n_missing_values > 0) - { - for (j = 0; j < sv.n_missing_values; j++) - if (vv->type == NUMERIC) - mv_add_num (&vv->miss, mv[j]); - else - mv_add_str (&vv->miss, (char *) &mv[j]); - } - else - { - if (vv->type == ALPHA) - lose ((ME, _("%s: String variable %s may not have missing " - "values specified as a range."), - fh_get_file_name (r->fh), vv->name)); - - if (mv[0] == r->lowest) - mv_add_num_range (&vv->miss, LOWEST, mv[1]); - else if (mv[1] == r->highest) - mv_add_num_range (&vv->miss, mv[0], HIGHEST); - else - mv_add_num_range (&vv->miss, mv[0], mv[1]); - - if (sv.n_missing_values == -3) - mv_add_num (&vv->miss, mv[2]); - } - } + /* Identify any duplicates. */ + if (strcasecmp (var_get_short_name (var, 0), long_name) + && dict_lookup_var (dict, long_name) != NULL) + { + sys_warn (r, _("Duplicate long variable name `%s' " + "within system file."), long_name); + continue; + } - if (!parse_format_spec (r, sv.print, &vv->print, vv) - || !parse_format_spec (r, sv.write, &vv->write, vv)) - goto error; + /* Renaming a variable may clear its short names, but we + want to retain them, so we save them and re-set them + afterward. */ + short_name_cnt = var_get_short_name_cnt (var); + short_names = xnmalloc (short_name_cnt, sizeof *short_names); + for (i = 0; i < short_name_cnt; i++) + { + const char *s = var_get_short_name (var, i); + short_names[i] = s != NULL ? xstrdup (s) : NULL; + } - r->vars[i].width = vv->width; - r->vars[i].fv = vv->fv; + /* Set long name. */ + dict_rename_var (dict, var, long_name); + /* Restore short names. */ + for (i = 0; i < short_name_cnt; i++) + { + var_set_short_name (var, i, short_names[i]); + free (short_names[i]); + } + free (short_names); } + close_text_record (r, text); + r->has_long_var_names = true; +} - /* Some consistency checks. */ - if (long_string_count != 0) - lose ((ME, _("%s: Long string continuation records omitted at end of " - "dictionary."), - fh_get_file_name (r->fh))); - - if (next_value != r->value_cnt) - corrupt_msg(MW, _("%s: System file header indicates %d variable positions but " - "%d were read from file."), - fh_get_file_name (r->fh), r->value_cnt, next_value); +/* Reads record type 7, subtype 14, which gives the real length + of each very long string. Rearranges DICT accordingly. */ +static void +read_long_string_map (struct sfm_reader *r, size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text; + struct variable *var; + char *length_s; + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &length_s)) + { + size_t idx = var_get_dict_index (var); + long int length; + int segment_cnt; + int i; - return 1; + /* Get length. */ + length = strtol (length_s, NULL, 10); + if (length < 1 || length > MAX_STRING) + { + sys_warn (r, _("%s listed as string of invalid length %s " + "in very length string record."), + var_get_name (var), length_s); + continue; + } -error: - return 0; -} + /* Check segments. */ + segment_cnt = sfm_width_to_segments (length); + if (segment_cnt == 1) + { + sys_warn (r, _("%s listed in very long string record with width %s, " + "which requires only one segment."), + var_get_name (var), length_s); + continue; + } + if (idx + segment_cnt > dict_get_var_cnt (dict)) + sys_error (r, _("Very long string %s overflows dictionary."), + var_get_name (var)); -/* Translates the format spec from sysfile format to internal - format. */ -static int -parse_format_spec (struct sfm_reader *r, int32_t s, - struct fmt_spec *f, const struct variable *v) -{ - f->type = translate_fmt ((s >> 16) & 0xff); - if (f->type == -1) - lose ((ME, _("%s: Bad format specifier byte (%d)."), - fh_get_file_name (r->fh), (s >> 16) & 0xff)); - f->w = (s >> 8) & 0xff; - f->d = s & 0xff; - - if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0)) - lose ((ME, _("%s: %s variable %s has %s format specifier %s."), - fh_get_file_name (r->fh), - v->type == ALPHA ? _("String") : _("Numeric"), - v->name, - formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"), - formats[f->type].name)); - - if (!check_output_specifier (f, false) - || !check_specifier_width (f, v->width, false)) - { - msg (ME, _("%s variable %s has invalid format specifier %s."), - v->type == NUMERIC ? _("Numeric") : _("String"), - v->name, fmt_to_string (f)); - *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0); + /* Get the short names from the segments and check their + lengths. */ + for (i = 0; i < segment_cnt; i++) + { + struct variable *seg = dict_get_var (dict, idx + i); + int alloc_width = sfm_segment_alloc_width (length, i); + int width = var_get_width (seg); + + if (i > 0) + var_set_short_name (var, i, var_get_short_name (seg, 0)); + if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8)) + sys_error (r, _("Very long string with width %ld has segment %d " + "of width %d (expected %d)"), + length, i, width, alloc_width); + } + dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); + var_set_width (var, length); } - return 1; - -error: - return 0; + close_text_record (r, text); + dict_compact_values (dict); } /* Reads value labels from sysfile H and inserts them into the associated dictionary. */ -int +static void read_value_labels (struct sfm_reader *r, - struct dictionary *dict, struct variable **var_by_idx) + struct dictionary *dict, struct variable **var_by_value_idx) { - struct label + struct pool *subpool; + + struct label { - char raw_value[8]; /* Value as uninterpreted bytes. */ + uint8_t raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ char *label; /* Null-terminated label string. */ }; struct label *labels = NULL; - int32_t n_labels; /* Number of labels. */ + int label_cnt; /* Number of labels. */ struct variable **var = NULL; /* Associated variables. */ - int32_t n_vars; /* Number of associated variables. */ + int var_cnt; /* Number of associated variables. */ + int max_width; /* Maximum width of string variables. */ int i; - /* First step: read the contents of the type 3 record and record its - contents. Note that we can't do much with the data since we - don't know yet whether it is of numeric or string type. */ + subpool = pool_create_subpool (r->pool); + + /* Read the type 3 record and record its contents. We can't do + much with the data yet because we don't know whether it is + of numeric or string type. */ /* Read number of labels. */ - assertive_buf_read (r, &n_labels, sizeof n_labels, 0); - if (r->reverse_endian) - bswap_int32 (&n_labels); - - if ( n_labels >= ((int32_t) ~0) / sizeof *labels) - { - corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."), - fh_get_file_name (r->fh), n_labels); - n_labels = 0; - } + label_cnt = read_int (r); - /* Allocate memory. */ - labels = xcalloc (n_labels, sizeof *labels); - for (i = 0; i < n_labels; i++) - labels[i].label = NULL; + if (size_overflow_p (xtimes (label_cnt, sizeof *labels))) + { + sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."), + label_cnt); + label_cnt = 0; + } /* Read each value/label tuple into labels[]. */ - for (i = 0; i < n_labels; i++) + labels = pool_nalloc (subpool, label_cnt, sizeof *labels); + for (i = 0; i < label_cnt; i++) { struct label *label = labels + i; unsigned char label_len; size_t padded_len; /* Read value. */ - assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0); + read_bytes (r, label->raw_value, sizeof label->raw_value); /* Read label length. */ - assertive_buf_read (r, &label_len, sizeof label_len, 0); - padded_len = ROUND_UP (label_len + 1, sizeof (flt64)); + read_bytes (r, &label_len, sizeof label_len); + padded_len = ROUND_UP (label_len + 1, 8); /* Read label, padding. */ - label->label = xmalloc (padded_len + 1); - assertive_buf_read (r, label->label, padded_len - 1, 0); + label->label = pool_alloc (subpool, padded_len + 1); + read_bytes (r, label->label, padded_len - 1); label->label[label_len] = 0; } - /* Second step: Read the type 4 record that has the list of - variables to which the value labels are to be applied. */ + /* Now, read the type 4 record that has the list of variables + to which the value labels are to be applied. */ /* Read record type of type 4 record. */ - { - int32_t rec_type; - - assertive_buf_read (r, &rec_type, sizeof rec_type, 0); - if (r->reverse_endian) - bswap_int32 (&rec_type); - - if (rec_type != 4) - lose ((ME, _("%s: Variable index record (type 4) does not immediately " - "follow value label record (type 3) as it should."), - fh_get_file_name (r->fh))); - } + if (read_int (r) != 4) + sys_error (r, _("Variable index record (type 4) does not immediately " + "follow value label record (type 3) as it should.")); /* Read number of variables associated with value label from type 4 record. */ - assertive_buf_read (r, &n_vars, sizeof n_vars, 0); - if (r->reverse_endian) - bswap_int32 (&n_vars); - if (n_vars < 1 || n_vars > dict_get_var_cnt (dict)) - lose ((ME, _("%s: Number of variables associated with a value label (%d) " - "is not between 1 and the number of variables (%d)."), - fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict))); + var_cnt = read_int (r); + if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict)) + sys_error (r, _("Number of variables associated with a value label (%d) " + "is not between 1 and the number of variables (%zu)."), + var_cnt, dict_get_var_cnt (dict)); /* Read the list of variables. */ - var = xnmalloc (n_vars, sizeof *var); - for (i = 0; i < n_vars; i++) + var = pool_nalloc (subpool, var_cnt, sizeof *var); + max_width = 0; + for (i = 0; i < var_cnt; i++) { - int32_t var_idx; - struct variable *v; - - /* Read variable index, check range. */ - assertive_buf_read (r, &var_idx, sizeof var_idx, 0); - if (r->reverse_endian) - bswap_int32 (&var_idx); - if (var_idx < 1 || var_idx > r->value_cnt) - lose ((ME, _("%s: Variable index associated with value label (%d) is " - "not between 1 and the number of values (%d)."), - fh_get_file_name (r->fh), var_idx, r->value_cnt)); - - /* Make sure it's a real variable. */ - v = var_by_idx[var_idx - 1]; - if (v == NULL) - lose ((ME, _("%s: Variable index associated with value label (%d) " - "refers to a continuation of a string variable, not to " - "an actual variable."), - fh_get_file_name (r->fh), var_idx)); - if (v->type == ALPHA && v->width > MAX_SHORT_STRING) - lose ((ME, _("%s: Value labels are not allowed on long string " - "variables (%s)."), - fh_get_file_name (r->fh), v->name)); - - /* Add it to the list of variables. */ - var[i] = v; + var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r)); + if (var_get_width (var[i]) > 8) + sys_error (r, _("Value labels may not be added to long string " + "variables (e.g. %s) using records types 3 and 4."), + var_get_name (var[i])); + max_width = MAX (max_width, var_get_width (var[i])); } /* Type check the variables. */ - for (i = 1; i < n_vars; i++) - if (var[i]->type != var[0]->type) - lose ((ME, _("%s: Variables associated with value label are not all of " - "identical type. Variable %s has %s type, but variable " - "%s has %s type."), - fh_get_file_name (r->fh), - var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"), - var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric"))); + for (i = 1; i < var_cnt; i++) + if (var_get_type (var[i]) != var_get_type (var[0])) + sys_error (r, _("Variables associated with value label are not all of " + "identical type. Variable %s is %s, but variable " + "%s is %s."), + var_get_name (var[0]), + var_is_numeric (var[0]) ? _("numeric") : _("string"), + var_get_name (var[i]), + var_is_numeric (var[i]) ? _("numeric") : _("string")); /* Fill in labels[].value, now that we know the desired type. */ - for (i = 0; i < n_labels; i++) + for (i = 0; i < label_cnt; i++) { struct label *label = labels + i; - - if (var[0]->type == ALPHA) - { - const int copy_len = min (sizeof label->raw_value, - sizeof label->label); - memcpy (label->value.s, label->raw_value, copy_len); - } else { - flt64 f; - assert (sizeof f == sizeof label->raw_value); - memcpy (&f, label->raw_value, sizeof f); - if (r->reverse_endian) - bswap_flt64 (&f); - label->value.f = f; - } + + value_init_pool (subpool, &label->value, max_width); + if (var_is_alpha (var[0])) + u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, + label->raw_value, sizeof label->raw_value, ' '); + else + label->value.f = float_get_double (r->float_format, label->raw_value); } - - /* Assign the value_label's to each variable. */ - for (i = 0; i < n_vars; i++) + + /* Assign the `value_label's to each variable. */ + for (i = 0; i < var_cnt; i++) { struct variable *v = var[i]; int j; /* Add each label to the variable. */ - for (j = 0; j < n_labels; j++) + for (j = 0; j < label_cnt; j++) { - struct label *label = labels + j; - if (!val_labs_replace (v->val_labs, label->value, label->label)) - continue; - - if (var[0]->type == NUMERIC) - msg (MW, _("%s: File contains duplicate label for value %g for " - "variable %s."), - fh_get_file_name (r->fh), label->value.f, v->name); - else - msg (MW, _("%s: File contains duplicate label for value `%.*s' " - "for variable %s."), - fh_get_file_name (r->fh), v->width, label->value.s, v->name); + struct label *label = &labels[j]; + if (!var_add_value_label (v, &label->value, label->label)) + { + if (var_is_numeric (var[0])) + sys_warn (r, _("Duplicate value label for %g on %s."), + label->value.f, var_get_name (v)); + else + sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), + max_width, value_str (&label->value, max_width), + var_get_name (v)); + } } } - for (i = 0; i < n_labels; i++) - free (labels[i].label); - free (labels); - free (var); - return 1; - -error: - if (labels) - { - for (i = 0; i < n_labels; i++) - free (labels[i].label); - free (labels); - } - free (var); - return 0; + pool_destroy (subpool); } -/* Reads BYTE_CNT bytes from the file represented by H. If BUF is - non-NULL, uses that as the buffer; otherwise allocates at least - MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL - on failure. */ -static void * -buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc) +/* Reads a set of custom attributes from TEXT into ATTRS. + ATTRS may be a null pointer, in which case the attributes are + read but discarded. */ +static void +read_attributes (struct sfm_reader *r, struct text_record *text, + struct attrset *attrs) { - assert (r); + do + { + struct attribute *attr; + char *key; + int index; + + /* Parse the key. */ + key = text_get_token (text, ss_cstr ("(")); + if (key == NULL) + return; - if (buf == NULL && byte_cnt > 0 ) - buf = xmalloc (max (byte_cnt, min_alloc)); + attr = attribute_create (key); + for (index = 1; ; index++) + { + /* Parse the value. */ + char *value; + size_t length; - if ( byte_cnt == 0 ) - return buf; + value = text_get_token (text, ss_cstr ("\n")); + if (value == NULL) + { + text_warn (r, text, _("Error parsing attribute value %s[%d]"), + key, index); + break; + } - - if (1 != fread (buf, byte_cnt, 1, r->file)) - { - if (ferror (r->file)) - msg (ME, _("%s: Reading system file: %s."), - fh_get_file_name (r->fh), strerror (errno)); + length = strlen (value); + if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') + { + value[length - 1] = '\0'; + attribute_add_value (attr, value + 1); + } + else + { + text_warn (r, text, + _("Attribute value %s[%d] is not quoted: %s"), + key, index, value); + attribute_add_value (attr, value); + } + + /* Was this the last value for this attribute? */ + if (text_match (text, ')')) + break; + } + if (attrs != NULL) + attrset_add (attrs, attr); else - corrupt_msg (ME, _("%s: Unexpected end of file."), - fh_get_file_name (r->fh)); - r->ok = false; - return NULL; + attribute_destroy (attr); } - return buf; + while (!text_match (text, '/')); +} + +/* Reads record type 7, subtype 17, which lists custom + attributes on the data file. */ +static void +read_data_file_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + read_attributes (r, text, dict_get_attributes (dict)); + close_text_record (r, text); } -/* Winds the reader BYTE_CNT bytes back in the reader stream. */ -void -buf_unread(struct sfm_reader *r, size_t byte_cnt) +static void +skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels) { - assert(byte_cnt > 0); + size_t i; - if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR)) + for (i = 0; i < n_labels; i++) { - msg (ME, _("%s: Seeking system file: %s."), - fh_get_file_name (r->fh), strerror (errno)); + size_t value_length, label_length; + + value_length = read_int (r); + skip_bytes (r, value_length); + label_length = read_int (r); + skip_bytes (r, label_length); } } -/* Reads a document record, type 6, from system file R, and sets up - the documents and n_documents fields in the associated - dictionary. */ -static int -read_documents (struct sfm_reader *r, struct dictionary *dict) +static void +read_long_string_value_labels (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *d) { - int32_t line_cnt; - char *documents; + const off_t start = ftello (r->file); + while (ftello (r->file) - start < size * count) + { + char var_name[VAR_NAME_LEN + 1]; + size_t n_labels, i; + struct variable *v; + union value value; + int var_name_len; + int width; + + /* Read header. */ + var_name_len = read_int (r); + if (var_name_len > VAR_NAME_LEN) + sys_error (r, _("Variable name length in long string value label " + "record (%d) exceeds %d-byte limit."), + var_name_len, VAR_NAME_LEN); + read_string (r, var_name, var_name_len + 1); + width = read_int (r); + n_labels = read_int (r); + + v = dict_lookup_var (d, var_name); + if (v == NULL) + { + sys_warn (r, _("Ignoring long string value record for " + "unknown variable %s."), var_name); + skip_long_string_value_labels (r, n_labels); + continue; + } + if (var_is_numeric (v)) + { + sys_warn (r, _("Ignoring long string value record for " + "numeric variable %s."), var_name); + skip_long_string_value_labels (r, n_labels); + continue; + } + if (width != var_get_width (v)) + { + sys_warn (r, _("Ignoring long string value record for variable %s " + "because the record's width (%d) does not match the " + "variable's width (%d)"), + var_name, width, var_get_width (v)); + skip_long_string_value_labels (r, n_labels); + continue; + } - if (dict_get_documents (dict) != NULL) - lose ((ME, _("%s: System file contains multiple " - "type 6 (document) records."), - fh_get_file_name (r->fh))); + /* Read values. */ + value_init_pool (r->pool, &value, width); + for (i = 0; i < n_labels; i++) + { + size_t value_length, label_length; + char label[256]; + bool skip = false; + + /* Read value. */ + value_length = read_int (r); + if (value_length == width) + read_bytes (r, value_str_rw (&value, width), width); + else + { + sys_warn (r, _("Ignoring long string value %zu for variable %s, " + "with width %d, that has bad value width %zu."), + i, var_get_name (v), width, value_length); + skip_bytes (r, value_length); + skip = true; + } - assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0); - if (line_cnt <= 0) - lose ((ME, _("%s: Number of document lines (%ld) " - "must be greater than 0."), - fh_get_file_name (r->fh), (long) line_cnt)); - - documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1); - /* FIXME? Run through asciify. */ - if (documents == NULL) - return 0; - documents[80 * line_cnt] = '\0'; - dict_set_documents (dict, documents); - free (documents); - return 1; + /* Read label. */ + label_length = read_int (r); + read_string (r, label, MIN (sizeof label, label_length + 1)); + if (label_length >= sizeof label) + { + /* Skip and silently ignore label text after the + first 255 bytes. The maximum documented length + of a label is 120 bytes so this is more than + generous. */ + skip_bytes (r, sizeof label - (label_length + 1)); + } -error: - return 0; + if (!skip && !var_add_value_label (v, &value, label)) + sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), + width, value_str (&value, width), var_get_name (v)); + } + } } - -/* Data reader. */ -/* Reads compressed data into H->BUF and sets other pointers - appropriately. Returns nonzero only if both no errors occur and - data was read. */ -static int -buffer_input (struct sfm_reader *r) -{ - size_t amt; - if (!r->ok) - return false; - if (r->buf == NULL) - r->buf = xnmalloc (128, sizeof *r->buf); - amt = fread (r->buf, sizeof *r->buf, 128, r->file); - if (ferror (r->file)) +/* Reads record type 7, subtype 18, which lists custom + attributes on individual variables. */ +static void +read_variable_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + for (;;) { - msg (ME, _("%s: Error reading file: %s."), - fh_get_file_name (r->fh), strerror (errno)); - r->ok = false; - return 0; + struct variable *var; + if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var)) + break; + read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL); } - r->ptr = r->buf; - r->end = &r->buf[amt]; - return amt; + close_text_record (r, text); } -/* Reads a single case consisting of compressed data from system - file H into the array BUF[] according to reader R, and - returns nonzero only if successful. */ -/* Data in system files is compressed in this manner. Data - values are grouped into sets of eight ("octets"). Each value - in an octet has one instruction byte that are output together. - Each instruction byte gives a value for that byte or indicates - that the value can be found following the instructions. */ -static int -read_compressed_data (struct sfm_reader *r, flt64 *buf) + +/* Case reader. */ + +static void partial_record (struct sfm_reader *r) + NO_RETURN; + +static void read_error (struct casereader *, const struct sfm_reader *); + +static bool read_case_number (struct sfm_reader *, double *); +static bool read_case_string (struct sfm_reader *, uint8_t *, size_t); +static int read_opcode (struct sfm_reader *); +static bool read_compressed_number (struct sfm_reader *, double *); +static bool read_compressed_string (struct sfm_reader *, uint8_t *); +static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t); +static bool skip_whole_strings (struct sfm_reader *, size_t); + +/* Reads and returns one case from READER's file. Returns a null + pointer if not successful. */ +static struct ccase * +sys_file_casereader_read (struct casereader *reader, void *r_) { - const unsigned char *p_end = r->x + sizeof (flt64); - unsigned char *p = r->y; + struct sfm_reader *r = r_; + struct ccase *volatile c; + int i; - const flt64 *buf_beg = buf; - const flt64 *buf_end = &buf[r->value_cnt]; + if (r->error) + return NULL; - for (;;) + c = case_create (r->proto); + if (setjmp (r->bail_out)) { - for (; p < p_end; p++){ - switch (*p) - { - case 0: - /* Code 0 is ignored. */ - continue; - case 252: - /* Code 252 is end of file. */ - if (buf_beg == buf) - return 0; - lose ((ME, _("%s: Compressed data is corrupted. Data ends " - "in partial case."), - fh_get_file_name (r->fh))); - case 253: - /* Code 253 indicates that the value is stored explicitly - following the instruction bytes. */ - if (r->ptr == NULL || r->ptr >= r->end) - if (!buffer_input (r)) - lose ((ME, _("%s: Unexpected end of file."), - fh_get_file_name (r->fh))); - memcpy (buf++, r->ptr++, sizeof *buf); - if (buf >= buf_end) - goto success; - break; - case 254: - /* Code 254 indicates a string that is all blanks. */ - memset (buf++, ' ', sizeof *buf); - if (buf >= buf_end) - goto success; - break; - case 255: - /* Code 255 indicates the system-missing value. */ - *buf = r->sysmis; - if (r->reverse_endian) - bswap_flt64 (buf); - buf++; - if (buf >= buf_end) - goto success; - break; - default: - /* Codes 1 through 251 inclusive are taken to indicate a - value of (BYTE - BIAS), where BYTE is the byte's value - and BIAS is the compression bias (generally 100.0). */ - *buf = *p - r->bias; - if (r->reverse_endian) - bswap_flt64 (buf); - buf++; - if (buf >= buf_end) - goto success; - break; - } - } - /* We have reached the end of this instruction octet. Read - another. */ - if (r->ptr == NULL || r->ptr >= r->end) + casereader_force_error (reader); + case_unref (c); + return NULL; + } + + for (i = 0; i < r->sfm_var_cnt; i++) + { + struct sfm_var *sv = &r->sfm_vars[i]; + union value *v = case_data_rw_idx (c, sv->case_index); + + if (sv->var_width == 0) { - if (!buffer_input (r)) - { - if (buf_beg != buf) - lose ((ME, _("%s: Unexpected end of file."), - fh_get_file_name (r->fh))); - else - return 0; - } + if (!read_case_number (r, &v->f)) + goto eof; + } + else + { + uint8_t *s = value_str_rw (v, sv->var_width); + if (!read_case_string (r, s + sv->offset, sv->segment_width)) + goto eof; + if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8))) + partial_record (r); } - memcpy (r->x, r->ptr++, sizeof *buf); - p = r->x; } + return c; + +eof: + case_unref (c); + if (i != 0) + partial_record (r); + if (r->case_cnt != -1) + read_error (reader, r); + return NULL; +} - abort (); +/* Issues an error that R ends in a partial record. */ +static void +partial_record (struct sfm_reader *r) +{ + sys_error (r, _("File ends in partial case.")); +} -success: - /* We have filled up an entire record. Update state and return - successfully. */ - r->y = ++p; - return 1; +/* Issues an error that an unspecified error occurred SFM, and + marks R tainted. */ +static void +read_error (struct casereader *r, const struct sfm_reader *sfm) +{ + msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh)); + casereader_force_error (r); +} -error: - /* I/O error. */ - r->ok = false; - return 0; +/* Reads a number from R and stores its value in *D. + If R is compressed, reads a compressed number; + otherwise, reads a number in the regular way. + Returns true if successful, false if end of file is + reached immediately. */ +static bool +read_case_number (struct sfm_reader *r, double *d) +{ + if (!r->compressed) + { + uint8_t number[8]; + if (!try_read_bytes (r, number, sizeof number)) + return false; + float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d); + return true; + } + else + return read_compressed_number (r, d); } -/* Reads one case from READER's file into C. Returns nonzero - only if successful. */ -int -sfm_read_case (struct sfm_reader *r, struct ccase *c) +/* Reads LENGTH string bytes from R into S. + Always reads a multiple of 8 bytes; if LENGTH is not a + multiple of 8, then extra bytes are read and discarded without + being written to S. + Reads compressed strings if S is compressed. + Returns true if successful, false if end of file is + reached immediately. */ +static bool +read_case_string (struct sfm_reader *r, uint8_t *s, size_t length) { - if (!r->ok) - return 0; - - if (!r->compressed && sizeof (flt64) == sizeof (double)) + size_t whole = ROUND_DOWN (length, 8); + size_t partial = length % 8; + + if (whole) { - /* Fast path: external and internal representations are the - same, except possibly for endianness or SYSMIS. Read - directly into the case's buffer, then fix up any minor - details as needed. */ - if (!fread_ok (r, case_data_all_rw (c), - sizeof (union value) * r->value_cnt)) - return 0; - - /* Fix up endianness if needed. */ - if (r->reverse_endian) - { - int i; - - for (i = 0; i < r->value_cnt; i++) - if (r->vars[i].width == 0) - bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f); - } + if (!read_whole_strings (r, s, whole)) + return false; + } - /* Fix up SYSMIS values if needed. - I don't think this will ever actually kick in, but it - can't hurt. */ - if (r->sysmis != SYSMIS) + if (partial) + { + uint8_t bounce[8]; + if (!read_whole_strings (r, bounce, sizeof bounce)) { - int i; - - for (i = 0; i < r->value_cnt; i++) - if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis) - case_data_rw (c, r->vars[i].fv)->f = SYSMIS; + if (whole) + partial_record (r); + return false; } + memcpy (s + whole, bounce, partial); } - else - { - /* Slow path: internal and external representations differ. - Read into a bounce buffer, then copy to C. */ - flt64 *bounce; - flt64 *bounce_cur; - size_t bounce_size; - int read_ok; - int i; - bounce_size = sizeof *bounce * r->value_cnt; - bounce = bounce_cur = local_alloc (bounce_size); + return true; +} - if (!r->compressed) - read_ok = fread_ok (r, bounce, bounce_size); - else - read_ok = read_compressed_data (r, bounce); - if (!read_ok) +/* Reads and returns the next compression opcode from R. */ +static int +read_opcode (struct sfm_reader *r) +{ + assert (r->compressed); + for (;;) + { + int opcode; + if (r->opcode_idx >= sizeof r->opcodes) { - local_free (bounce); - return 0; + if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes)) + return -1; + r->opcode_idx = 0; } + opcode = r->opcodes[r->opcode_idx++]; - for (i = 0; i < r->value_cnt; i++) - { - struct sfm_var *v = &r->vars[i]; + if (opcode != 0) + return opcode; + } +} - if (v->width == 0) - { - flt64 f = *bounce_cur++; - if (r->reverse_endian) - bswap_flt64 (&f); - case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f; - } - else if (v->width != -1) - { - memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width); - bounce_cur += DIV_RND_UP (v->width, sizeof (flt64)); - } - } +/* Reads a compressed number from R and stores its value in D. + Returns true if successful, false if end of file is + reached immediately. */ +static bool +read_compressed_number (struct sfm_reader *r, double *d) +{ + int opcode = read_opcode (r); + switch (opcode) + { + case -1: + case 252: + return false; + + case 253: + *d = read_float (r); + break; + + case 254: + sys_error (r, _("Compressed data is corrupt.")); - local_free (bounce); + case 255: + *d = SYSMIS; + break; + + default: + *d = opcode - r->bias; + break; } - return 1; + + return true; } -static int -fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt) +/* Reads a compressed 8-byte string segment from R and stores it + in DST. + Returns true if successful, false if end of file is + reached immediately. */ +static bool +read_compressed_string (struct sfm_reader *r, uint8_t *dst) { - size_t read_bytes = fread (buffer, 1, byte_cnt, r->file); + switch (read_opcode (r)) + { + case -1: + case 252: + return false; + + case 253: + read_bytes (r, dst, 8); + break; + + case 254: + memset (dst, ' ', 8); + break; + + default: + sys_error (r, _("Compressed data is corrupt.")); + } + + return true; +} - if (read_bytes == byte_cnt) - return 1; +/* Reads LENGTH string bytes from R into S. + LENGTH must be a multiple of 8. + Reads compressed strings if S is compressed. + Returns true if successful, false if end of file is + reached immediately. */ +static bool +read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length) +{ + assert (length % 8 == 0); + if (!r->compressed) + return try_read_bytes (r, s, length); else { - if (ferror (r->file)) - { - msg (ME, _("%s: Reading system file: %s."), - fh_get_file_name (r->fh), strerror (errno)); - r->ok = false; - } - else if (read_bytes != 0) - { - msg (ME, _("%s: Partial record at end of system file."), - fh_get_file_name (r->fh)); - r->ok = false; - } - return 0; + size_t ofs; + for (ofs = 0; ofs < length; ofs += 8) + if (!read_compressed_string (r, s + ofs)) + { + if (ofs != 0) + partial_record (r); + return false; + } + return true; } } + +/* Skips LENGTH string bytes from R. + LENGTH must be a multiple of 8. + (LENGTH is also limited to 1024, but that's only because the + current caller never needs more than that many bytes.) + Returns true if successful, false if end of file is + reached immediately. */ +static bool +skip_whole_strings (struct sfm_reader *r, size_t length) +{ + uint8_t buffer[1024]; + assert (length < sizeof buffer); + return read_whole_strings (r, buffer, length); +} -/* Returns true if an I/O error has occurred on READER, false - otherwise. */ -bool -sfm_read_error (const struct sfm_reader *reader) +/* Creates and returns a table that can be used for translating a value + index into a case to a "struct variable *" for DICT. Multiple + system file fields reference variables this way. + + This table must be created before processing the very long + string extension record, because that record causes some + values to be deleted from the case and the dictionary to be + compacted. */ +static struct variable ** +make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) { - return !reader->ok; + struct variable **var_by_value_idx; + int value_idx = 0; + int i; + + var_by_value_idx = pool_nmalloc (r->pool, + r->oct_cnt, sizeof *var_by_value_idx); + for (i = 0; i < dict_get_var_cnt (dict); i++) + { + struct variable *v = dict_get_var (dict, i); + int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8); + int j; + + var_by_value_idx[value_idx++] = v; + for (j = 1; j < nv; j++) + var_by_value_idx[value_idx++] = NULL; + } + assert (value_idx == r->oct_cnt); + + return var_by_value_idx; } -/* Returns true if FILE is an SPSS system file, - false otherwise. */ -bool -sfm_detect (FILE *file) +/* Returns the "struct variable" corresponding to the given + 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index + is valid. */ +static struct variable * +lookup_var_by_value_idx (struct sfm_reader *r, + struct variable **var_by_value_idx, int value_idx) +{ + struct variable *var; + + if (value_idx < 1 || value_idx > r->oct_cnt) + sys_error (r, _("Variable index %d not in valid range 1...%d."), + value_idx, r->oct_cnt); + + var = var_by_value_idx[value_idx - 1]; + if (var == NULL) + sys_error (r, _("Variable index %d refers to long string " + "continuation."), + value_idx); + + return var; +} + +/* Returns the variable in D with the given SHORT_NAME, + or a null pointer if there is none. */ +static struct variable * +lookup_var_by_short_name (struct dictionary *d, const char *short_name) +{ + struct variable *var; + size_t var_cnt; + size_t i; + + /* First try looking up by full name. This often succeeds. */ + var = dict_lookup_var (d, short_name); + if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name)) + return var; + + /* Iterate through the whole dictionary as a fallback. */ + var_cnt = dict_get_var_cnt (d); + for (i = 0; i < var_cnt; i++) + { + var = dict_get_var (d, i); + if (!strcasecmp (var_get_short_name (var, 0), short_name)) + return var; + } + + return NULL; +} + +/* Helpers for reading records that contain structured text + strings. */ + +/* Maximum number of warnings to issue for a single text + record. */ +#define MAX_TEXT_WARNINGS 5 + +/* State. */ +struct text_record + { + struct substring buffer; /* Record contents. */ + size_t pos; /* Current position in buffer. */ + int n_warnings; /* Number of warnings issued or suppressed. */ + }; + +/* Reads SIZE bytes into a text record for R, + and returns the new text record. */ +static struct text_record * +open_text_record (struct sfm_reader *r, size_t size) +{ + struct text_record *text = pool_alloc (r->pool, sizeof *text); + char *buffer = pool_malloc (r->pool, size + 1); + read_bytes (r, buffer, size); + text->buffer = ss_buffer (buffer, size); + text->pos = 0; + text->n_warnings = 0; + return text; +} + +/* Closes TEXT, frees its storage, and issues a final warning + about suppressed warnings if necesary. */ +static void +close_text_record (struct sfm_reader *r, struct text_record *text) +{ + if (text->n_warnings > MAX_TEXT_WARNINGS) + sys_warn (r, _("Suppressed %d additional related warnings."), + text->n_warnings - MAX_TEXT_WARNINGS); + pool_free (r->pool, ss_data (text->buffer)); +} + +/* Reads a variable=value pair from TEXT. + Looks up the variable in DICT and stores it into *VAR. + Stores a null-terminated value into *VALUE. */ +static bool +read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, + struct variable **var, char **value) +{ + for (;;) + { + if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) + return false; + + *value = text_get_token (text, ss_buffer ("\t\0", 2)); + if (*value == NULL) + return false; + + text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX), + ss_buffer ("\t\0", 2)); + + if (*var != NULL) + return true; + } +} + +static bool +text_read_short_name (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, struct substring delimiters, + struct variable **var) +{ + char *short_name = text_get_token (text, delimiters); + if (short_name == NULL) + return false; + + *var = lookup_var_by_short_name (dict, short_name); + if (*var == NULL) + text_warn (r, text, _("Variable map refers to unknown variable %s."), + short_name); + return true; +} + +/* Displays a warning for the current file position, limiting the + number to MAX_TEXT_WARNINGS for TEXT. */ +static void +text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) +{ + if (text->n_warnings++ < MAX_TEXT_WARNINGS) + { + va_list args; + + va_start (args, format); + sys_msg (r, MW, format, args); + va_end (args); + } +} + +static char * +text_get_token (struct text_record *text, struct substring delimiters) { - struct sysfile_header hdr; + struct substring token; - if (fread (&hdr, sizeof hdr, 1, file) != 1) + if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) + return NULL; + ss_data (token)[ss_length (token)] = '\0'; + return ss_data (token); +} + +static bool +text_match (struct text_record *text, char c) +{ + if (text->buffer.string[text->pos] == c) + { + text->pos++; + return true; + } + else return false; - if (strncmp ("$FL2", hdr.rec_type, 4)) +} + +/* Messages. */ + +/* Displays a corruption message. */ +static void +sys_msg (struct sfm_reader *r, int class, const char *format, va_list args) +{ + struct msg m; + struct string text; + + ds_init_empty (&text); + ds_put_format (&text, "\"%s\" near offset 0x%lx: ", + fh_get_file_name (r->fh), (unsigned long) ftell (r->file)); + ds_put_vformat (&text, format, args); + + m.category = msg_class_to_category (class); + m.severity = msg_class_to_severity (class); + m.where.file_name = NULL; + m.where.line_number = 0; + m.text = ds_cstr (&text); + + msg_emit (&m); +} + +/* Displays a warning for the current file position. */ +static void +sys_warn (struct sfm_reader *r, const char *format, ...) +{ + va_list args; + + va_start (args, format); + sys_msg (r, MW, format, args); + va_end (args); +} + +/* Displays an error for the current file position, + marks it as in an error state, + and aborts reading it using longjmp. */ +static void +sys_error (struct sfm_reader *r, const char *format, ...) +{ + va_list args; + + va_start (args, format); + sys_msg (r, ME, format, args); + va_end (args); + + r->error = true; + longjmp (r->bail_out, 1); +} + +/* Reads BYTE_CNT bytes into BUF. + Returns true if exactly BYTE_CNT bytes are successfully read. + Aborts if an I/O error or a partial read occurs. + If EOF_IS_OK, then an immediate end-of-file causes false to be + returned; otherwise, immediate end-of-file causes an abort + too. */ +static inline bool +read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, + void *buf, size_t byte_cnt) +{ + size_t bytes_read = fread (buf, 1, byte_cnt, r->file); + if (bytes_read == byte_cnt) + return true; + else if (ferror (r->file)) + sys_error (r, _("System error: %s."), strerror (errno)); + else if (!eof_is_ok || bytes_read != 0) + sys_error (r, _("Unexpected end of file.")); + else return false; - return true; } +/* Reads BYTE_CNT into BUF. + Aborts upon I/O error or if end-of-file is encountered. */ +static void +read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +{ + read_bytes_internal (r, false, buf, byte_cnt); +} + +/* Reads BYTE_CNT bytes into BUF. + Returns true if exactly BYTE_CNT bytes are successfully read. + Returns false if an immediate end-of-file is encountered. + Aborts if an I/O error or a partial read occurs. */ +static bool +try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +{ + return read_bytes_internal (r, true, buf, byte_cnt); +} + +/* Reads a 32-bit signed integer from R and returns its value in + host format. */ +static int +read_int (struct sfm_reader *r) +{ + uint8_t integer[4]; + read_bytes (r, integer, sizeof integer); + return integer_get (r->integer_format, integer, sizeof integer); +} + +/* Reads a 64-bit floating-point number from R and returns its + value in host format. */ +static double +read_float (struct sfm_reader *r) +{ + uint8_t number[8]; + read_bytes (r, number, sizeof number); + return float_get_double (r->float_format, number); +} + +/* Reads exactly SIZE - 1 bytes into BUFFER + and stores a null byte into BUFFER[SIZE - 1]. */ +static void +read_string (struct sfm_reader *r, char *buffer, size_t size) +{ + assert (size > 0); + read_bytes (r, buffer, size - 1); + buffer[size - 1] = '\0'; +} + +/* Skips BYTES bytes forward in R. */ +static void +skip_bytes (struct sfm_reader *r, size_t bytes) +{ + while (bytes > 0) + { + char buffer[1024]; + size_t chunk = MIN (sizeof buffer, bytes); + read_bytes (r, buffer, chunk); + bytes -= chunk; + } +} + +static const struct casereader_class sys_file_casereader_class = + { + sys_file_casereader_read, + sys_file_casereader_destroy, + NULL, + NULL, + };