X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=84342369c37fd11a117f202d84aac2698aaee565;hb=a628323f67aa963f1e0ec866dc6cd9ede022de82;hp=de9ae0d655f831e7097f696581bca5164ca8be8d;hpb=9f4661992f4b481c6dafa6fd53c94ecfe7b3af8c;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index de9ae0d6..84342369 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,8 +16,8 @@ #include -#include -#include +#include "data/sys-file-reader.h" +#include "data/sys-file-private.h" #include #include @@ -25,33 +25,36 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "c-ctype.h" -#include "inttostr.h" -#include "minmax.h" -#include "unlocked-io.h" -#include "xalloc.h" -#include "xsize.h" +#include "data/attributes.h" +#include "data/case.h" +#include "data/casereader-provider.h" +#include "data/casereader.h" +#include "data/dictionary.h" +#include "data/file-handle-def.h" +#include "data/file-name.h" +#include "data/format.h" +#include "data/missing-values.h" +#include "data/mrset.h" +#include "data/short-names.h" +#include "data/value-labels.h" +#include "data/value.h" +#include "data/variable.h" +#include "libpspp/array.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" +#include "libpspp/stringi-set.h" + +#include "gl/c-ctype.h" +#include "gl/inttostr.h" +#include "gl/minmax.h" +#include "gl/unlocked-io.h" +#include "gl/xalloc.h" +#include "gl/xsize.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -66,9 +69,10 @@ struct sfm_reader /* File state. */ struct file_handle *fh; /* File handle. */ + struct fh_lock *lock; /* Mutual exclusion for file handle. */ FILE *file; /* File stream. */ bool error; /* I/O or corruption error? */ - size_t value_cnt; /* Number of "union value"s in struct case. */ + struct caseproto *proto; /* Format of output cases. */ /* File format. */ enum integer_format integer_format; /* On-disk integer format. */ @@ -84,9 +88,10 @@ struct sfm_reader double bias; /* Compression bias, usually 100.0. */ uint8_t opcodes[8]; /* Current block of opcodes. */ size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */ + bool corruption_warning; /* Warned about possible corruption? */ }; -static struct casereader_class sys_file_casereader_class; +static const struct casereader_class sys_file_casereader_class; static bool close_reader (struct sfm_reader *); @@ -95,10 +100,14 @@ static struct variable **make_var_by_value_idx (struct sfm_reader *, static struct variable *lookup_var_by_value_idx (struct sfm_reader *, struct variable **, int value_idx); +static struct variable *lookup_var_by_short_name (struct dictionary *, + const char *short_name); +static void sys_msg (struct sfm_reader *r, int class, + const char *format, va_list args) + PRINTF_FORMAT (3, 0); static void sys_warn (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3); - static void sys_error (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3) NO_RETURN; @@ -110,15 +119,30 @@ static double read_float (struct sfm_reader *); static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); -static struct variable_to_value_map *open_variable_to_value_map ( - struct sfm_reader *, size_t size); -static void close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *); -static bool read_variable_to_value_map (struct sfm_reader *, - struct dictionary *, - struct variable_to_value_map *, - struct variable **var, char **value, - int *warning_cnt); +static struct text_record *open_text_record (struct sfm_reader *, size_t size); +static void close_text_record (struct sfm_reader *r, + struct text_record *); +static bool read_variable_to_value_pair (struct sfm_reader *, + struct dictionary *, + struct text_record *, + struct variable **var, char **value); +static void text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) + PRINTF_FORMAT (3, 4); +static char *text_get_token (struct text_record *, + struct substring delimiters, char *delimiter); +static bool text_match (struct text_record *, char c); +static bool text_read_variable_name (struct sfm_reader *, struct dictionary *, + struct text_record *, + struct substring delimiters, + struct variable **); +static bool text_read_short_name (struct sfm_reader *, struct dictionary *, + struct text_record *, + struct substring delimiters, + struct variable **); +static const char *text_parse_counted_string (struct sfm_reader *, + struct text_record *); +static size_t text_pos (const struct text_record *); static bool close_reader (struct sfm_reader *r); @@ -149,9 +173,13 @@ static void read_extension_record (struct sfm_reader *, struct dictionary *, struct sfm_read_info *); static void read_machine_integer_info (struct sfm_reader *, size_t size, size_t count, - struct sfm_read_info *); + struct sfm_read_info *, + struct dictionary * + ); static void read_machine_float_info (struct sfm_reader *, size_t size, size_t count); +static void read_mrsets (struct sfm_reader *, size_t size, size_t count, + struct dictionary *); static void read_display_parameters (struct sfm_reader *, size_t size, size_t count, struct dictionary *); @@ -161,7 +189,71 @@ static void read_long_var_name_map (struct sfm_reader *, static void read_long_string_map (struct sfm_reader *, size_t size, size_t count, struct dictionary *); +static void read_data_file_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_variable_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_long_string_value_labels (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); + +/* Convert all the strings in DICT from the dict encoding to UTF8 */ +static void +recode_strings (struct dictionary *dict) +{ + int i; + const char *enc = dict_get_encoding (dict); + + if ( NULL == enc) + enc = get_default_encoding (); + + for (i = 0 ; i < dict_get_var_cnt (dict); ++i) + { + /* Convert the long variable name */ + struct variable *var = dict_get_var (dict, i); + const char *native_name = var_get_name (var); + char *utf8_name = recode_string (UTF8, enc, native_name, -1); + if ( 0 != strcmp (utf8_name, native_name)) + { + if ( NULL == dict_lookup_var (dict, utf8_name)) + dict_rename_var (dict, var, utf8_name); + else + msg (MW, + _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name); + } + + free (utf8_name); + + /* Convert the variable label */ + if (var_has_label (var)) + { + char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1); + var_set_label (var, utf8_label); + free (utf8_label); + } + + if (var_has_value_labels (var)) + { + const struct val_lab *vl = NULL; + const struct val_labs *vlabs = var_get_value_labels (var); + + for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl)) + { + const union value *val = val_lab_get_value (vl); + const char *label = val_lab_get_label (vl); + char *new_label = NULL; + + new_label = recode_string (UTF8, enc, label, -1); + + var_replace_value_label (var, val, new_label); + free (new_label); + } + } + } +} /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. @@ -179,19 +271,32 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, int claimed_oct_cnt; int rec_type; - if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) - return NULL; - *dict = dict_create (); /* Create and initialize reader. */ r = pool_create_container (struct sfm_reader, pool); - r->fh = fh; - r->file = fn_open (fh_get_file_name (fh), "rb"); + r->fh = fh_ref (fh); + r->lock = NULL; + r->file = NULL; r->error = false; r->oct_cnt = 0; r->has_long_var_names = false; r->opcode_idx = sizeof r->opcodes; + r->corruption_warning = false; + + /* TRANSLATORS: this fragment will be interpolated into + messages in fh_lock() that identify types of files. */ + r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false); + if (r->lock == NULL) + goto error; + + r->file = fn_open (fh_get_file_name (fh), "rb"); + if (r->file == NULL) + { + msg (ME, _("Error opening `%s' for reading as a system file: %s."), + fh_get_file_name (r->fh), strerror (errno)); + goto error; + } /* Initialize info. */ if (info == NULL) @@ -199,19 +304,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, memset (info, 0, sizeof *info); if (setjmp (r->bail_out)) - { - close_reader (r); - dict_destroy (*dict); - *dict = NULL; - return NULL; - } + goto error; - if (r->file == NULL) - { - msg (ME, _("Error opening \"%s\" for reading as a system file: %s."), - fh_get_file_name (r->fh), strerror (errno)); - longjmp (r->bail_out, 1); - } /* Read header. */ read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info); @@ -261,8 +355,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, for (i = 0; i < dict_get_var_cnt (*dict); i++) { struct variable *var = dict_get_var (*dict, i); - char short_name [SHORT_NAME_LEN + 1]; - char long_name [SHORT_NAME_LEN + 1]; + char short_name[SHORT_NAME_LEN + 1]; + char long_name[SHORT_NAME_LEN + 1]; strcpy (short_name, var_get_name (var)); @@ -279,6 +373,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->has_long_var_names = true; } + recode_strings (*dict); + /* Read record 999 data, which is just filler. */ read_int (r); @@ -298,13 +394,19 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, dictionary and may destroy or modify its variables. */ sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt); pool_register (r->pool, free, r->sfm_vars); + r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool); pool_free (r->pool, var_by_value_idx); - r->value_cnt = dict_get_next_value_idx (*dict); return casereader_create_sequential - (NULL, r->value_cnt, + (NULL, r->proto, r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, &sys_file_casereader_class, r); + +error: + close_reader (r); + dict_destroy (*dict); + *dict = NULL; + return NULL; } /* Closes a system file after we're done with it. @@ -322,15 +424,15 @@ close_reader (struct sfm_reader *r) { if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) { - msg (ME, _("Error closing system file \"%s\": %s."), + msg (ME, _("Error closing system file `%s': %s."), fh_get_file_name (r->fh), strerror (errno)); r->error = true; } r->file = NULL; } - if (r->fh != NULL) - fh_close (r->fh, "system file", "rs"); + fh_unlock (r->lock); + fh_unref (r->fh); error = r->error; pool_destroy (r->pool); @@ -411,15 +513,25 @@ read_header (struct sfm_reader *r, struct dictionary *dict, if ( r->case_cnt > INT_MAX / 2) r->case_cnt = -1; - /* Identify floating-point format and obtain compression bias. */ read_bytes (r, raw_bias, sizeof raw_bias); if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) { - sys_warn (r, _("Compression bias (%g) is not the usual " - "value of 100, or system file uses unrecognized " - "floating-point format."), - r->bias); + uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (memcmp (raw_bias, zero_bias, 8)) + sys_warn (r, _("Compression bias is not the usual " + "value of 100, or system file uses unrecognized " + "floating-point format.")); + else + { + /* Some software is known to write all-zeros to this + field. Such software also writes floating-point + numbers in the format that we expect by default + (it seems that all software most likely does, in + reality), so don't warn in this case. */ + } + if (r->integer_format == INTEGER_MSB_FIRST) r->float_format = FLOAT_IEEE_DOUBLE_BE; else @@ -482,14 +594,14 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, /* Check variable name. */ if (name[0] == '$' || name[0] == '#') - sys_error (r, "Variable name begins with invalid character `%c'.", + sys_error (r, _("Variable name begins with invalid character `%c'."), name[0]); if (!var_is_plausible_name (name, false)) sys_error (r, _("Invalid variable name `%s'."), name); /* Create variable. */ if (width < 0 || width > 255) - sys_error (r, _("Bad variable width %d."), width); + sys_error (r, _("Bad width %d for variable %s."), width, name); var = dict_create_var (dict, name, width); if (var == NULL) sys_error (r, @@ -504,16 +616,20 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, sys_error (r, _("Variable label indicator field is not 0 or 1.")); if (has_variable_label == 1) { - size_t len; + size_t len, read_len; char label[255 + 1]; len = read_int (r); - if (len >= sizeof label) - sys_error (r, _("Variable %s has label of invalid length %zu."), - name, len); - read_string (r, label, len + 1); + + /* Read up to 255 bytes of label. */ + read_len = MIN (sizeof label - 1, len); + read_string (r, label, read_len + 1); var_set_label (var, label); + /* Skip unread label bytes. */ + skip_bytes (r, len - read_len); + + /* Skip label padding up to multiple of 4 bytes. */ skip_bytes (r, ROUND_UP (len, 4) - len); } @@ -523,7 +639,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, struct missing_values mv; int i; - mv_init (&mv, var_get_width (var)); + mv_init_pool (r->pool, &mv, var_get_width (var)); if (var_is_numeric (var)) { if (missing_value_code < -3 || missing_value_code > 3 @@ -534,27 +650,31 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, { double low = read_float (r); double high = read_float (r); - mv_add_num_range (&mv, low, high); + mv_add_range (&mv, low, high); missing_value_code = -missing_value_code - 2; } for (i = 0; i < missing_value_code; i++) mv_add_num (&mv, read_float (r)); } - else if (var_get_width (var) <= MAX_SHORT_STRING) + else { + int mv_width = MAX (width, 8); + union value value; + if (missing_value_code < 1 || missing_value_code > 3) sys_error (r, _("String missing value indicator field is not " "0, 1, 2, or 3.")); + + value_init (&value, mv_width); + value_set_missing (&value, mv_width); for (i = 0; i < missing_value_code; i++) { - char string[9]; - read_string (r, string, sizeof string); - mv_add_str (&mv, string); + uint8_t *s = value_str_rw (&value, mv_width); + read_bytes (r, s, 8); + mv_add_str (&mv, s); } + value_destroy (&value, mv_width); } - else - sys_error (r, _("Long string variable %s may not have missing " - "values."), name); var_set_missing_values (var, &mv); } @@ -623,7 +743,7 @@ parse_format_spec (struct sfm_reader *r, unsigned int s, else var_set_write_format (v, &f); } - else if (*++format_warning_cnt <= max_format_warnings) + else if (++*format_warning_cnt <= max_format_warnings) { char fmt_string[FMT_STRING_LEN_MAX + 1]; sys_warn (r, _("%s variable %s has invalid %s format %s."), @@ -651,7 +771,9 @@ setup_weight (struct sfm_reader *r, int weight_idx, if (var_is_numeric (weight_var)) dict_set_weight (dict, weight_var); else - sys_error (r, _("Weighting variable must be numeric.")); + sys_error (r, _("Weighting variable must be numeric " + "(not string variable `%s')."), + var_get_name (weight_var)); } } @@ -700,7 +822,7 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, switch (subtype) { case 3: - read_machine_integer_info (r, size, count, info); + read_machine_integer_info (r, size, count, info, dict); return; case 4: @@ -719,7 +841,12 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, break; case 7: - /* Unknown purpose. */ + case 19: + read_mrsets (r, size, count, dict); + return; + + case 8: + /* Used by the SPSS Data Entry software. */ break; case 11: @@ -735,16 +862,38 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, return; case 16: - /* New in SPSS v14? Unknown purpose. */ + /* Extended number of cases. Not important. */ break; case 17: - /* Text field that defines variable attributes. New in - SPSS 14. */ - break; + read_data_file_attributes (r, size, count, dict); + return; + + case 18: + read_variable_attributes (r, size, count, dict); + return; + + case 20: + /* New in SPSS 16. Contains a single string that describes + the character encoding, e.g. "windows-1252". */ + { + char *encoding = pool_calloc (r->pool, size, count + 1); + read_string (r, encoding, count + 1); + dict_set_encoding (dict, encoding); + return; + } + + case 21: + /* New in SPSS 16. Encodes value labels for long string + variables. */ + read_long_string_value_labels (r, size, count, dict); + return; default: - sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); + sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send " + "a copy of this file, and the syntax which created it " + "to %s."), + subtype, PACKAGE_BUGREPORT); break; } @@ -754,7 +903,8 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, /* Read record type 7, subtype 3. */ static void read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, - struct sfm_read_info *info) + struct sfm_read_info *info, + struct dictionary *dict) { int version_major = read_int (r); int version_minor = read_int (r); @@ -763,7 +913,7 @@ read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, int float_representation = read_int (r); int compression_code UNUSED = read_int (r); int integer_representation = read_int (r); - int character_code UNUSED = read_int (r); + int character_code = read_int (r); int expected_float_format; int expected_integer_format; @@ -791,7 +941,7 @@ read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, if (float_representation != expected_float_format) sys_error (r, _("Floating-point representation indicated by " "system file (%d) differs from expected (%d)."), - r->float_format, expected_float_format); + float_representation, expected_float_format); /* Check integer format. */ if (r->integer_format == INTEGER_MSB_FIRST) @@ -801,12 +951,48 @@ read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, else NOT_REACHED (); if (integer_representation != expected_integer_format) + sys_warn (r, _("Integer format indicated by system file (%d) " + "differs from expected (%d)."), + integer_representation, expected_integer_format); + + /* + Record 7 (20) provides a much more reliable way of + setting the encoding. + The character_code is used as a fallback only. + */ + if ( NULL == dict_get_encoding (dict)) { - static const char *endian[] = {N_("little-endian"), N_("big-endian")}; - sys_warn (r, _("Integer format indicated by system file (%s) " - "differs from expected (%s)."), - gettext (endian[integer_representation == 1]), - gettext (endian[expected_integer_format == 1])); + switch (character_code) + { + case 1: + dict_set_encoding (dict, "EBCDIC-US"); + break; + case 2: + case 3: + /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic] + respectively. However, there are known to be many files + in the wild with character code 2, yet have data which are + clearly not ascii. + Therefore we ignore these values. + */ + return; + case 4: + dict_set_encoding (dict, "MS_KANJI"); + break; + case 65000: + dict_set_encoding (dict, "UTF-7"); + break; + case 65001: + dict_set_encoding (dict, "UTF-8"); + break; + default: + { + char enc[100]; + snprintf (enc, 100, "CP%d", character_code); + dict_set_encoding (dict, enc); + } + break; + }; } } @@ -823,11 +1009,178 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) size, count); if (sysmis != SYSMIS) - sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis); + sys_warn (r, _("File specifies unexpected value %g as %s."), + sysmis, "SYSMIS"); + if (highest != HIGHEST) - sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + highest, "HIGHEST"); + if (lowest != LOWEST) - sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + lowest, "LOWEST"); +} + +/* Read record type 7, subtype 7 or 19. */ +static void +read_mrsets (struct sfm_reader *r, size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text; + struct mrset *mrset; + + text = open_text_record (r, size * count); + for (;;) + { + const char *name, *label, *counted; + struct stringi_set var_names; + size_t allocated_vars; + char delimiter; + int width; + + mrset = xzalloc (sizeof *mrset); + + name = text_get_token (text, ss_cstr ("="), NULL); + if (name == NULL) + break; + mrset->name = xstrdup (name); + + if (mrset->name[0] != '$') + { + sys_warn (r, _("`%s' does not begin with `$' at offset %zu " + "in MRSETS record."), mrset->name, text_pos (text)); + break; + } + + if (text_match (text, 'C')) + { + mrset->type = MRSET_MC; + if (!text_match (text, ' ')) + { + sys_warn (r, _("Missing space following `%c' at offset %zu " + "in MRSETS record."), 'C', text_pos (text)); + break; + } + } + else if (text_match (text, 'D')) + { + mrset->type = MRSET_MD; + mrset->cat_source = MRSET_VARLABELS; + } + else if (text_match (text, 'E')) + { + char *number; + + mrset->type = MRSET_MD; + mrset->cat_source = MRSET_COUNTEDVALUES; + if (!text_match (text, ' ')) + { + sys_warn (r, _("Missing space following `%c' at offset %zu " + "in MRSETS record."), 'E', text_pos (text)); + break; + } + + number = text_get_token (text, ss_cstr (" "), NULL); + if (!strcmp (number, "11")) + mrset->label_from_var_label = true; + else if (strcmp (number, "1")) + sys_warn (r, _("Unexpected label source value `%s' " + "following `E' at offset %zu in MRSETS record."), + number, text_pos (text)); + } + else + { + sys_warn (r, _("Missing `C', `D', or `E' at offset %zu " + "in MRSETS record."), + text_pos (text)); + break; + } + + if (mrset->type == MRSET_MD) + { + counted = text_parse_counted_string (r, text); + if (counted == NULL) + break; + } + + label = text_parse_counted_string (r, text); + if (label == NULL) + break; + mrset->label = label[0] != '\0' ? xstrdup (label) : NULL; + + stringi_set_init (&var_names); + allocated_vars = 0; + width = INT_MAX; + do + { + struct variable *var; + const char *var_name; + + var_name = text_get_token (text, ss_cstr (" \n"), &delimiter); + if (var_name == NULL) + { + sys_warn (r, _("Missing new-line parsing variable names " + "at offset %zu in MRSETS record."), + text_pos (text)); + break; + } + + var = lookup_var_by_short_name (dict, var_name); + if (var == NULL) + continue; + if (!stringi_set_insert (&var_names, var_name)) + { + sys_warn (r, _("Duplicate variable name %s " + "at offset %zu in MRSETS record."), + var_name, text_pos (text)); + continue; + } + + if (mrset->label == NULL && mrset->label_from_var_label + && var_has_label (var)) + mrset->label = xstrdup (var_get_label (var)); + + if (mrset->n_vars + && var_get_type (var) != var_get_type (mrset->vars[0])) + { + sys_warn (r, _("MRSET %s contains both string and " + "numeric variables."), name); + continue; + } + width = MIN (width, var_get_width (var)); + + if (mrset->n_vars >= allocated_vars) + mrset->vars = x2nrealloc (mrset->vars, &allocated_vars, + sizeof *mrset->vars); + mrset->vars[mrset->n_vars++] = var; + } + while (delimiter != '\n'); + + if (mrset->n_vars < 2) + { + sys_warn (r, _("MRSET %s has only %zu variables."), mrset->name, + mrset->n_vars); + mrset_destroy (mrset); + continue; + } + + if (mrset->type == MRSET_MD) + { + mrset->width = width; + value_init (&mrset->counted, width); + if (width == 0) + mrset->counted.f = strtod (counted, NULL); + else + value_copy_str_rpad (&mrset->counted, width, + (const uint8_t *) counted, ' '); + } + + dict_add_mrset (dict, mrset); + mrset = NULL; + stringi_set_destroy (&var_names); + } + mrset_destroy (mrset); + close_text_record (r, text); } /* Read record type 7, subtype 11, which specifies how variables @@ -836,19 +1189,36 @@ static void read_display_parameters (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - const size_t n_vars = count / 3 ; + size_t n_vars; + bool includes_width; bool warned = false; - int i; + size_t i; - if (count % 3 || n_vars != dict_get_var_cnt (dict)) - sys_error (r, _("Bad size (%zu) or count (%zu) on extension 11."), - size, count); + if (size != 4) + { + sys_warn (r, _("Bad size %zu on extension 11."), size); + skip_bytes (r, size * count); + return; + } + + n_vars = dict_get_var_cnt (dict); + if (count == 3 * n_vars) + includes_width = true; + else if (count == 2 * n_vars) + includes_width = false; + else + { + sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."), + count, n_vars); + skip_bytes (r, size * count); + return; + } for (i = 0; i < n_vars; ++i) { struct variable *v = dict_get_var (dict, i); int measure = read_int (r); - int width = read_int (r); + int width = includes_width ? read_int (r) : 0; int align = read_int (r); /* SPSS 14 sometimes seems to set string variables' measure @@ -856,16 +1226,13 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count, if (0 == measure && var_is_alpha (v)) measure = 1; - /* Older versions (SPSS 9.0) sometimes set the display width - to zero. This causes confusion especially in the GUI */ - if (0 == width) - width = 8; - if (measure < 1 || measure > 3 || align < 0 || align > 2) { if (!warned) - sys_warn (r, _("Invalid variable display parameters. " - "Default parameters substituted.")); + sys_warn (r, _("Invalid variable display parameters " + "for variable %zu (%s). " + "Default parameters substituted."), + i, var_get_name (v)); warned = true; continue; } @@ -873,10 +1240,15 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count, var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL : measure == 2 ? MEASURE_ORDINAL : MEASURE_SCALE)); - var_set_display_width (v, width); var_set_alignment (v, (align == 0 ? ALIGN_LEFT : align == 1 ? ALIGN_RIGHT : ALIGN_CENTRE)); + + /* Older versions (SPSS 9.0) sometimes set the display + width to zero. This causes confusion in the GUI, so + only set the width if it is nonzero. */ + if (width > 0) + var_set_display_width (v, width); } } @@ -887,14 +1259,12 @@ static void read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *long_name; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &long_name, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { char **short_names; size_t short_name_cnt; @@ -940,7 +1310,7 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, } free (short_names); } - close_variable_to_value_map (r, map); + close_text_record (r, text); r->has_long_var_names = true; } @@ -950,14 +1320,12 @@ static void read_long_string_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *length_s; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &length_s, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &length_s)) { size_t idx = var_get_dict_index (var); long int length; @@ -969,7 +1337,7 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count, if (length < 1 || length > MAX_STRING) { sys_warn (r, _("%s listed as string of invalid length %s " - "in very length string record."), + "in very long string record."), var_get_name (var), length_s); continue; } @@ -999,13 +1367,13 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count, var_set_short_name (var, i, var_get_short_name (seg, 0)); if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8)) sys_error (r, _("Very long string with width %ld has segment %d " - "of width %d (expected %d)"), + "of width %d (expected %d)."), length, i, width, alloc_width); } dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); var_set_width (var, length); } - close_variable_to_value_map (r, map); + close_text_record (r, text); dict_compact_values (dict); } @@ -1019,7 +1387,7 @@ read_value_labels (struct sfm_reader *r, struct label { - char raw_value[8]; /* Value as uninterpreted bytes. */ + uint8_t raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ char *label; /* Null-terminated label string. */ }; @@ -1029,6 +1397,7 @@ read_value_labels (struct sfm_reader *r, struct variable **var = NULL; /* Associated variables. */ int var_cnt; /* Number of associated variables. */ + int max_width; /* Maximum width of string variables. */ int i; @@ -1087,12 +1456,15 @@ read_value_labels (struct sfm_reader *r, /* Read the list of variables. */ var = pool_nalloc (subpool, var_cnt, sizeof *var); + max_width = 0; for (i = 0; i < var_cnt; i++) { var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r)); - if (var_is_long_string (var[i])) - sys_error (r, _("Value labels are not allowed on long string " - "variables (%s)."), var_get_name (var[i])); + if (var_get_width (var[i]) > 8) + sys_error (r, _("Value labels may not be added to long string " + "variables (e.g. %s) using records types 3 and 4."), + var_get_name (var[i])); + max_width = MAX (max_width, var_get_width (var[i])); } /* Type check the variables. */ @@ -1111,9 +1483,10 @@ read_value_labels (struct sfm_reader *r, { struct label *label = labels + i; + value_init_pool (subpool, &label->value, max_width); if (var_is_alpha (var[0])) - buf_copy_rpad (label->value.s, sizeof label->value.s, - label->raw_value, sizeof label->raw_value); + u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, + label->raw_value, sizeof label->raw_value, ' '); else label->value.f = float_get_double (r->float_format, label->raw_value); } @@ -1134,8 +1507,8 @@ read_value_labels (struct sfm_reader *r, sys_warn (r, _("Duplicate value label for %g on %s."), label->value.f, var_get_name (v)); else - sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), - var_get_width (v), label->value.s, + sys_warn (r, _("Duplicate value label for `%.*s' on %s."), + max_width, value_str (&label->value, max_width), var_get_name (v)); } } @@ -1143,6 +1516,203 @@ read_value_labels (struct sfm_reader *r, pool_destroy (subpool); } + +/* Reads a set of custom attributes from TEXT into ATTRS. + ATTRS may be a null pointer, in which case the attributes are + read but discarded. */ +static void +read_attributes (struct sfm_reader *r, struct text_record *text, + struct attrset *attrs) +{ + do + { + struct attribute *attr; + char *key; + int index; + + /* Parse the key. */ + key = text_get_token (text, ss_cstr ("("), NULL); + if (key == NULL) + return; + + attr = attribute_create (key); + for (index = 1; ; index++) + { + /* Parse the value. */ + char *value; + size_t length; + + value = text_get_token (text, ss_cstr ("\n"), NULL); + if (value == NULL) + { + text_warn (r, text, _("Error parsing attribute value %s[%d]."), + key, index); + break; + } + + length = strlen (value); + if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') + { + value[length - 1] = '\0'; + attribute_add_value (attr, value + 1); + } + else + { + text_warn (r, text, + _("Attribute value %s[%d] is not quoted: %s."), + key, index, value); + attribute_add_value (attr, value); + } + + /* Was this the last value for this attribute? */ + if (text_match (text, ')')) + break; + } + if (attrs != NULL) + attrset_add (attrs, attr); + else + attribute_destroy (attr); + } + while (!text_match (text, '/')); +} + +/* Reads record type 7, subtype 17, which lists custom + attributes on the data file. */ +static void +read_data_file_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + read_attributes (r, text, dict_get_attributes (dict)); + close_text_record (r, text); +} + +static void +skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels) +{ + size_t i; + + for (i = 0; i < n_labels; i++) + { + size_t value_length, label_length; + + value_length = read_int (r); + skip_bytes (r, value_length); + label_length = read_int (r); + skip_bytes (r, label_length); + } +} + +static void +read_long_string_value_labels (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *d) +{ + const off_t start = ftello (r->file); + while (ftello (r->file) - start < size * count) + { + char var_name[VAR_NAME_LEN + 1]; + size_t n_labels, i; + struct variable *v; + union value value; + int var_name_len; + int width; + + /* Read header. */ + var_name_len = read_int (r); + if (var_name_len > VAR_NAME_LEN) + sys_error (r, _("Variable name length in long string value label " + "record (%d) exceeds %d-byte limit."), + var_name_len, VAR_NAME_LEN); + read_string (r, var_name, var_name_len + 1); + width = read_int (r); + n_labels = read_int (r); + + v = dict_lookup_var (d, var_name); + if (v == NULL) + { + sys_warn (r, _("Ignoring long string value record for " + "unknown variable %s."), var_name); + skip_long_string_value_labels (r, n_labels); + continue; + } + if (var_is_numeric (v)) + { + sys_warn (r, _("Ignoring long string value record for " + "numeric variable %s."), var_name); + skip_long_string_value_labels (r, n_labels); + continue; + } + if (width != var_get_width (v)) + { + sys_warn (r, _("Ignoring long string value record for variable %s " + "because the record's width (%d) does not match the " + "variable's width (%d)."), + var_name, width, var_get_width (v)); + skip_long_string_value_labels (r, n_labels); + continue; + } + + /* Read values. */ + value_init_pool (r->pool, &value, width); + for (i = 0; i < n_labels; i++) + { + size_t value_length, label_length; + char label[256]; + bool skip = false; + + /* Read value. */ + value_length = read_int (r); + if (value_length == width) + read_bytes (r, value_str_rw (&value, width), width); + else + { + sys_warn (r, _("Ignoring long string value %zu for variable %s, " + "with width %d, that has bad value width %zu."), + i, var_get_name (v), width, value_length); + skip_bytes (r, value_length); + skip = true; + } + + /* Read label. */ + label_length = read_int (r); + read_string (r, label, MIN (sizeof label, label_length + 1)); + if (label_length >= sizeof label) + { + /* Skip and silently ignore label text after the + first 255 bytes. The maximum documented length + of a label is 120 bytes so this is more than + generous. */ + skip_bytes (r, (label_length + 1) - sizeof label); + } + + if (!skip && !var_add_value_label (v, &value, label)) + sys_warn (r, _("Duplicate value label for `%.*s' on %s."), + width, value_str (&value, width), var_get_name (v)); + } + } +} + + +/* Reads record type 7, subtype 18, which lists custom + attributes on individual variables. */ +static void +read_variable_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + for (;;) + { + struct variable *var; + if (!text_read_variable_name (r, dict, text, ss_cstr (":"), &var)) + break; + read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL); + } + close_text_record (r, text); +} + /* Case reader. */ @@ -1152,31 +1722,31 @@ static void partial_record (struct sfm_reader *r) static void read_error (struct casereader *, const struct sfm_reader *); static bool read_case_number (struct sfm_reader *, double *); -static bool read_case_string (struct sfm_reader *, char *, size_t); +static bool read_case_string (struct sfm_reader *, uint8_t *, size_t); static int read_opcode (struct sfm_reader *); static bool read_compressed_number (struct sfm_reader *, double *); -static bool read_compressed_string (struct sfm_reader *, char *); -static bool read_whole_strings (struct sfm_reader *, char *, size_t); +static bool read_compressed_string (struct sfm_reader *, uint8_t *); +static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t); static bool skip_whole_strings (struct sfm_reader *, size_t); -/* Reads one case from READER's file into C. Returns true only - if successful. */ -static bool -sys_file_casereader_read (struct casereader *reader, void *r_, - struct ccase *c) +/* Reads and returns one case from READER's file. Returns a null + pointer if not successful. */ +static struct ccase * +sys_file_casereader_read (struct casereader *reader, void *r_) { struct sfm_reader *r = r_; + struct ccase *volatile c; int i; if (r->error) - return false; + return NULL; - case_create (c, r->value_cnt); + c = case_create (r->proto); if (setjmp (r->bail_out)) { casereader_force_error (reader); - case_destroy (c); - return false; + case_unref (c); + return NULL; } for (i = 0; i < r->sfm_var_cnt; i++) @@ -1184,28 +1754,29 @@ sys_file_casereader_read (struct casereader *reader, void *r_, struct sfm_var *sv = &r->sfm_vars[i]; union value *v = case_data_rw_idx (c, sv->case_index); - if (sv->width == 0) + if (sv->var_width == 0) { if (!read_case_number (r, &v->f)) goto eof; } else { - if (!read_case_string (r, v->s + sv->offset, sv->width)) + uint8_t *s = value_str_rw (v, sv->var_width); + if (!read_case_string (r, s + sv->offset, sv->segment_width)) goto eof; if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8))) partial_record (r); } } - return true; + return c; eof: - case_destroy (c); if (i != 0) partial_record (r); if (r->case_cnt != -1) read_error (reader, r); - return false; + case_unref (c); + return NULL; } /* Issues an error that R ends in a partial record. */ @@ -1252,7 +1823,7 @@ read_case_number (struct sfm_reader *r, double *d) Returns true if successful, false if end of file is reached immediately. */ static bool -read_case_string (struct sfm_reader *r, char *s, size_t length) +read_case_string (struct sfm_reader *r, uint8_t *s, size_t length) { size_t whole = ROUND_DOWN (length, 8); size_t partial = length % 8; @@ -1265,7 +1836,7 @@ read_case_string (struct sfm_reader *r, char *s, size_t length) if (partial) { - char bounce[8]; + uint8_t bounce[8]; if (!read_whole_strings (r, bounce, sizeof bounce)) { if (whole) @@ -1317,7 +1888,14 @@ read_compressed_number (struct sfm_reader *r, double *d) break; case 254: - sys_error (r, _("Compressed data is corrupt.")); + float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d); + if (!r->corruption_warning) + { + r->corruption_warning = true; + sys_warn (r, _("Possible compressed data corruption: " + "compressed spaces appear in numeric field.")); + } + break; case 255: *d = SYSMIS; @@ -1336,9 +1914,10 @@ read_compressed_number (struct sfm_reader *r, double *d) Returns true if successful, false if end of file is reached immediately. */ static bool -read_compressed_string (struct sfm_reader *r, char *dst) +read_compressed_string (struct sfm_reader *r, uint8_t *dst) { - switch (read_opcode (r)) + int opcode = read_opcode (r); + switch (opcode) { case -1: case 252: @@ -1353,7 +1932,25 @@ read_compressed_string (struct sfm_reader *r, char *dst) break; default: - sys_error (r, _("Compressed data is corrupt.")); + { + double value = opcode - r->bias; + float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst); + if (value == 0.0) + { + /* This has actually been seen "in the wild". The submitter of the + file that showed that the contents decoded as spaces, but they + were at the end of the field so it's possible that the null + bytes just acted as null terminators. */ + } + else if (!r->corruption_warning) + { + r->corruption_warning = true; + sys_warn (r, _("Possible compressed data corruption: " + "string contains compressed integer (opcode %d)."), + opcode); + } + } + break; } return true; @@ -1365,7 +1962,7 @@ read_compressed_string (struct sfm_reader *r, char *dst) Returns true if successful, false if end of file is reached immediately. */ static bool -read_whole_strings (struct sfm_reader *r, char *s, size_t length) +read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length) { assert (length % 8 == 0); if (!r->compressed) @@ -1393,7 +1990,7 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length) static bool skip_whole_strings (struct sfm_reader *r, size_t length) { - char buffer[1024]; + uint8_t buffer[1024]; assert (length < sizeof buffer); return read_whole_strings (r, buffer, length); } @@ -1478,82 +2075,214 @@ lookup_var_by_short_name (struct dictionary *d, const char *short_name) return NULL; } -/* Helpers for reading records that contain "variable=value" - pairs. */ +/* Helpers for reading records that contain structured text + strings. */ + +/* Maximum number of warnings to issue for a single text + record. */ +#define MAX_TEXT_WARNINGS 5 /* State. */ -struct variable_to_value_map +struct text_record { struct substring buffer; /* Record contents. */ size_t pos; /* Current position in buffer. */ + int n_warnings; /* Number of warnings issued or suppressed. */ }; -/* Reads SIZE bytes into a "variable=value" map for R, - and returns the map. */ -static struct variable_to_value_map * -open_variable_to_value_map (struct sfm_reader *r, size_t size) +/* Reads SIZE bytes into a text record for R, + and returns the new text record. */ +static struct text_record * +open_text_record (struct sfm_reader *r, size_t size) { - struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map); + struct text_record *text = pool_alloc (r->pool, sizeof *text); char *buffer = pool_malloc (r->pool, size + 1); read_bytes (r, buffer, size); - map->buffer = ss_buffer (buffer, size); - map->pos = 0; - return map; + text->buffer = ss_buffer (buffer, size); + text->pos = 0; + text->n_warnings = 0; + return text; } -/* Closes MAP and frees its storage. - Not really needed, because the pool will free the map anyway, - but can be used to free it earlier. */ +/* Closes TEXT, frees its storage, and issues a final warning + about suppressed warnings if necesary. */ static void -close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *map) +close_text_record (struct sfm_reader *r, struct text_record *text) { - pool_free (r->pool, ss_data (map->buffer)); + if (text->n_warnings > MAX_TEXT_WARNINGS) + sys_warn (r, _("Suppressed %d additional related warnings."), + text->n_warnings - MAX_TEXT_WARNINGS); + pool_free (r->pool, ss_data (text->buffer)); } -/* Reads the next variable=value pair from MAP. +/* Reads a variable=value pair from TEXT. Looks up the variable in DICT and stores it into *VAR. Stores a null-terminated value into *VALUE. */ static bool -read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict, - struct variable_to_value_map *map, - struct variable **var, char **value, - int *warning_cnt) +read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, + struct variable **var, char **value) { - int max_warnings = 5; - for (;;) { - struct substring short_name_ss, value_ss; + if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) + return false; + + *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL); + if (*value == NULL) + return false; - if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss) - || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos, - &value_ss)) - { - if (*warning_cnt > max_warnings) - sys_warn (r, _("Suppressed %d additional variable map warnings."), - *warning_cnt - max_warnings); - return false; - } + text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX), + ss_buffer ("\t\0", 2)); + + if (*var != NULL) + return true; + } +} - map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX), - ss_buffer ("\t\0", 2)); +static bool +text_read_variable_name (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, struct substring delimiters, + struct variable **var) +{ + char *name; - ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0'; - *var = lookup_var_by_short_name (dict, ss_data (short_name_ss)); - if (*var == NULL) - { - if (++*warning_cnt <= max_warnings) - sys_warn (r, _("Variable map refers to unknown variable %s."), - ss_data (short_name_ss)); - continue; - } + name = text_get_token (text, delimiters, NULL); + if (name == NULL) + return false; + + *var = dict_lookup_var (dict, name); + if (*var != NULL) + return true; + + text_warn (r, text, _("Dictionary record refers to unknown variable %s."), + name); + return false; +} + + +static bool +text_read_short_name (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, struct substring delimiters, + struct variable **var) +{ + char *short_name = text_get_token (text, delimiters, NULL); + if (short_name == NULL) + return false; + + *var = lookup_var_by_short_name (dict, short_name); + if (*var == NULL) + text_warn (r, text, _("Dictionary record refers to unknown variable %s."), + short_name); + return true; +} + +/* Displays a warning for the current file position, limiting the + number to MAX_TEXT_WARNINGS for TEXT. */ +static void +text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) +{ + if (text->n_warnings++ < MAX_TEXT_WARNINGS) + { + va_list args; + + va_start (args, format); + sys_msg (r, MW, format, args); + va_end (args); + } +} + +static char * +text_get_token (struct text_record *text, struct substring delimiters, + char *delimiter) +{ + struct substring token; + char *end; + + if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) + return NULL; + + end = &ss_data (token)[ss_length (token)]; + if (delimiter != NULL) + *delimiter = *end; + *end = '\0'; + return ss_data (token); +} + +/* Reads a integer value expressed in decimal, then a space, then a string that + consists of exactly as many bytes as specified by the integer, then a space, + from TEXT. Returns the string, null-terminated, as a subset of TEXT's + buffer (so the caller should not free the string). */ +static const char * +text_parse_counted_string (struct sfm_reader *r, struct text_record *text) +{ + size_t start; + size_t n; + char *s; + + start = text->pos; + n = 0; + for (;;) + { + int c = text->buffer.string[text->pos]; + if (c < '0' || c > '9') + break; + n = (n * 10) + (c - '0'); + text->pos++; + } + if (start == text->pos) + { + sys_warn (r, _("Expecting digit at offset %zu in MRSETS record."), + text->pos); + return NULL; + } + + if (!text_match (text, ' ')) + { + sys_warn (r, _("Expecting space at offset %zu in MRSETS record."), + text->pos); + return NULL; + } - ss_data (value_ss)[ss_length (value_ss)] = '\0'; - *value = ss_data (value_ss); + if (text->pos + n > text->buffer.length) + { + sys_warn (r, _("%zu-byte string starting at offset %zu " + "exceeds record length %zu."), + n, text->pos, text->buffer.length); + return NULL; + } + + s = &text->buffer.string[text->pos]; + if (s[n] != ' ') + { + sys_warn (r, + _("Expecting space at offset %zu following %zu-byte string."), + text->pos + n, n); + return NULL; + } + s[n] = '\0'; + text->pos += n + 1; + return s; +} +static bool +text_match (struct text_record *text, char c) +{ + if (text->buffer.string[text->pos] == c) + { + text->pos++; return true; } + else + return false; +} + +/* Returns the current byte offset inside the TEXT's string. */ +static size_t +text_pos (const struct text_record *text) +{ + return text->pos; } /* Messages. */ @@ -1566,14 +2295,16 @@ sys_msg (struct sfm_reader *r, int class, const char *format, va_list args) struct string text; ds_init_empty (&text); - ds_put_format (&text, "\"%s\" near offset 0x%lx: ", - fh_get_file_name (r->fh), (unsigned long) ftell (r->file)); + ds_put_format (&text, "`%s' near offset 0x%llx: ", + fh_get_file_name (r->fh), (long long int) ftello (r->file)); ds_put_vformat (&text, format, args); m.category = msg_class_to_category (class); m.severity = msg_class_to_severity (class); m.where.file_name = NULL; m.where.line_number = 0; + m.where.first_column = 0; + m.where.last_column = 0; m.text = ds_cstr (&text); msg_emit (&m); @@ -1688,7 +2419,7 @@ skip_bytes (struct sfm_reader *r, size_t bytes) } } -static struct casereader_class sys_file_casereader_class = +static const struct casereader_class sys_file_casereader_class = { sys_file_casereader_read, sys_file_casereader_destroy,