+ return;
+ }
+
+ /* Rename each of the variables, one by one. (In a correctly constructed
+ system file, this cannot create any intermediate duplicate variable names,
+ because all of the new variable names are longer than any of the old
+ variable names and thus there cannot be any overlaps.) */
+ text = open_text_record (r, record, true);
+ while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
+ {
+ /* Validate long name. */
+ if (!dict_id_is_valid (dict, long_name, false)
+ || long_name[0] == '$' || long_name[0] == '#')
+ {
+ sys_warn (r, record->pos,
+ _("Long variable mapping from %s to invalid "
+ "variable name `%s'."),
+ var_get_name (var), long_name);
+ continue;
+ }
+
+ rename_var_and_save_short_names (r, record->pos, dict, var, long_name);
+ }
+ close_text_record (r, text);
+}
+
+/* Reads record type 7, subtype 14, which gives the real length
+ of each very long string. Rearranges DICT accordingly. */
+static bool
+parse_long_string_map (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct dictionary *dict)
+{
+ struct text_record *text;
+ struct variable *var;
+ char *length_s;
+
+ text = open_text_record (r, record, true);
+ while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
+ {
+ size_t idx = var_get_dict_index (var);
+ long int length;
+ int segment_cnt;
+ int i;
+
+ /* Get length. */
+ length = strtol (length_s, NULL, 10);
+ if (length < 1 || length > MAX_STRING)
+ {
+ sys_warn (r, record->pos,
+ _("%s listed as string of invalid length %s "
+ "in very long string record."),
+ var_get_name (var), length_s);
+ continue;
+ }
+
+ /* Check segments. */
+ segment_cnt = sfm_width_to_segments (length);
+ if (segment_cnt == 1)
+ {
+ sys_warn (r, record->pos,
+ _("%s listed in very long string record with width %s, "
+ "which requires only one segment."),
+ var_get_name (var), length_s);
+ continue;
+ }
+ if (idx + segment_cnt > dict_get_var_cnt (dict))
+ {
+ sys_error (r, record->pos,
+ _("Very long string %s overflows dictionary."),
+ var_get_name (var));
+ return false;
+ }
+
+ /* Get the short names from the segments and check their
+ lengths. */
+ for (i = 0; i < segment_cnt; i++)
+ {
+ struct variable *seg = dict_get_var (dict, idx + i);
+ int alloc_width = sfm_segment_alloc_width (length, i);
+ int width = var_get_width (seg);
+
+ if (i > 0)
+ var_set_short_name (var, i, var_get_short_name (seg, 0));
+ if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
+ {
+ sys_error (r, record->pos,
+ _("Very long string with width %ld has segment %d "
+ "of width %d (expected %d)."),
+ length, i, width, alloc_width);
+ return false;
+ }
+ }
+ dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
+ var_set_width (var, length);
+ }
+ close_text_record (r, text);
+ dict_compact_values (dict);
+
+ return true;
+}
+
+static bool
+parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
+ const struct sfm_var_record *var_recs, size_t n_var_recs,
+ const struct sfm_value_label_record *record)
+{
+ struct variable **vars;
+ char **utf8_labels;
+ size_t i;
+
+ utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
+ for (i = 0; i < record->n_labels; i++)
+ utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
+ record->labels[i].label, -1,
+ r->pool);
+
+ vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
+ for (i = 0; i < record->n_vars; i++)
+ {
+ vars[i] = lookup_var_by_index (r, record->pos,
+ var_recs, n_var_recs, record->vars[i]);
+ if (vars[i] == NULL)
+ return false;
+ }
+
+ for (i = 1; i < record->n_vars; i++)
+ if (var_get_type (vars[i]) != var_get_type (vars[0]))
+ {
+ sys_error (r, record->pos,
+ _("Variables associated with value label are not all of "
+ "identical type. Variable %s is %s, but variable "
+ "%s is %s."),
+ var_get_name (vars[0]),
+ var_is_numeric (vars[0]) ? _("numeric") : _("string"),
+ var_get_name (vars[i]),
+ var_is_numeric (vars[i]) ? _("numeric") : _("string"));
+ return false;
+ }
+
+ for (i = 0; i < record->n_vars; i++)
+ {
+ struct variable *var = vars[i];
+ int width;
+ size_t j;
+
+ width = var_get_width (var);
+ if (width > 8)
+ {
+ sys_error (r, record->pos,
+ _("Value labels may not be added to long string "
+ "variables (e.g. %s) using records types 3 and 4."),
+ var_get_name (var));
+ return false;
+ }
+
+ for (j = 0; j < record->n_labels; j++)
+ {
+ struct sfm_value_label *label = &record->labels[j];
+ union value value;
+
+ value_init (&value, width);
+ if (width == 0)
+ value.f = parse_float (r, label->value, 0);
+ else
+ memcpy (value.s, label->value, width);
+
+ if (!var_add_value_label (var, &value, utf8_labels[j]))
+ {
+ if (r->written_by_readstat)
+ {
+ /* Ignore the problem. ReadStat is buggy and emits value
+ labels whose values are longer than string variables'
+ widths, that are identical in the actual width of the
+ variable, e.g. both values "ABC123" and "ABC456" for a
+ string variable with width 3. */
+ }
+ else if (var_is_numeric (var))
+ sys_warn (r, record->pos,
+ _("Duplicate value label for %g on %s."),
+ value.f, var_get_name (var));
+ else
+ sys_warn (r, record->pos,
+ _("Duplicate value label for `%.*s' on %s."),
+ width, value.s, var_get_name (var));
+ }
+
+ value_destroy (&value, width);
+ }
+ }
+
+ pool_free (r->pool, vars);
+ for (i = 0; i < record->n_labels; i++)
+ pool_free (r->pool, utf8_labels[i]);
+ pool_free (r->pool, utf8_labels);
+
+ return true;
+}
+
+static struct variable *
+lookup_var_by_index (struct sfm_reader *r, off_t offset,
+ const struct sfm_var_record *var_recs, size_t n_var_recs,
+ int idx)
+{
+ const struct sfm_var_record *rec;
+
+ if (idx < 1 || idx > n_var_recs)
+ {
+ sys_error (r, offset,
+ _("Variable index %d not in valid range 1...%zu."),
+ idx, n_var_recs);
+ return NULL;
+ }
+
+ rec = &var_recs[idx - 1];
+ if (rec->var == NULL)
+ {
+ sys_error (r, offset,
+ _("Variable index %d refers to long string continuation."),
+ idx);
+ return NULL;
+ }
+
+ return rec->var;
+}
+
+/* Parses a set of custom attributes from TEXT into ATTRS.
+ ATTRS may be a null pointer, in which case the attributes are
+ read but discarded. */
+static void
+parse_attributes (struct sfm_reader *r, struct text_record *text,
+ struct attrset *attrs)
+{
+ do
+ {
+ struct attribute *attr;
+ char *key;
+ int index;
+
+ /* Parse the key. */
+ key = text_get_token (text, ss_cstr ("("), NULL);
+ if (key == NULL)
+ return;
+
+ attr = attribute_create (key);
+ for (index = 1; ; index++)
+ {
+ /* Parse the value. */
+ char *value;
+ size_t length;
+
+ value = text_get_token (text, ss_cstr ("\n"), NULL);
+ if (value == NULL)
+ {
+ text_warn (r, text, _("Error parsing attribute value %s[%d]."),
+ key, index);
+ break;
+ }
+
+ length = strlen (value);
+ if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
+ {
+ value[length - 1] = '\0';
+ attribute_add_value (attr, value + 1);
+ }
+ else
+ {
+ text_warn (r, text,
+ _("Attribute value %s[%d] is not quoted: %s."),
+ key, index, value);
+ attribute_add_value (attr, value);
+ }
+
+ /* Was this the last value for this attribute? */
+ if (text_match (text, ')'))
+ break;
+ }
+ if (attrs != NULL && attribute_get_n_values (attr) > 0)
+ {
+ if (!attrset_try_add (attrs, attr))
+ {
+ text_warn (r, text, _("Duplicate attribute %s."),
+ attribute_get_name (attr));
+ attribute_destroy (attr);
+ }
+ }
+ else
+ attribute_destroy (attr);
+ }
+ while (!text_match (text, '/'));
+}
+
+/* Reads record type 7, subtype 17, which lists custom
+ attributes on the data file. */
+static void
+parse_data_file_attributes (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct dictionary *dict)
+{
+ struct text_record *text = open_text_record (r, record, true);
+ parse_attributes (r, text, dict_get_attributes (dict));
+ close_text_record (r, text);
+}
+
+/* Parses record type 7, subtype 18, which lists custom
+ attributes on individual variables. */
+static void
+parse_variable_attributes (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct dictionary *dict)
+{
+ struct text_record *text;
+ struct variable *var;
+
+ text = open_text_record (r, record, true);
+ while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
+ parse_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
+ close_text_record (r, text);
+}
+
+static void
+assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
+{
+ size_t n_warnings = 0;
+ size_t i;
+
+ for (i = 0; i < dict_get_var_cnt (dict); i++)
+ {
+ struct variable *var = dict_get_var (dict, i);
+ struct attrset *attrs = var_get_attributes (var);
+ const struct attribute *attr = attrset_lookup (attrs, "$@Role");
+ if (attr != NULL && attribute_get_n_values (attr) > 0)
+ {
+ int value = atoi (attribute_get_value (attr, 0));
+ enum var_role role;
+
+ switch (value)
+ {
+ case 0:
+ role = ROLE_INPUT;
+ break;
+
+ case 1:
+ role = ROLE_TARGET;
+ break;
+
+ case 2:
+ role = ROLE_BOTH;
+ break;
+
+ case 3:
+ role = ROLE_NONE;
+ break;
+
+ case 4:
+ role = ROLE_PARTITION;
+ break;
+
+ case 5:
+ role = ROLE_SPLIT;
+ break;
+
+ default:
+ role = ROLE_INPUT;
+ if (n_warnings++ == 0)
+ sys_warn (r, -1, _("Invalid role for variable %s."),
+ var_get_name (var));
+ }
+
+ var_set_role (var, role);
+ }
+ }
+
+ if (n_warnings > 1)
+ sys_warn (r, -1, _("%zu other variables had invalid roles."),
+ n_warnings - 1);
+}
+
+static bool
+check_overflow (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ size_t ofs, size_t length)
+{
+ size_t end = record->size * record->count;
+ if (length >= end || ofs + length > end)
+ {
+ sys_warn (r, record->pos + end,
+ _("Extension record subtype %d ends unexpectedly."),
+ record->subtype);
+ return false;
+ }
+ return true;
+}
+
+static void
+parse_long_string_value_labels (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct dictionary *dict)
+{
+ const char *dict_encoding = dict_get_encoding (dict);
+ size_t end = record->size * record->count;
+ size_t ofs = 0;
+
+ while (ofs < end)
+ {
+ char *var_name;
+ size_t n_labels, i;
+ struct variable *var;
+ union value value;
+ int var_name_len;
+ int width;
+
+ /* Parse variable name length. */
+ if (!check_overflow (r, record, ofs, 4))
+ return;
+ var_name_len = parse_int (r, record->data, ofs);
+ ofs += 4;
+
+ /* Parse variable name, width, and number of labels. */
+ if (!check_overflow (r, record, ofs, var_name_len)
+ || !check_overflow (r, record, ofs, var_name_len + 8))
+ return;
+ var_name = recode_string_pool ("UTF-8", dict_encoding,
+ (const char *) record->data + ofs,
+ var_name_len, r->pool);
+ width = parse_int (r, record->data, ofs + var_name_len);
+ n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
+ ofs += var_name_len + 8;
+
+ /* Look up 'var' and validate. */
+ var = dict_lookup_var (dict, var_name);
+ if (var == NULL)
+ sys_warn (r, record->pos + ofs,
+ _("Ignoring long string value label record for "
+ "unknown variable %s."), var_name);
+ else if (var_is_numeric (var))
+ {
+ sys_warn (r, record->pos + ofs,
+ _("Ignoring long string value label record for "
+ "numeric variable %s."), var_name);
+ var = NULL;
+ }
+ else if (width != var_get_width (var))
+ {
+ sys_warn (r, record->pos + ofs,
+ _("Ignoring long string value label record for variable "
+ "%s because the record's width (%d) does not match the "
+ "variable's width (%d)."),
+ var_name, width, var_get_width (var));
+ var = NULL;
+ }
+
+ /* Parse values. */
+ value_init_pool (r->pool, &value, width);
+ for (i = 0; i < n_labels; i++)
+ {
+ size_t value_length, label_length;
+ bool skip = var == NULL;
+
+ /* Parse value length. */
+ if (!check_overflow (r, record, ofs, 4))
+ return;
+ value_length = parse_int (r, record->data, ofs);
+ ofs += 4;
+
+ /* Parse value. */
+ if (!check_overflow (r, record, ofs, value_length))
+ return;
+ if (!skip)
+ {
+ if (value_length == width)
+ memcpy (value.s, (const uint8_t *) record->data + ofs, width);
+ else
+ {
+ sys_warn (r, record->pos + ofs,
+ _("Ignoring long string value label %zu for "
+ "variable %s, with width %d, that has bad value "
+ "width %zu."),
+ i, var_get_name (var), width, value_length);
+ skip = true;
+ }
+ }
+ ofs += value_length;
+
+ /* Parse label length. */
+ if (!check_overflow (r, record, ofs, 4))
+ return;
+ label_length = parse_int (r, record->data, ofs);
+ ofs += 4;
+
+ /* Parse label. */
+ if (!check_overflow (r, record, ofs, label_length))
+ return;
+ if (!skip)
+ {
+ char *label;
+
+ label = recode_string_pool ("UTF-8", dict_encoding,
+ (const char *) record->data + ofs,
+ label_length, r->pool);
+ if (!var_add_value_label (var, &value, label))
+ sys_warn (r, record->pos + ofs,
+ _("Duplicate value label for `%.*s' on %s."),
+ width, value.s, var_get_name (var));
+ pool_free (r->pool, label);
+ }
+ ofs += label_length;
+ }
+ }
+}
+
+static void
+parse_long_string_missing_values (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct dictionary *dict)
+{
+ const char *dict_encoding = dict_get_encoding (dict);
+ size_t end = record->size * record->count;
+ size_t ofs = 0;
+
+ while (ofs < end)
+ {
+ struct missing_values mv;
+ char *var_name;
+ struct variable *var;
+ int n_missing_values;
+ int var_name_len;
+ size_t i;
+
+ /* Parse variable name length. */
+ if (!check_overflow (r, record, ofs, 4))
+ return;
+ var_name_len = parse_int (r, record->data, ofs);
+ ofs += 4;
+
+ /* Parse variable name. */
+ if (!check_overflow (r, record, ofs, var_name_len)
+ || !check_overflow (r, record, ofs, var_name_len + 1))
+ return;
+ var_name = recode_string_pool ("UTF-8", dict_encoding,
+ (const char *) record->data + ofs,
+ var_name_len, r->pool);
+ ofs += var_name_len;
+
+ /* Parse number of missing values. */
+ n_missing_values = ((const uint8_t *) record->data)[ofs];
+ if (n_missing_values < 1 || n_missing_values > 3)
+ sys_warn (r, record->pos + ofs,
+ _("Long string missing values record says variable %s "
+ "has %d missing values, but only 1 to 3 missing values "
+ "are allowed."),
+ var_name, n_missing_values);
+ ofs++;
+
+ /* Look up 'var' and validate. */
+ var = dict_lookup_var (dict, var_name);
+ if (var == NULL)
+ sys_warn (r, record->pos + ofs,
+ _("Ignoring long string missing value record for "
+ "unknown variable %s."), var_name);
+ else if (var_is_numeric (var))
+ {
+ sys_warn (r, record->pos + ofs,
+ _("Ignoring long string missing value record for "
+ "numeric variable %s."), var_name);
+ var = NULL;
+ }
+
+ /* Parse values. */
+ mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
+ for (i = 0; i < n_missing_values; i++)
+ {
+ size_t value_length;
+
+ /* Parse value length. */
+ if (!check_overflow (r, record, ofs, 4))
+ return;
+ value_length = parse_int (r, record->data, ofs);
+ ofs += 4;
+
+ /* Parse value. */
+ if (!check_overflow (r, record, ofs, value_length))
+ return;
+ if (var != NULL
+ && i < 3
+ && !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
+ value_length))
+ sys_warn (r, record->pos + ofs,
+ _("Ignoring long string missing value %zu for variable "
+ "%s, with width %d, that has bad value width %zu."),
+ i, var_get_name (var), var_get_width (var),
+ value_length);
+ ofs += value_length;
+ }
+ if (var != NULL)
+ var_set_missing_values (var, &mv);
+ }
+}
+\f
+/* Case reader. */
+
+static void partial_record (struct sfm_reader *);
+
+static void read_error (struct casereader *, const struct sfm_reader *);
+
+static bool read_case_number (struct sfm_reader *, double *);
+static int read_case_string (struct sfm_reader *, uint8_t *, size_t);
+static int read_opcode (struct sfm_reader *);
+static bool read_compressed_number (struct sfm_reader *, double *);
+static int read_compressed_string (struct sfm_reader *, uint8_t *);
+static int read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
+static bool skip_whole_strings (struct sfm_reader *, size_t);
+
+/* Reads and returns one case from READER's file. Returns a null
+ pointer if not successful. */
+static struct ccase *
+sys_file_casereader_read (struct casereader *reader, void *r_)
+{
+ struct sfm_reader *r = r_;
+ struct ccase *c;
+ int retval;
+ int i;
+
+ if (r->error || !r->sfm_var_cnt)
+ return NULL;
+
+ c = case_create (r->proto);
+
+ for (i = 0; i < r->sfm_var_cnt; i++)
+ {
+ struct sfm_var *sv = &r->sfm_vars[i];
+ union value *v = case_data_rw_idx (c, sv->case_index);
+
+ if (sv->var_width == 0)
+ retval = read_case_number (r, &v->f);
+ else
+ {
+ retval = read_case_string (r, v->s + sv->offset, sv->segment_width);
+ if (retval == 1)
+ {
+ retval = skip_whole_strings (r, ROUND_DOWN (sv->padding, 8));
+ if (retval == 0)
+ sys_error (r, r->pos, _("File ends in partial string value."));
+ }
+ }
+
+ if (retval != 1)
+ goto eof;
+ }
+ return c;
+
+eof:
+ if (i != 0)
+ partial_record (r);
+ if (r->case_cnt != -1)
+ read_error (reader, r);
+ case_unref (c);
+ return NULL;
+}
+
+/* Issues an error that R ends in a partial record. */
+static void
+partial_record (struct sfm_reader *r)
+{
+ sys_error (r, r->pos, _("File ends in partial case."));
+}
+
+/* Issues an error that an unspecified error occurred SFM, and
+ marks R tainted. */
+static void
+read_error (struct casereader *r, const struct sfm_reader *sfm)
+{
+ msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
+ casereader_force_error (r);
+}
+
+/* Reads a number from R and stores its value in *D.
+ If R is compressed, reads a compressed number;
+ otherwise, reads a number in the regular way.
+ Returns true if successful, false if end of file is
+ reached immediately. */
+static bool
+read_case_number (struct sfm_reader *r, double *d)
+{
+ if (r->compression == ANY_COMP_NONE)
+ {
+ uint8_t number[8];
+ if (!try_read_bytes (r, number, sizeof number))
+ return false;
+ float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
+ return true;
+ }
+ else
+ return read_compressed_number (r, d);
+}
+
+/* Reads LENGTH string bytes from R into S. Always reads a multiple of 8
+ bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
+ discarded without being written to S. Reads compressed strings if S is
+ compressed. Returns 1 if successful, 0 if end of file is reached
+ immediately, or -1 for some kind of error. */
+static int
+read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
+{
+ size_t whole = ROUND_DOWN (length, 8);
+ size_t partial = length % 8;
+
+ if (whole)
+ {
+ int retval = read_whole_strings (r, s, whole);
+ if (retval != 1)
+ return retval;
+ }
+
+ if (partial)
+ {
+ uint8_t bounce[8];
+ int retval = read_whole_strings (r, bounce, sizeof bounce);
+ if (retval == -1)
+ return -1;
+ else if (!retval)
+ {
+ if (whole)
+ {
+ partial_record (r);
+ return -1;
+ }
+ return 0;
+ }
+ memcpy (s + whole, bounce, partial);
+ }
+
+ return 1;
+}
+
+/* Reads and returns the next compression opcode from R. */
+static int
+read_opcode (struct sfm_reader *r)
+{
+ assert (r->compression != ANY_COMP_NONE);
+ for (;;)
+ {
+ int opcode;
+ if (r->opcode_idx >= sizeof r->opcodes)
+ {
+
+ int retval = try_read_compressed_bytes (r, r->opcodes,
+ sizeof r->opcodes);
+ if (retval != 1)
+ return -1;
+ r->opcode_idx = 0;
+ }
+ opcode = r->opcodes[r->opcode_idx++];
+
+ if (opcode != 0)
+ return opcode;
+ }
+}
+
+/* Reads a compressed number from R and stores its value in D.
+ Returns true if successful, false if end of file is
+ reached immediately. */
+static bool
+read_compressed_number (struct sfm_reader *r, double *d)
+{
+ int opcode = read_opcode (r);
+ switch (opcode)
+ {
+ case -1:
+ case 252:
+ return false;
+
+ case 253:
+ return read_compressed_float (r, d);
+
+ case 254:
+ float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
+ if (!r->corruption_warning)
+ {
+ r->corruption_warning = true;
+ sys_warn (r, r->pos,
+ _("Possible compressed data corruption: "
+ "compressed spaces appear in numeric field."));
+ }
+ break;
+
+ case 255:
+ *d = SYSMIS;
+ break;
+
+ default:
+ *d = opcode - r->bias;
+ break;
+ }
+
+ return true;
+}
+
+/* Reads a compressed 8-byte string segment from R and stores it in DST. */
+static int
+read_compressed_string (struct sfm_reader *r, uint8_t *dst)
+{
+ int opcode;
+ int retval;
+
+ opcode = read_opcode (r);
+ switch (opcode)
+ {
+ case -1:
+ case 252:
+ return 0;
+
+ case 253:
+ retval = read_compressed_bytes (r, dst, 8);
+ return retval == 1 ? 1 : -1;
+
+ case 254:
+ memset (dst, ' ', 8);
+ return 1;
+
+ default:
+ {
+ double value = opcode - r->bias;
+ float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
+ if (value == 0.0)
+ {
+ /* This has actually been seen "in the wild". The submitter of the
+ file that showed that the contents decoded as spaces, but they
+ were at the end of the field so it's possible that the null
+ bytes just acted as null terminators. */
+ }
+ else if (!r->corruption_warning)
+ {
+ r->corruption_warning = true;
+ sys_warn (r, r->pos,
+ _("Possible compressed data corruption: "
+ "string contains compressed integer (opcode %d)."),
+ opcode);
+ }
+ }
+ return 1;
+ }
+}
+
+/* Reads LENGTH string bytes from R into S. LENGTH must be a multiple of 8.
+ Reads compressed strings if S is compressed. Returns 1 if successful, 0 if
+ end of file is reached immediately, or -1 for some kind of error. */
+static int
+read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
+{
+ assert (length % 8 == 0);
+ if (r->compression == ANY_COMP_NONE)
+ return try_read_bytes (r, s, length);
+ else
+ {
+ size_t ofs;
+
+ for (ofs = 0; ofs < length; ofs += 8)
+ {
+ int retval = read_compressed_string (r, s + ofs);
+ if (retval != 1)
+ {
+ if (ofs != 0)
+ {
+ partial_record (r);
+ return -1;
+ }
+ return retval;
+ }
+ }
+ return 1;
+ }
+}
+
+/* Skips LENGTH string bytes from R.
+ LENGTH must be a multiple of 8.
+ (LENGTH is also limited to 1024, but that's only because the
+ current caller never needs more than that many bytes.)
+ Returns true if successful, false if end of file is
+ reached immediately. */
+static bool
+skip_whole_strings (struct sfm_reader *r, size_t length)
+{
+ uint8_t buffer[1024];
+ assert (length < sizeof buffer);
+ return read_whole_strings (r, buffer, length);
+}
+\f
+/* Helpers for reading records that contain structured text
+ strings. */
+
+/* Maximum number of warnings to issue for a single text
+ record. */
+#define MAX_TEXT_WARNINGS 5
+
+/* State. */
+struct text_record
+ {
+ struct substring buffer; /* Record contents. */
+ off_t start; /* Starting offset in file. */
+ size_t pos; /* Current position in buffer. */
+ int n_warnings; /* Number of warnings issued or suppressed. */
+ bool recoded; /* Recoded into UTF-8? */
+ };
+
+static struct text_record *
+open_text_record (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ bool recode_to_utf8)
+{
+ struct text_record *text;
+ struct substring raw;
+
+ text = pool_alloc (r->pool, sizeof *text);
+ raw = ss_buffer (record->data, record->size * record->count);
+ text->start = record->pos;
+ text->buffer = (recode_to_utf8
+ ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
+ : raw);
+ text->pos = 0;
+ text->n_warnings = 0;
+ text->recoded = recode_to_utf8;
+
+ return text;
+}
+
+/* Closes TEXT, frees its storage, and issues a final warning
+ about suppressed warnings if necessary. */
+static void
+close_text_record (struct sfm_reader *r, struct text_record *text)
+{
+ if (text->n_warnings > MAX_TEXT_WARNINGS)
+ sys_warn (r, -1, _("Suppressed %d additional related warnings."),
+ text->n_warnings - MAX_TEXT_WARNINGS);
+ if (text->recoded)
+ pool_free (r->pool, ss_data (text->buffer));
+}
+
+/* Reads a variable=value pair from TEXT.
+ Looks up the variable in DICT and stores it into *VAR.
+ Stores a null-terminated value into *VALUE. */
+static bool
+read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
+ struct text_record *text,
+ struct variable **var, char **value)
+{
+ for (;;)
+ {
+ if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
+ return false;
+
+ *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
+ if (*value == NULL)
+ return false;
+
+ text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
+ ss_buffer ("\t\0", 2));