pc+-file-reader, sys-file-reader: Fix misuses of zero as null pointer.

[pspp] / src / data / sys-file-reader.c
diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c

index 9a0c054c1889c23b2f63a1a5010effa61b77f2fc..b2db755732311d6c6ba8dda47044225edffa80c2 100644 (file)
--- a/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@ -1,5 +1,5 @@
  /* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -16,123 +16,304 @@
  
  #include <config.h>
  
-#include <data/sys-file-reader.h>
-#include <data/sys-file-private.h>
+#include "data/sys-file-private.h"
  
  #include <errno.h>
  #include <float.h>
  #include <inttypes.h>
-#include <setjmp.h>
  #include <stdlib.h>
-
-#include <libpspp/alloc.h>
-#include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/compiler.h>
-#include <libpspp/magic.h>
-#include <libpspp/misc.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
-#include <libpspp/hash.h>
-#include <libpspp/array.h>
-
-#include <data/case.h>
-#include <data/casereader-provider.h>
-#include <data/casereader.h>
-#include <data/dictionary.h>
-#include <data/file-handle-def.h>
-#include <data/file-name.h>
-#include <data/format.h>
-#include <data/missing-values.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <data/value.h>
-
-#include "c-ctype.h"
-#include "inttostr.h"
-#include "minmax.h"
-#include "unlocked-io.h"
-#include "xsize.h"
+#include <sys/stat.h>
+#include <zlib.h>
+
+#include "data/any-reader.h"
+#include "data/attributes.h"
+#include "data/case.h"
+#include "data/casereader-provider.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/file-handle-def.h"
+#include "data/file-name.h"
+#include "data/format.h"
+#include "data/identifier.h"
+#include "data/missing-values.h"
+#include "data/mrset.h"
+#include "data/short-names.h"
+#include "data/value-labels.h"
+#include "data/value.h"
+#include "data/variable.h"
+#include "libpspp/array.h"
+#include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
+#include "libpspp/ll.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+#include "libpspp/str.h"
+#include "libpspp/stringi-set.h"
+
+#include "gl/c-strtod.h"
+#include "gl/c-ctype.h"
+#include "gl/inttostr.h"
+#include "gl/localcharset.h"
+#include "gl/minmax.h"
+#include "gl/unlocked-io.h"
+#include "gl/xalloc.h"
+#include "gl/xalloc-oversized.h"
+#include "gl/xsize.h"
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  #define N_(msgid) (msgid)
  
+enum   /* Subtypes of type 7 (extension) records. */
+  {
+    /* subtypes 0-2 unknown */
+    EXT_INTEGER       = 3,      /* Machine integer info. */
+    EXT_FLOAT         = 4,      /* Machine floating-point info. */
+    EXT_VAR_SETS      = 5,      /* Variable sets. */
+    EXT_DATE          = 6,      /* DATE. */
+    EXT_MRSETS        = 7,      /* Multiple response sets. */
+    EXT_DATA_ENTRY    = 8,      /* SPSS Data Entry. */
+    /* subtype 9 unknown */
+    EXT_PRODUCT_INFO  = 10,     /* Extra product info text. */
+    EXT_DISPLAY       = 11,     /* Variable display parameters. */
+    /* subtype 12 unknown */
+    EXT_LONG_NAMES    = 13,     /* Long variable names. */
+    EXT_LONG_STRINGS  = 14,     /* Long strings. */
+    /* subtype 15 unknown */
+    EXT_NCASES        = 16,     /* Extended number of cases. */
+    EXT_FILE_ATTRS    = 17,     /* Data file attributes. */
+    EXT_VAR_ATTRS     = 18,     /* Variable attributes. */
+    EXT_MRSETS2       = 19,     /* Multiple response sets (extended). */
+    EXT_ENCODING      = 20,     /* Character encoding. */
+    EXT_LONG_LABELS   = 21,     /* Value labels for long strings. */
+    EXT_LONG_MISSING  = 22,     /* Missing values for long strings. */
+    EXT_DATAVIEW      = 24      /* "Format properties in dataview table". */
+  };
+
+/* Fields from the top-level header record. */
+struct sfm_header_record
+  {
+    char magic[5];              /* First 4 bytes of file, then null. */
+    int weight_idx;             /* 0 if unweighted, otherwise a var index. */
+    int nominal_case_size;      /* Number of var positions. */
+
+    /* These correspond to the members of struct any_read_info or a dictionary
+       but in the system file's encoding rather than ASCII. */
+    char creation_date[10];    /* "dd mmm yy". */
+    char creation_time[9];     /* "hh:mm:ss". */
+    char eye_catcher[61];       /* Eye-catcher string, then product name. */
+    char file_label[65];        /* File label. */
+  };
+
+struct sfm_var_record   /* One type 2 record; filled in via read_record(). */
+  {
+    off_t pos;                  /* Presumably record's file offset -- confirm. */
+    int width;                  /* sfm_get_strings() skips entries with -1. */
+    char name[9];               /* Name, passed as a string to add_id(). */
+    int print_format;
+    int write_format;
+    int missing_value_code;
+    uint8_t missing[24];
+    char *label;                /* Null-checked before use. */
+    struct variable *var;
+  };
+
+struct sfm_value_label  /* Element of sfm_value_label_record 'labels'. */
+  {
+    uint8_t value[8];           /* Raw 8-byte value. */
+    char *label;                /* Label, used as a string by add_string(). */
+  };
+
+struct sfm_value_label_record   /* One type 3 record; see read_record(). */
+  {
+    off_t pos;                  /* Presumably record's file offset -- confirm. */
+    struct sfm_value_label *labels;
+    unsigned int n_labels;      /* Number of elements in 'labels'. */
+
+    int *vars;                  /* Presumably 'n_vars' var indexes -- confirm. */
+    unsigned int n_vars;
+  };
+
+struct sfm_document_record      /* The type 6 (document) record. */
+  {
+    off_t pos;                  /* Presumably record's file offset -- confirm. */
+    char *documents;            /* 'n_lines' lines of 80 bytes each. */
+    size_t n_lines;             /* Number of 80-byte lines in 'documents'. */
+  };
+
+struct sfm_mrset        /* Element of struct sfm_reader 'mrsets'. */
+  {
+    const char *name;           /* Name. */
+    const char *label;          /* Human-readable label for group. */
+    enum mrset_type type;       /* Group type. */
+    const char **vars;          /* Constituent variables' names. */
+    size_t n_vars;              /* Number of constituent variables. */
+
+    /* MRSET_MD only. */
+    enum mrset_md_cat_source cat_source; /* Source of category labels. */
+    bool label_from_var_label;  /* 'label' taken from variable label? */
+    const char *counted;        /* Counted value, as string. */
+  };
+
+struct sfm_extension_record     /* One type 7 (extension) record. */
+  {
+    struct ll ll;               /* In struct sfm_reader 'var_attrs' list. */
+    int subtype;                /* Record subtype. */
+    off_t pos;                  /* Starting offset in file. */
+    unsigned int size;          /* Size of data elements. */
+    unsigned int count;         /* Number of data elements. */
+    void *data;                 /* Contents. */
+  };
+
  /* System file reader. */
  struct sfm_reader
    {
+    struct any_reader any_reader; /* Handle returned by sfm_open(). */
+
      /* Resource tracking. */
      struct pool *pool;          /* All system file state. */
-    jmp_buf bail_out;           /* longjmp() target for error handling. */
+
+    /* File data. */
+    struct any_read_info info;
+    struct sfm_header_record header;
+    struct sfm_var_record *vars;  /* Type 2 records, in file order. */
+    size_t n_vars;                /* Number of elements in 'vars'. */
+    struct sfm_value_label_record *labels; /* Type 3 records. */
+    size_t n_labels;              /* Number of elements in 'labels'. */
+    struct sfm_document_record *document; /* Type 6 record, or null. */
+    struct sfm_mrset *mrsets;
+    size_t n_mrsets;              /* Number of elements in 'mrsets'. */
+    struct sfm_extension_record *extensions[32]; /* Type 7, by subtype. */
+    struct ll_list var_attrs;   /* Contains "struct sfm_extension_record"s. */
  
      /* File state. */
      struct file_handle *fh;     /* File handle. */
+    struct fh_lock *lock;       /* Mutual exclusion for file handle. */
      FILE *file;                 /* File stream. */
+    off_t pos;                  /* Position in file. */
      bool error;                 /* I/O or corruption error? */
-    size_t value_cnt;           /* Number of "union value"s in struct case. */
+    struct caseproto *proto;    /* Format of output cases. */
  
      /* File format. */
      enum integer_format integer_format; /* On-disk integer format. */
      enum float_format float_format; /* On-disk floating point format. */
-    int flt64_cnt;             /* Number of 8-byte units per case. */
-    struct sfm_var *vars;       /* Variables. */
-    size_t var_cnt;             /* Number of variables. */
-    int32_t case_cnt;           /* Number of cases */
-    bool has_long_var_names;    /* File has a long variable name map */
-    bool has_vls;               /* File has one or more very long strings? */
+    struct sfm_var *sfm_vars;   /* Variables. */
+    size_t sfm_var_cnt;         /* Number of variables. */
+    int case_cnt;               /* Number of cases */
+    const char *encoding;       /* String encoding. */
+    bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */
  
      /* Decompression. */
-    bool compressed;           /* File is compressed? */
+    enum any_compression compression;
      double bias;               /* Compression bias, usually 100.0. */
      uint8_t opcodes[8];         /* Current block of opcodes. */
      size_t opcode_idx;          /* Next opcode to interpret, 8 if none left. */
+    bool corruption_warning;    /* Warned about possible corruption? */
+
+    /* ZLIB decompression. */
+    long long int ztrailer_ofs; /* Offset of ZLIB trailer at end of file. */
+#define ZIN_BUF_SIZE  4096
+    uint8_t *zin_buf;           /* Inflation input buffer. */
+#define ZOUT_BUF_SIZE 16384
+    uint8_t *zout_buf;          /* Inflation output buffer. */
+    unsigned int zout_end;      /* Number of bytes of data in zout_buf. */
+    unsigned int zout_pos;      /* First unconsumed byte in zout_buf. */
+    z_stream zstream;           /* ZLIB inflater. */
    };
  
-/* A variable in a system file. */
-struct sfm_var
-  {
-    int width;                  /* 0=numeric, otherwise string width. */
-    int case_index;             /* Index into case. */
-  };
-
-static struct casereader_class sys_file_casereader_class;
-
-static bool close_reader (struct sfm_reader *);
+static const struct casereader_class sys_file_casereader_class;
  
-static struct variable **make_var_by_value_idx (struct sfm_reader *,
-                                                struct dictionary *);
-static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
-                                                 struct variable **,
-                                                 int value_idx);
-
-static void sys_warn (struct sfm_reader *, const char *, ...)
-     PRINTF_FORMAT (2, 3);
-
-static void sys_error (struct sfm_reader *, const char *, ...)
-     PRINTF_FORMAT (2, 3)
-     NO_RETURN;
-
-static void read_bytes (struct sfm_reader *, void *, size_t);
-static bool try_read_bytes (struct sfm_reader *, void *, size_t);
-static int32_t read_int32 (struct sfm_reader *);
-static double read_flt64 (struct sfm_reader *);
-static void read_string (struct sfm_reader *, char *, size_t);
-static void skip_bytes (struct sfm_reader *, size_t);
-
-static int32_t int32_to_native (const struct sfm_reader *, const uint8_t[4]);
-static double flt64_to_double (const struct sfm_reader *, const uint8_t[8]);
-
-static struct variable_to_value_map *open_variable_to_value_map (
-  struct sfm_reader *, size_t size);
-static void close_variable_to_value_map (struct sfm_reader *r,
-                                         struct variable_to_value_map *);
-static bool read_variable_to_value_map (struct sfm_reader *,
-                                        struct dictionary *,
-                                        struct variable_to_value_map *,
-                                        struct variable **var, char **value,
-                                        int *warning_cnt);
+static struct sfm_reader *      /* Converts R_ back to its sfm_reader. */
+sfm_reader_cast (const struct any_reader *r_)
+{
+  assert (r_->klass == &sys_file_reader_class);  /* As set by sfm_open(). */
+  return UP_CAST (r_, struct sfm_reader, any_reader);
+}
  
-static bool close_reader (struct sfm_reader *r);
+static bool sfm_close (struct any_reader *);
+
+static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
+                                             const struct sfm_var_record *,
+                                             size_t n, int idx);
+
+static void sys_msg (struct sfm_reader *r, off_t, int class,
+                     const char *format, va_list args)
+     PRINTF_FORMAT (4, 0);
+static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
+     PRINTF_FORMAT (3, 4);
+static void sys_error (struct sfm_reader *, off_t, const char *, ...)
+     PRINTF_FORMAT (3, 4);
+
+static bool read_bytes (struct sfm_reader *, void *, size_t)
+  WARN_UNUSED_RESULT;
+static int try_read_bytes (struct sfm_reader *, void *, size_t)
+  WARN_UNUSED_RESULT;
+static bool read_int (struct sfm_reader *, int *) WARN_UNUSED_RESULT;
+static bool read_uint (struct sfm_reader *, unsigned int *) WARN_UNUSED_RESULT;
+static bool read_int64 (struct sfm_reader *, long long int *)
+  WARN_UNUSED_RESULT;
+static bool read_uint64 (struct sfm_reader *, unsigned long long int *)
+  WARN_UNUSED_RESULT;
+static bool read_string (struct sfm_reader *, char *, size_t)
+  WARN_UNUSED_RESULT;
+static bool skip_bytes (struct sfm_reader *, size_t) WARN_UNUSED_RESULT;
+
+/* ZLIB compressed data handling. */
+static bool read_zheader (struct sfm_reader *) WARN_UNUSED_RESULT;
+static bool open_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
+static bool close_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
+static int read_bytes_zlib (struct sfm_reader *, void *, size_t)
+  WARN_UNUSED_RESULT;
+static int read_compressed_bytes (struct sfm_reader *, void *, size_t)
+  WARN_UNUSED_RESULT;
+static int try_read_compressed_bytes (struct sfm_reader *, void *, size_t)
+  WARN_UNUSED_RESULT;
+static bool read_compressed_float (struct sfm_reader *, double *)
+  WARN_UNUSED_RESULT;
+
+static char *fix_line_ends (const char *);
+
+static int parse_int (const struct sfm_reader *, const void *data, size_t ofs);
+static double parse_float (const struct sfm_reader *,
+                           const void *data, size_t ofs);
+
+static bool read_variable_record (struct sfm_reader *,
+                                  struct sfm_var_record *);
+static bool read_value_label_record (struct sfm_reader *,
+                                     struct sfm_value_label_record *);
+static bool read_document_record (struct sfm_reader *);
+static bool read_extension_record (struct sfm_reader *, int subtype,
+                                   struct sfm_extension_record **);
+static bool skip_extension_record (struct sfm_reader *, int subtype);
+
+static struct text_record *open_text_record (
+  struct sfm_reader *, const struct sfm_extension_record *,
+  bool recode_to_utf8);
+static void close_text_record (struct sfm_reader *,
+                               struct text_record *);
+static bool read_variable_to_value_pair (struct sfm_reader *,
+                                         struct dictionary *,
+                                         struct text_record *,
+                                         struct variable **var, char **value);
+static void text_warn (struct sfm_reader *r, struct text_record *text,
+                       const char *format, ...)  PRINTF_FORMAT (3, 4);
+static char *text_get_token (struct text_record *,
+                             struct substring delimiters, char *delimiter);
+static bool text_match (struct text_record *, char c);
+static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
+                                     struct text_record *,
+                                     struct substring delimiters,
+                                     struct variable **);
+static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
+                                  struct text_record *,
+                                  struct substring delimiters,
+                                  struct variable **);
+static const char *text_parse_counted_string (struct sfm_reader *,
+                                              struct text_record *);
+static size_t text_pos (const struct text_record *);
+static const char *text_get_all (const struct text_record *);
  \f
  /* Dictionary reader. */
  
@@ -142,203 +323,603 @@ enum which_format
      WRITE_FORMAT
    };
  
-static void read_header (struct sfm_reader *, struct dictionary *,
-                         int *weight_idx, int *claimed_flt64_cnt,
-                         struct sfm_read_info *);
-static void read_variable_record (struct sfm_reader *, struct dictionary *,
-                                  int *format_warning_cnt);
-static void parse_format_spec (struct sfm_reader *, uint32_t,
-                               enum which_format, struct variable *,
-                               int *format_warning_cnt);
-static void setup_weight (struct sfm_reader *, int weight_idx,
-                          struct variable **var_by_value_idx,
-                          struct dictionary *);
-static void read_documents (struct sfm_reader *, struct dictionary *);
-static void read_value_labels (struct sfm_reader *, struct dictionary *,
-                               struct variable **var_by_value_idx);
-
-static void read_extension_record (struct sfm_reader *, struct dictionary *);
-static void read_machine_int32_info (struct sfm_reader *,
-                                     size_t size, size_t count);
-static void read_machine_flt64_info (struct sfm_reader *,
-                                     size_t size, size_t count);
-static void read_display_parameters (struct sfm_reader *,
-                                     size_t size, size_t count,
+static bool read_dictionary (struct sfm_reader *);
+static bool read_record (struct sfm_reader *, int type,
+                         size_t *allocated_vars, size_t *allocated_labels);
+static bool read_header (struct sfm_reader *, struct any_read_info *,
+                         struct sfm_header_record *);
+static void parse_header (struct sfm_reader *,
+                          const struct sfm_header_record *,
+                          struct any_read_info *, struct dictionary *);
+static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
+                                    struct sfm_var_record *, size_t n);
+static void parse_format_spec (struct sfm_reader *, off_t pos,
+                               unsigned int format, enum which_format,
+                               struct variable *, int *format_warning_cnt);
+static void parse_document (struct dictionary *, struct sfm_document_record *);
+static void parse_display_parameters (struct sfm_reader *,
+                                      const struct sfm_extension_record *,
+                                      struct dictionary *);
+static bool parse_machine_integer_info (struct sfm_reader *,
+                                        const struct sfm_extension_record *,
+                                        struct any_read_info *);
+static void parse_machine_float_info (struct sfm_reader *,
+                                      const struct sfm_extension_record *);
+static void parse_extra_product_info (struct sfm_reader *,
+                                      const struct sfm_extension_record *,
+                                      struct any_read_info *);
+static void parse_mrsets (struct sfm_reader *,
+                          const struct sfm_extension_record *,
+                          size_t *allocated_mrsets);
+static void decode_mrsets (struct sfm_reader *, struct dictionary *);
+static void parse_long_var_name_map (struct sfm_reader *,
+                                     const struct sfm_extension_record *,
                                       struct dictionary *);
-static void read_long_var_name_map (struct sfm_reader *,
-                                    size_t size, size_t count,
-                                    struct dictionary *);
-static void read_long_string_map (struct sfm_reader *,
-                                  size_t size, size_t count,
-                                  struct dictionary *);
-
-
-/* Opens the system file designated by file handle FH for
-   reading.  Reads the system file's dictionary into *DICT.
-   If INFO is non-null, then it receives additional info about the
-   system file. */
-struct casereader *
-sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
-                 struct sfm_read_info *info)
-{
-  struct sfm_reader *volatile r = NULL;
-  struct variable **var_by_value_idx;
-  int format_warning_cnt = 0;
-  int weight_idx;
-  int claimed_flt64_cnt;
-  int rec_type;
-  size_t i;
-
-  if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
-    return NULL;
+static bool parse_long_string_map (struct sfm_reader *,
+                                   const struct sfm_extension_record *,
+                                   struct dictionary *);
+static bool parse_value_labels (struct sfm_reader *, struct dictionary *,
+                                const struct sfm_var_record *,
+                                size_t n_var_recs,
+                                const struct sfm_value_label_record *);
+static void parse_data_file_attributes (struct sfm_reader *,
+                                        const struct sfm_extension_record *,
+                                        struct dictionary *);
+static void parse_variable_attributes (struct sfm_reader *,
+                                       const struct sfm_extension_record *,
+                                       struct dictionary *);
+static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
+static void parse_long_string_value_labels (struct sfm_reader *,
+                                            const struct sfm_extension_record *,
+                                            struct dictionary *);
+static void parse_long_string_missing_values (
+  struct sfm_reader *, const struct sfm_extension_record *,
+  struct dictionary *);
+
+/* Frees the strings inside INFO, but not INFO itself.  Does nothing if INFO
+   is null. */
+void
+any_read_info_destroy (struct any_read_info *info)
+{
+  if (info)
+    {
+      free (info->creation_date);
+      free (info->creation_time);
+      free (info->product);
+      free (info->product_ext);
+    }
+}
  
-  *dict = dict_create ();
+/* Tries to open FH for reading as a system file.  Returns an sfm_reader if
+   successful, otherwise NULL. */
+static struct any_reader *
+sfm_open (struct file_handle *fh)
+{
+  size_t allocated_mrsets = 0;
+  struct sfm_reader *r;
  
    /* Create and initialize reader. */
-  r = pool_create_container (struct sfm_reader, pool);
-  r->fh = fh;
-  r->file = fn_open (fh_get_file_name (fh), "rb");
-  r->error = false;
-  r->flt64_cnt = 0;
-  r->has_vls = false;
-  r->has_long_var_names = false;
+  r = xzalloc (sizeof *r);
+  r->any_reader.klass = &sys_file_reader_class;
+  r->pool = pool_create ();
+  pool_register (r->pool, free, r);  /* Presumably frees R with pool -- confirm. */
+  r->fh = fh_ref (fh);
    r->opcode_idx = sizeof r->opcodes;  /* No opcodes buffered yet. */
+  ll_init (&r->var_attrs);
  
-  if (setjmp (r->bail_out))
-    {
-      close_reader (r);
-      dict_destroy (*dict);
-      *dict = NULL;
-      return NULL;
-    }
+  /* TRANSLATORS: this fragment will be interpolated into
+     messages in fh_lock() that identify types of files. */
+  r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
+  if (r->lock == NULL)
+    goto error;
  
+  r->file = fn_open (fh, "rb");
    if (r->file == NULL)
      {
-      msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
+      msg (ME, _("Error opening `%s' for reading as a system file: %s."),
             fh_get_file_name (r->fh), strerror (errno));
-      longjmp (r->bail_out, 1);
+      goto error;
+    }
+
+  if (!read_dictionary (r))
+    goto error;
+
+  if (r->extensions[EXT_MRSETS] != NULL)
+    parse_mrsets (r, r->extensions[EXT_MRSETS], &allocated_mrsets);
+
+  if (r->extensions[EXT_MRSETS2] != NULL)
+    parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
+
+  return &r->any_reader;
+
+error:
+  if (r)  /* NOTE(review): R looks always non-null here -- confirm. */
+    sfm_close (&r->any_reader);
+  return NULL;
+}
+
+static bool     /* Reads the header, then records until type 999. */
+read_dictionary (struct sfm_reader *r)
+{
+  size_t allocated_vars;
+  size_t allocated_labels;
+
+  if (!read_header (r, &r->info, &r->header))
+    return false;
+
+  allocated_vars = 0;
+  allocated_labels = 0;
+  for (;;)
+    {
+      int type;
+
+      if (!read_int (r, &type))
+        return false;
+      if (type == 999)          /* Type 999 ends the record loop. */
+        break;
+      if (!read_record (r, type, &allocated_vars, &allocated_labels))
+        return false;
      }
  
-  /* Read header. */
-  read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info);
+  if (!skip_bytes (r, 4))       /* Presumably type 999's filler -- confirm. */
+    return false;
+
+  if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
+    return false;
+
+  return true;
+}
  
-  /* Read all the variable definition records. */
-  rec_type = read_int32 (r);
-  while (rec_type == 2)
+static bool     /* Reads one record of the given TYPE; false on error. */
+read_record (struct sfm_reader *r, int type,
+             size_t *allocated_vars, size_t *allocated_labels)
+{
+  int subtype;
+
+  switch (type)
      {
-      read_variable_record (r, *dict, &format_warning_cnt);
-      rec_type = read_int32 (r);
+    case 2:                     /* Variable record: append to r->vars. */
+      if (r->n_vars >= *allocated_vars)
+        r->vars = pool_2nrealloc (r->pool, r->vars, allocated_vars,
+                                  sizeof *r->vars);
+      return read_variable_record (r, &r->vars[r->n_vars++]);
+
+    case 3:                     /* Value label record: append to r->labels. */
+      if (r->n_labels >= *allocated_labels)
+        r->labels = pool_2nrealloc (r->pool, r->labels, allocated_labels,
+                                    sizeof *r->labels);
+      return read_value_label_record (r, &r->labels[r->n_labels++]);
+
+    case 4:
+      /* A type 4 record is always immediately after a type 3 record,
+         so the code for type 3 records reads the type 4 record too. */
+      sys_error (r, r->pos, _("Misplaced type 4 record."));
+      return false;
+
+    case 6:                     /* Document record: at most one allowed. */
+      if (r->document != NULL)
+        {
+          sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
+          return false;
+        }
+      return read_document_record (r);
+
+    case 7:                     /* Extension record: dispatch on subtype. */
+      if (!read_int (r, &subtype))
+        return false;
+      else if (subtype < 0
+               || subtype >= sizeof r->extensions / sizeof *r->extensions)
+        {
+          sys_warn (r, r->pos,
+                    _("Unrecognized record type 7, subtype %d.  For help, "
+                      "please send this file to %s and mention that you were "
+                      "using %s."),
+                    subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
+          return skip_extension_record (r, subtype);
+        }
+      else if (subtype == 18)   /* 18 is EXT_VAR_ATTRS. */
+        {
+          /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
+             put each variable attribute into a separate record with subtype
+             18.  I'm surprised that SPSS puts up with this. */
+          struct sfm_extension_record *ext;
+          bool ok = read_extension_record (r, subtype, &ext);
+          if (ok && ext)
+            ll_push_tail (&r->var_attrs, &ext->ll);
+          return ok;
+        }
+      else if (r->extensions[subtype] != NULL)  /* Duplicate: keep the first. */
+        {
+          sys_warn (r, r->pos,
+                    _("Record type 7, subtype %d found here has the same "
+                      "type as the record found near offset 0x%llx.  For "
+                      "help, please send this file to %s and mention that "
+                      "you were using %s."),
+                    subtype, (long long int) r->extensions[subtype]->pos,
+                    PACKAGE_BUGREPORT, PACKAGE_STRING);
+          return skip_extension_record (r, subtype);
+        }
+      else
+        return read_extension_record (r, subtype, &r->extensions[subtype]);
+
+    default:
+      sys_error (r, r->pos, _("Unrecognized record type %d."), type);
+      return false;
      }
  
-  /* Figure out the case format. */
-  var_by_value_idx = make_var_by_value_idx (r, *dict);
-  setup_weight (r, weight_idx, var_by_value_idx, *dict);
+  NOT_REACHED ();               /* Every case above returns. */
+}
+
+/* Returns the character encoding obtained from R, or a null pointer if R
+   doesn't have an indication of its character encoding.  */
+static const char *
+sfm_get_encoding (const struct sfm_reader *r)
+{
+  /* The EXT_ENCODING record is the best way to determine dictionary
+     encoding. */
+  if (r->extensions[EXT_ENCODING])
+    return r->extensions[EXT_ENCODING]->data;
  
-  /* Read all the rest of the dictionary records. */
-  while (rec_type != 999)
+  /* But EXT_INTEGER is better than nothing as a fallback. */
+  if (r->extensions[EXT_INTEGER])
      {
-      switch (rec_type)
+      int codepage = parse_int (r, r->extensions[EXT_INTEGER]->data, 7 * 4);
+      const char *encoding;
+
+      switch (codepage)
          {
+        case 1:
+          return "EBCDIC-US";
+
+        case 2:
          case 3:
-          read_value_labels (r, *dict, var_by_value_idx);
+          /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+             respectively.  However, many files have character code 2 but data
+             which are clearly not ASCII.  Therefore, ignore these values. */
            break;              /* Go on to the magic number check below. */
  
          case 4:
-          sys_error (r, _("Misplaced type 4 record."));
+          return "MS_KANJI";
  
-        case 6:
-          read_documents (r, *dict);
+        default:
+          encoding = sys_get_encoding_from_codepage (codepage);
+          if (encoding != NULL)
+            return encoding;
            break;
+        }
+    }
  
-        case 7:
-          read_extension_record (r, *dict);
-          break;
+  /* If the file magic number is EBCDIC then its character data is too. */
+  if (!strcmp (r->header.magic, EBCDIC_MAGIC))
+    return "EBCDIC-US";
  
-        default:
-          sys_error (r, _("Unrecognized record type %d."), rec_type);
+  return NULL;                  /* No indication of the encoding. */
+}
+
+struct get_strings_aux  /* Parallel arrays filled by add_string__(). */
+  {
+    struct pool *pool;          /* Allocates the arrays and string copies. */
+    char **titles;              /* titles[i] is the title given for strings[i]. */
+    char **strings;             /* Copies made with pool_strdup(). */
+    bool *ids;                  /* ids[i] is the 'id' flag given for strings[i]. */
+    size_t allocated;           /* Capacity of each array, in elements. */
+    size_t n;                   /* Number of elements in use. */
+  };
+
+static void     /* Appends one entry to AUX, growing its arrays if full. */
+add_string__ (struct get_strings_aux *aux,
+              const char *string, bool id, char *title)
+{
+  if (aux->n >= aux->allocated)
+    {
+      aux->allocated = 2 * (aux->allocated + 1);  /* 0, 2, 6, 14, ... */
+      aux->titles = pool_realloc (aux->pool, aux->titles,
+                                  aux->allocated * sizeof *aux->titles);
+      aux->strings = pool_realloc (aux->pool, aux->strings,
+                                   aux->allocated * sizeof *aux->strings);
+      aux->ids = pool_realloc (aux->pool, aux->ids,
+                               aux->allocated * sizeof *aux->ids);
+    }
+
+  aux->titles[aux->n] = title;  /* Stored as passed in, not copied. */
+  aux->strings[aux->n] = pool_strdup (aux->pool, string);
+  aux->ids[aux->n] = id;
+  aux->n++;
+}
+
+static void PRINTF_FORMAT (3, 4)  /* Adds STRING with its 'id' flag false. */
+add_string (struct get_strings_aux *aux,
+            const char *string, const char *title, ...)
+{
+  va_list args;
+
+  va_start (args, title);       /* TITLE is the format for the title. */
+  add_string__ (aux, string, false, pool_vasprintf (aux->pool, title, args));
+  va_end (args);
+}
+
+static void PRINTF_FORMAT (3, 4)  /* Adds ID with its 'id' flag true. */
+add_id (struct get_strings_aux *aux, const char *id, const char *title, ...)
+{
+  va_list args;
+
+  va_start (args, title);       /* TITLE is the format for the title. */
+  add_string__ (aux, id, true, pool_vasprintf (aux->pool, title, args));
+  va_end (args);
+}
+
+/* Retrieves significant string data from R in its raw format, to allow the
+   caller to try to detect the encoding in use.
+
+   Returns the number of strings retrieved N.  Sets each of *TITLESP, *IDSP,
+   and *STRINGSP to an array of N elements allocated from POOL.  For each I in
+   0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
+   whatever encoding system file R uses.  *IDSP[I] is true if *STRINGSP[I] must
+   be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
+   text. */
+static size_t
+sfm_get_strings (const struct any_reader *r_, struct pool *pool,
+                 char ***titlesp, bool **idsp, char ***stringsp)
+{
+  struct sfm_reader *r = sfm_reader_cast (r_);
+  const struct sfm_mrset *mrset;
+  struct get_strings_aux aux;
+  size_t var_idx;
+  size_t i, j, k;
+
+  aux.pool = pool;
+  aux.titles = NULL;
+  aux.strings = NULL;
+  aux.ids = NULL;
+  aux.allocated = 0;
+  aux.n = 0;
+
+  var_idx = 0;
+  for (i = 0; i < r->n_vars; i++)
+    if (r->vars[i].width != -1)
+      add_id (&aux, r->vars[i].name, _("Variable %zu"), ++var_idx);
+
+  var_idx = 0;
+  for (i = 0; i < r->n_vars; i++)
+    if (r->vars[i].width != -1)
+      {
+        var_idx++;
+        if (r->vars[i].label)
+          add_string (&aux, r->vars[i].label, _("Variable %zu Label"),
+                      var_idx);
+      }
+
+  k = 0;
+  for (i = 0; i < r->n_labels; i++)
+    for (j = 0; j < r->labels[i].n_labels; j++)
+      add_string (&aux, r->labels[i].labels[j].label,
+                  _("Value Label %zu"), k++);
+
+  add_string (&aux, r->header.creation_date, _("Creation Date"));
+  add_string (&aux, r->header.creation_time, _("Creation Time"));
+  add_string (&aux, r->header.eye_catcher, _("Product"));
+  add_string (&aux, r->header.file_label, _("File Label"));
+
+  if (r->extensions[EXT_PRODUCT_INFO])
+    add_string (&aux, r->extensions[EXT_PRODUCT_INFO]->data,
+                _("Extra Product Info"));
+
+  if (r->document)
+    {
+      size_t i;
+
+      for (i = 0; i < r->document->n_lines; i++)
+        {
+          char line[81];
+
+          memcpy (line, r->document->documents + i * 80, 80);
+          line[80] = '\0';
+
+          add_string (&aux, line, _("Document Line %zu"), i + 1);
          }
-      rec_type = read_int32 (r);
      }
  
+  for (mrset = r->mrsets; mrset < &r->mrsets[r->n_mrsets]; mrset++)
+    {
+      size_t mrset_idx = mrset - r->mrsets + 1;
+
+      add_id (&aux, mrset->name, _("MRSET %zu"), mrset_idx);
+      if (mrset->label[0])
+        add_string (&aux, mrset->label, _("MRSET %zu Label"), mrset_idx);
+
+      /* Skip the variables because they ought to be duplicates. */
+
+      if (mrset->counted)
+        add_string (&aux, mrset->counted, _("MRSET %zu Counted Value"),
+                    mrset_idx);
+    }
+
+  /* data file attributes */
+  /* variable attributes */
+  /* long var map */
+  /* long string value labels */
+  /* long string missing values */
+
+  *titlesp = aux.titles;
+  *idsp = aux.ids;
+  *stringsp = aux.strings;
+  return aux.n;
+}
  
-  if ( ! r->has_long_var_names )
+/* Decodes the dictionary read from R, saving it into *DICTP.  Character
+   strings in R are decoded using ENCODING, or an encoding obtained from R if
+   ENCODING is null, or the locale encoding if R specifies no encoding.
+
+   If INFOP is non-null, then it receives additional info about the system
+   file, which the caller must eventually free with any_read_info_destroy()
+   when it is no longer needed.
+
+   On success, returns a casereader created over R.  On failure, closes R
+   with sfm_close(), destroys the partially built dictionary, sets *DICTP to
+   null, and returns a null pointer.
+
+   This function consumes R.  The caller must not use it again later, even to
+   destroy it with sfm_close(). */
+static struct casereader *
+sfm_decode (struct any_reader *r_, const char *encoding,
+            struct dictionary **dictp, struct any_read_info *infop)
+{
+  struct sfm_reader *r = sfm_reader_cast (r_);
+  struct dictionary *dict;
+  size_t i;
+
+  if (encoding == NULL)
      {
-      int i;
-      for (i = 0; i < dict_get_var_cnt (*dict); i++)
-       {
-         struct variable *var = dict_get_var (*dict, i);
-         char short_name [SHORT_NAME_LEN + 1];
-         char long_name [SHORT_NAME_LEN + 1];
+      encoding = sfm_get_encoding (r);
+      if (encoding == NULL)
+        {
+          sys_warn (r, -1, _("This system file does not indicate its own "
+                             "character encoding.  Using default encoding "
+                             "%s.  For best results, specify an encoding "
+                             "explicitly.  Use SYSFILE INFO with "
+                             "ENCODING=\"DETECT\" to analyze the possible "
+                             "encodings."),
+                    locale_charset ());
+          encoding = locale_charset ();
+        }
+    }
  
-         strcpy (short_name, var_get_name (var));
+  dict = dict_create (encoding);
+  r->encoding = dict_get_encoding (dict);
  
-         strcpy (long_name, short_name);
-         str_lowercase (long_name);
+  /* These records don't use variables at all. */
+  if (r->document != NULL)
+    parse_document (dict, r->document);
  
-         /* Set long name.  Renaming a variable may clear the short
-            name, but we want to retain it, so re-set it
-            explicitly. */
-         dict_rename_var (*dict, var, long_name);
-         var_set_short_name (var, 0, short_name);
-       }
+  if (r->extensions[EXT_INTEGER] != NULL
+      && !parse_machine_integer_info (r, r->extensions[EXT_INTEGER], &r->info))
+    goto error;
+
+  if (r->extensions[EXT_FLOAT] != NULL)
+    parse_machine_float_info (r, r->extensions[EXT_FLOAT]);
+
+  if (r->extensions[EXT_PRODUCT_INFO] != NULL)
+    parse_extra_product_info (r, r->extensions[EXT_PRODUCT_INFO], &r->info);
+
+  if (r->extensions[EXT_FILE_ATTRS] != NULL)
+    parse_data_file_attributes (r, r->extensions[EXT_FILE_ATTRS], dict);
+
+  parse_header (r, &r->header, &r->info, dict);
+
+  /* Parse the variable records, the basis of almost everything else. */
+  if (!parse_variable_records (r, dict, r->vars, r->n_vars))
+    goto error;
  
-      r->has_long_var_names = true;
+  /* Parse value labels and the weight variable immediately after the variable
+     records.  These records use indexes into var_recs[], so we must parse them
+     before those indexes become invalidated by very long string variables. */
+  for (i = 0; i < r->n_labels; i++)
+    if (!parse_value_labels (r, dict, r->vars, r->n_vars, &r->labels[i]))
+      goto error;
+  if (r->header.weight_idx != 0)
+    {
+      struct variable *weight_var;
+
+      weight_var = lookup_var_by_index (r, 76, r->vars, r->n_vars,
+                                        r->header.weight_idx);
+      if (weight_var != NULL)
+        {
+          if (var_is_numeric (weight_var))
+            dict_set_weight (dict, weight_var);
+          else
+            sys_warn (r, -1, _("Ignoring string variable `%s' set "
+                               "as weighting variable."),
+                      var_get_name (weight_var));
+        }
      }
  
-  /* Read record 999 data, which is just filler. */
-  read_int32 (r);
+  if (r->extensions[EXT_DISPLAY] != NULL)
+    parse_display_parameters (r, r->extensions[EXT_DISPLAY], dict);
+
+  /* The following records use short names, so they need to be parsed before
+     parse_long_var_name_map() changes short names to long names. */
+  decode_mrsets (r, dict);
+
+  if (r->extensions[EXT_LONG_STRINGS] != NULL
+      && !parse_long_string_map (r, r->extensions[EXT_LONG_STRINGS], dict))
+    goto error;
+
+  /* Now rename variables to their long names. */
+  parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
  
-  if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt)
-    sys_warn (r, _("File header claims %d variable positions but "
-                   "%d were read from file."),
-              claimed_flt64_cnt, r->flt64_cnt);
+  /* The following records use long names, so they need to follow renaming. */
+  if (!ll_is_empty (&r->var_attrs))
+    {
+      struct sfm_extension_record *ext;
+      ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
+        parse_variable_attributes (r, ext, dict);
+
+      /* Roles use the $@Role attribute.  */
+      assign_variable_roles (r, dict);
+    }
+  if (r->extensions[EXT_LONG_LABELS] != NULL)
+    parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
+  if (r->extensions[EXT_LONG_MISSING] != NULL)
+    parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
+                                      dict);
+
+  /* Warn if the actual amount of data per case differs from the
+     amount that the header claims.  SPSS version 13 gets this
+     wrong when very long strings are involved, so don't warn in
+     that case. */
+  if (r->header.nominal_case_size > 0
+      && r->header.nominal_case_size != r->n_vars
+      && r->info.version_major != 13)
+    sys_warn (r, -1, _("File header claims %d variable positions but "
+                       "%zu were read from file."),
+              r->header.nominal_case_size, r->n_vars);
  
    /* Create an index of dictionary variable widths for
       sfm_read_case to use.  We cannot use the `struct variable's
       from the dictionary we created, because the caller owns the
       dictionary and may destroy or modify its variables. */
-  r->var_cnt = dict_get_var_cnt (*dict);
-  r->vars = pool_nalloc (r->pool, r->var_cnt, sizeof *r->vars);
-  for (i = 0; i < r->var_cnt; i++)
+  sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
+  pool_register (r->pool, free, r->sfm_vars);
+  r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
+
+  *dictp = dict;
+  if (infop)
      {
-      struct variable *v = dict_get_var (*dict, i);
-      struct sfm_var *sv = &r->vars[i];
-      sv->width = var_get_width (v);
-      sv->case_index = var_get_case_index (v);
+      *infop = r->info;
+      memset (&r->info, 0, sizeof r->info);
      }
  
-  pool_free (r->pool, var_by_value_idx);
-  r->value_cnt = dict_get_next_value_idx (*dict);
    return casereader_create_sequential
-    (NULL, r->value_cnt,
+    (NULL, r->proto,
       r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
                                         &sys_file_casereader_class, r);
+
+error:
+  sfm_close (r_);
+  dict_destroy (dict);
+  *dictp = NULL;
+  return NULL;
  }
  
-/* Closes a system file after we're done with it.
+/* Closes R, which should have been returned by sfm_open() but not already
+   closed with sfm_decode() or this function.
     Returns true if an I/O error has occurred on READER, false
     otherwise. */
  static bool
-close_reader (struct sfm_reader *r)
+sfm_close (struct any_reader *r_)
  {
+  struct sfm_reader *r = sfm_reader_cast (r_);
    bool error;
  
-  if (r == NULL)
-    return true;
-
    if (r->file)
      {
-      if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
+      if (fn_close (r->fh, r->file) == EOF)
          {
-          msg (ME, _("Error closing system file \"%s\": %s."),
+          msg (ME, _("Error closing system file `%s': %s."),
                 fh_get_file_name (r->fh), strerror (errno));
            r->error = true;
          }
        r->file = NULL;
      }
  
-  if (r->fh != NULL)
-    fh_close (r->fh, "system file", "rs");
+  any_read_info_destroy (&r->info);
+  fh_unlock (r->lock);
+  fh_unref (r->fh);
  
    error = r->error;
    pool_destroy (r->pool);
@@ -351,82 +932,133 @@ static void
  sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
  {
    struct sfm_reader *r = r_;
-  close_reader (r);
+  sfm_close (&r->any_reader);
  }
  
-/* Returns true if FILE is an SPSS system file,
-   false otherwise. */
-bool
+/* Detects whether FILE is an SPSS system file, by seeking to the beginning of
+   FILE and comparing its first 4 bytes against ASCII_MAGIC, ASCII_ZMAGIC, and
+   EBCDIC_MAGIC.  Returns 1 if one of them matches, 0 if none matches or if
+   fewer than 4 bytes could be read without a stream error, and a negative
+   errno value if seeking or reading FILE fails. */
+static int
  sfm_detect (FILE *file)
  {
-  char rec_type[5];
+  char magic[5];
  
-  if (fread (rec_type, 4, 1, file) != 1)
-    return false;
-  rec_type[4] = '\0';
+  if (fseek (file, 0, SEEK_SET) != 0)
+    return -errno;
+  if (fread (magic, 4, 1, file) != 1)
+    return ferror (file) ? -errno : 0;
+  magic[4] = '\0';
  
-  return !strcmp ("$FL2", rec_type);
+  return (!strcmp (ASCII_MAGIC, magic)
+          || !strcmp (ASCII_ZMAGIC, magic)
+          || !strcmp (EBCDIC_MAGIC, magic));
  }
  \f
-/* Reads the global header of the system file.
-   Sets DICT's file label to the system file's label.
-   Sets *WEIGHT_IDX to 0 if the system file is unweighted,
-   or to the value index of the weight variable otherwise.
-   Sets *CLAIMED_FLT64_CNT to the number of values that the file
-   claims to have (although it is not always correct).
-   If INFO is non-null, initializes *INFO with header
-   information. */
-static void
-read_header (struct sfm_reader *r, struct dictionary *dict,
-             int *weight_idx, int *claimed_flt64_cnt,
-             struct sfm_read_info *info)
+/* Reads the global header of the system file.  Initializes *HEADER and *INFO,
+   except for the string fields in *INFO, which parse_header() will initialize
+   later once the file's encoding is known. */
+static bool
+read_header (struct sfm_reader *r, struct any_read_info *info,
+             struct sfm_header_record *header)
  {
-  char rec_type[5];
-  char eye_catcher[61];
    uint8_t raw_layout_code[4];
    uint8_t raw_bias[8];
-  char creation_date[10];
-  char creation_time[9];
-  char file_label[65];
-  struct substring file_label_ss;
+  int compressed;
+  bool zmagic;
  
-  read_string (r, rec_type, sizeof rec_type);
-  read_string (r, eye_catcher, sizeof eye_catcher);
-
-  if (strcmp ("$FL2", rec_type) != 0)
-    sys_error (r, _("This is not an SPSS system file."));
+  if (!read_string (r, header->magic, sizeof header->magic)
+      || !read_string (r, header->eye_catcher, sizeof header->eye_catcher))
+    return false;
+  r->written_by_readstat = strstr (header->eye_catcher,
+                                   "https://github.com/WizardMac/ReadStat");
+
+  if (!strcmp (ASCII_MAGIC, header->magic)
+      || !strcmp (EBCDIC_MAGIC, header->magic))
+    zmagic = false;
+  else if (!strcmp (ASCII_ZMAGIC, header->magic))
+    zmagic = true;
+  else
+    {
+      sys_error (r, 0, _("This is not an SPSS system file."));
+      return false;
+    }
  
    /* Identify integer format. */
-  read_bytes (r, raw_layout_code, sizeof raw_layout_code);
+  if (!read_bytes (r, raw_layout_code, sizeof raw_layout_code))
+    return false;
    if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
                            &r->integer_format)
         && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
                               &r->integer_format))
        || (r->integer_format != INTEGER_MSB_FIRST
            && r->integer_format != INTEGER_LSB_FIRST))
-    sys_error (r, _("This is not an SPSS system file."));
+    {
+      sys_error (r, 64, _("This is not an SPSS system file."));
+      return false;
+    }
+
+  if (!read_int (r, &header->nominal_case_size))
+    return false;
  
-  *claimed_flt64_cnt = read_int32 (r);
-  if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16)
-    *claimed_flt64_cnt = -1;
+  if (header->nominal_case_size < 0
+      || header->nominal_case_size > INT_MAX / 16)
+    header->nominal_case_size = -1;
  
-  r->compressed = read_int32 (r) != 0;
+  if (!read_int (r, &compressed))
+    return false;
+  if (!zmagic)
+    {
+      if (compressed == 0)
+        r->compression = ANY_COMP_NONE;
+      else if (compressed == 1)
+        r->compression = ANY_COMP_SIMPLE;
+      else if (compressed != 0)
+        {
+          sys_error (r, 0, "System file header has invalid compression "
+                     "value %d.", compressed);
+          return false;
+        }
+    }
+  else
+    {
+      if (compressed == 2)
+        r->compression = ANY_COMP_ZLIB;
+      else
+        {
+          sys_error (r, 0, "ZLIB-compressed system file header has invalid "
+                     "compression value %d.", compressed);
+          return false;
+        }
+    }
  
-  *weight_idx = read_int32 (r);
+  if (!read_int (r, &header->weight_idx))
+    return false;
  
-  r->case_cnt = read_int32 (r);
+  if (!read_int (r, &r->case_cnt))
+    return false;
    if ( r->case_cnt > INT_MAX / 2)
      r->case_cnt = -1;
  
-
    /* Identify floating-point format and obtain compression bias. */
-  read_bytes (r, raw_bias, sizeof raw_bias);
+  if (!read_bytes (r, raw_bias, sizeof raw_bias))
+    return false;
    if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
      {
-      sys_warn (r, _("Compression bias (%g) is not the usual "
-                     "value of 100, or system file uses unrecognized "
-                     "floating-point format."),
-                r->bias);
+      uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+      if (memcmp (raw_bias, zero_bias, 8))
+        sys_warn (r, r->pos - 8,
+                  _("Compression bias is not the usual "
+                    "value of 100, or system file uses unrecognized "
+                    "floating-point format."));
+      else
+        {
+          /* Some software is known to write all-zeros to this
+             field.  Such software also writes floating-point
+             numbers in the format that we expect by default
+             (it seems that all software most likely does, in
+             reality), so don't warn in this case. */
+        }
+
        if (r->integer_format == INTEGER_MSB_FIRST)
          r->float_format = FLOAT_IEEE_DOUBLE_BE;
        else
@@ -434,365 +1066,564 @@ read_header (struct sfm_reader *r, struct dictionary *dict,
      }
    float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
  
-  read_string (r, creation_date, sizeof creation_date);
-  read_string (r, creation_time, sizeof creation_time);
-  read_string (r, file_label, sizeof file_label);
-  skip_bytes (r, 3);
-
-  file_label_ss = ss_cstr (file_label);
-  ss_trim (&file_label_ss, ss_cstr (" "));
-  if (!ss_is_empty (file_label_ss))
-    {
-      ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
-      dict_set_label (dict, ss_data (file_label_ss));
-    }
-
-  if (info)
-    {
-      struct substring product;
+  if (!read_string (r, header->creation_date, sizeof header->creation_date)
+      || !read_string (r, header->creation_time, sizeof header->creation_time)
+      || !read_string (r, header->file_label, sizeof header->file_label)
+      || !skip_bytes (r, 3))
+    return false;
  
-      strcpy (info->creation_date, creation_date);
-      strcpy (info->creation_time, creation_time);
-      info->integer_format = r->integer_format;
-      info->float_format = r->float_format;
-      info->compressed = r->compressed;
-      info->case_cnt = r->case_cnt;
+  info->integer_format = r->integer_format;
+  info->float_format = r->float_format;
+  info->compression = r->compression;
+  info->case_cnt = r->case_cnt;
  
-      product = ss_cstr (eye_catcher);
-      ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
-      ss_trim (&product, ss_cstr (" "));
-      str_copy_buf_trunc (info->product, sizeof info->product,
-                          ss_data (product), ss_length (product));
-    }
+  return true;
  }
  
-/* Reads a variable (type 2) record from R and adds the
-   corresponding variable to DICT.
-   Also skips past additional variable records for long string
-   variables. */
-static void
-read_variable_record (struct sfm_reader *r, struct dictionary *dict,
-                      int *format_warning_cnt)
+/* Reads a variable (type 2) record from R into RECORD, which is zeroed first:
+   the width, missing value code, print and write formats, and name, then the
+   variable label if the label indicator is 1, then the raw missing values if
+   the missing value code is nonzero.  At most MAX_LABEL_LEN bytes of the label
+   are kept, in memory allocated from R->pool; any remaining label bytes and
+   the padding up to a multiple of 4 bytes are skipped.  Returns true if
+   successful, false if one of the read helpers fails, if the label indicator
+   is neither 0 nor 1, or if the missing value code is out of range for the
+   variable's type. */
+static bool
+read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
  {
-  int width;
    int has_variable_label;
-  int missing_value_code;
-  int print_format;
-  int write_format;
-  char name[9];
  
-  struct variable *var;
-  int nv;
-
-  width = read_int32 (r);
-  has_variable_label = read_int32 (r);
-  missing_value_code = read_int32 (r);
-  print_format = read_int32 (r);
-  write_format = read_int32 (r);
-  read_string (r, name, sizeof name);
-  name[strcspn (name, " ")] = '\0';
-
-  /* Check variable name. */
-  if (name[0] == '$' || name[0] == '#')
-    sys_error (r, "Variable name begins with invalid character `%c'.",
-               name[0]);
-  if (!var_is_plausible_name (name, false))
-    sys_error (r, _("Invalid variable name `%s'."), name);
-
-  /* Create variable. */
-  if (width < 0 || width > 255)
-    sys_error (r, _("Bad variable width %d."), width);
-  var = dict_create_var (dict, name, width);
-  if (var == NULL)
-    sys_error (r,
-               _("Duplicate variable name `%s' within system file."),
-               name);
-
-  /* Set the short name the same as the long name. */
-  var_set_short_name (var, 0, var_get_name (var));
-
-  /* Get variable label, if any. */
-  if (has_variable_label != 0 && has_variable_label != 1)
-    sys_error (r, _("Variable label indicator field is not 0 or 1."));
+  memset (record, 0, sizeof *record);
+
+  record->pos = r->pos;
+  if (!read_int (r, &record->width)
+      || !read_int (r, &has_variable_label)
+      || !read_int (r, &record->missing_value_code)
+      || !read_int (r, &record->print_format)
+      || !read_int (r, &record->write_format)
+      || !read_string (r, record->name, sizeof record->name))
+    return false;
+
    if (has_variable_label == 1)
      {
-      size_t len;
-      char label[255 + 1];
+      enum { MAX_LABEL_LEN = 65536 };
+      unsigned int len, read_len;
+
+      if (!read_uint (r, &len))
+        return false;
+
+      /* Read up to MAX_LABEL_LEN bytes of label. */
+      read_len = MIN (MAX_LABEL_LEN, len);
+      record->label = pool_malloc (r->pool, read_len + 1);
+      if (!read_string (r, record->label, read_len + 1))
+        return false;
  
-      len = read_int32 (r);
-      if (len >= sizeof label)
-        sys_error (r, _("Variable %s has label of invalid length %u."),
-                   name, (unsigned int) len);
-      read_string (r, label, len + 1);
-      var_set_label (var, label);
+      /* Skip unread label bytes. */
+      if (!skip_bytes (r, len - read_len))
+        return false;
  
-      skip_bytes (r, ROUND_UP (len, 4) - len);
+      /* Skip label padding up to multiple of 4 bytes. */
+      if (!skip_bytes (r, ROUND_UP (len, 4) - len))
+        return false;
+    }
+  else if (has_variable_label != 0)
+    {
+      sys_error (r, record->pos,
+                 _("Variable label indicator field is not 0 or 1."));
+      return false;
      }
  
    /* Set missing values. */
-  if (missing_value_code < -3 || missing_value_code > 3
-      || missing_value_code == -1)
-    sys_error (r, _("Missing value indicator field is not "
-                    "-3, -2, 0, 1, 2, or 3."));
-  if (missing_value_code != 0)
+  if (record->missing_value_code != 0)
      {
-      struct missing_values mv;
-      mv_init (&mv, var_get_width (var));
-      if (var_is_numeric (var))
+      int code = record->missing_value_code;
+      if (record->width == 0)
          {
-          if (missing_value_code > 0)
-            {
-              int i;
-              for (i = 0; i < missing_value_code; i++)
-                mv_add_num (&mv, read_flt64 (r));
-            }
-          else
+          if (code < -3 || code > 3 || code == -1)
              {
-              double low = read_flt64 (r);
-              double high = read_flt64 (r);
-              mv_add_num_range (&mv, low, high);
-              if (missing_value_code == -3)
-                mv_add_num (&mv, read_flt64 (r));
+              sys_error (r, record->pos,
+                         _("Numeric missing value indicator field is not "
+                           "-3, -2, 0, 1, 2, or 3."));
+              return false;
              }
          }
-      else if (var_get_width (var) <= MAX_SHORT_STRING)
+      else
          {
-          if (missing_value_code > 0)
+          if (code < 1 || code > 3)
              {
-              int i;
-              for (i = 0; i < missing_value_code; i++)
-                {
-                  char string[9];
-                  read_string (r, string, sizeof string);
-                  mv_add_str (&mv, string);
-                }
+              sys_error (r, record->pos,
+                         _("String missing value indicator field is not "
+                           "0, 1, 2, or 3."));
+              return false;
              }
-          else
-            sys_error (r, _("String variable %s may not have missing "
-                            "values specified as a range."),
-                       name);
          }
-      else /* var->width > MAX_SHORT_STRING */
-        sys_error (r, _("Long string variable %s may not have missing "
-                        "values."),
-                   name);
-      var_set_missing_values (var, &mv);
-    }
-
-  /* Set formats. */
-  parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
-  parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
  
-  /* Account for values.
-     Skip long string continuation records, if any. */
-  nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
-  r->flt64_cnt += nv;
-  if (width > 8)
-    {
-      int i;
-
-      for (i = 1; i < nv; i++)
-        {
-          /* Check for record type 2 and width -1. */
-          if (read_int32 (r) != 2 || read_int32 (r) != -1)
-            sys_error (r, _("Missing string continuation record."));
-
-          /* Skip and ignore remaining continuation data. */
-          has_variable_label = read_int32 (r);
-          missing_value_code = read_int32 (r);
-          print_format = read_int32 (r);
-          write_format = read_int32 (r);
-          read_string (r, name, sizeof name);
-
-          /* Variable label fields on continuation records have
-             been spotted in system files created by "SPSS Power
-             Macintosh Release 6.1". */
-          if (has_variable_label)
-            skip_bytes (r, ROUND_UP (read_int32 (r), 4));
-        }
+      if (!read_bytes (r, record->missing, 8 * abs (code)))
+        return false;
      }
+
+  return true;
  }
  
-/* Translates the format spec from sysfile format to internal
-   format. */
-static void
-parse_format_spec (struct sfm_reader *r, uint32_t s,
-                   enum which_format which, struct variable *v,
-                   int *format_warning_cnt)
+/* Reads value labels from R into RECORD: first a value label (type 3) record,
+   then the variable index (type 4) record that must follow it immediately.
+
+   The type 3 record holds a count of labels followed by that many entries.
+   Each entry is a raw value, a 1-byte label length, and the label itself,
+   padded so that the length byte plus the label occupy a multiple of 8 bytes.
+   The type 4 record holds a count of variables, which must be between 1 and
+   R->n_vars, followed by that many variable indexes.
+
+   The labels and indexes are stored in memory allocated from R->pool.
+   Returns true if successful, false if a read helper fails or the data is
+   invalid. */
+static bool
+read_value_label_record (struct sfm_reader *r,
+                         struct sfm_value_label_record *record)
  {
-  const int max_format_warnings = 8;
-  struct fmt_spec f;
-  uint8_t raw_type = s >> 16;
-  uint8_t w = s >> 8;
-  uint8_t d = s;
+  size_t i;
+  int type;
  
-  bool ok;
+  /* Read type 3 record. */
+  record->pos = r->pos;
+  if (!read_uint (r, &record->n_labels))
+    return false;
+  if (record->n_labels > UINT_MAX / sizeof *record->labels)
+    {
+      sys_error (r, r->pos - 4, _("Invalid number of labels %u."),
+                 record->n_labels);
+      return false;
+    }
+  record->labels = pool_nmalloc (r->pool, record->n_labels,
+                                 sizeof *record->labels);
+  for (i = 0; i < record->n_labels; i++)
+    {
+      struct sfm_value_label *label = &record->labels[i];
+      unsigned char label_len;
+      size_t padded_len;
  
-  if (!fmt_from_io (raw_type, &f.type))
-    sys_error (r, _("Unknown variable format %d."), (int) raw_type);
-  f.w = w;
-  f.d = d;
+      if (!read_bytes (r, label->value, sizeof label->value))
+        return false;
  
-  msg_disable ();
-  ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
-  msg_enable ();
+      /* Read label length. */
+      if (!read_bytes (r, &label_len, sizeof label_len))
+        return false;
+      padded_len = ROUND_UP (label_len + 1, 8);
  
-  if (ok)
+      /* Read label, padding. */
+      label->label = pool_malloc (r->pool, padded_len + 1);
+      if (!read_bytes (r, label->label, padded_len - 1))
+        return false;
+      label->label[label_len] = '\0';
+    }
+
+  /* Read record type of type 4 record. */
+  if (!read_int (r, &type))
+    return false;
+  if (type != 4)
      {
-      if (which == PRINT_FORMAT)
-        var_set_print_format (v, &f);
-      else
-        var_set_write_format (v, &f);
+      sys_error (r, r->pos - 4,
+                 _("Variable index record (type 4) does not immediately "
+                   "follow value label record (type 3) as it should."));
+      return false;
      }
-  else if (*++format_warning_cnt <= max_format_warnings)
+
+  /* Read number of variables associated with value label from type 4
+     record. */
+  if (!read_uint (r, &record->n_vars))
+    return false;
+  if (record->n_vars < 1 || record->n_vars > r->n_vars)
      {
-      char fmt_string[FMT_STRING_LEN_MAX + 1];
-      sys_warn (r, _("%s variable %s has invalid %s format %s."),
-                var_is_numeric (v) ? _("Numeric") : _("String"),
-                var_get_name (v),
-                which == PRINT_FORMAT ? _("print") : _("write"),
-                fmt_to_string (&f, fmt_string));
+      sys_error (r, r->pos - 4,
+                 _("Number of variables associated with a value label (%u) "
+                   "is not between 1 and the number of variables (%zu)."),
+                 record->n_vars, r->n_vars);
+      return false;
+    }
+
+  record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
+  for (i = 0; i < record->n_vars; i++)
+    if (!read_int (r, &record->vars[i]))
+      return false;
+
+  return true;
+}
  
-      if (*format_warning_cnt == max_format_warnings)
-        sys_warn (r, _("Suppressing further invalid format warnings."));
+/* Reads a document record from R.  The record starts with a count of lines,
+   each of which occupies DOC_LINE_LENGTH bytes.  A count of zero is accepted
+   and leaves R->document untouched; otherwise the lines are read into a new
+   record allocated from R->pool, which is then stored in R->document.
+   Returns true if successful, false on error, that is, if a read helper
+   fails or the count is negative or at least INT_MAX / DOC_LINE_LENGTH. */
+static bool
+read_document_record (struct sfm_reader *r)
+{
+  int n_lines;
+  if (!read_int (r, &n_lines))
+    return false;
+  else if (n_lines == 0)
+    return true;
+  else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
+    {
+      sys_error (r, r->pos,
+                 _("Number of document lines (%d) "
+                   "must be greater than 0 and less than %d."),
+                 n_lines, INT_MAX / DOC_LINE_LENGTH);
+      return false;
      }
+
+  struct sfm_document_record *record;
+  record = pool_malloc (r->pool, sizeof *record);
+  record->pos = r->pos;
+  record->n_lines = n_lines;
+  record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
+  if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines))
+    return false;
+
+  r->document = record;
+  return true;
  }
  
-/* Sets the weighting variable in DICT to the variable
-   corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
-   nonzero. */
-static void
-setup_weight (struct sfm_reader *r, int weight_idx,
-              struct variable **var_by_value_idx, struct dictionary *dict)
+static bool /* Begins reading a type 7 (extension) record from R into RECORD:
+               stores SUBTYPE and the current R->pos in RECORD, then reads the
+               size and count fields.  Returns true if successful, false if a
+               read helper fails or if the record is reported as too large by
+               the check below. */
+read_extension_record_header (struct sfm_reader *r, int subtype,
+                              struct sfm_extension_record *record)
  {
-  if (weight_idx != 0)
+  record->subtype = subtype;
+  record->pos = r->pos;
+  if (!read_uint (r, &record->size) || !read_uint (r, &record->count))
+    return false;
+
+  /* Check that SIZE * COUNT + 1 doesn't overflow.  Adding 1
+     allows an extra byte for a null terminator, used by some
+     extension processing routines.  A record whose SIZE is zero
+     skips this check. */
+  if (record->size != 0
+      && xsum (1, xtimes (record->count, record->size)) >= UINT_MAX)
      {
-      struct variable *weight_var
-        = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
-      if (var_is_numeric (weight_var))
-        dict_set_weight (dict, weight_var);
-      else
-        sys_error (r, _("Weighting variable must be numeric."));
+      sys_error (r, record->pos, "Record type 7 subtype %d too large.",
+                 subtype);
+      return false;
      }
+
+  return true;
  }
  
-/* Reads a document record, type 6, from system file R, and sets up
-   the documents and n_documents fields in the associated
-   dictionary. */
-static void
-read_documents (struct sfm_reader *r, struct dictionary *dict)
+/* Reads an extension record from R into RECORD. */
+static bool
+read_extension_record (struct sfm_reader *r, int subtype,
+                       struct sfm_extension_record **recordp)
  {
-  int line_cnt;
-  char *documents;
+  struct extension_record_type
+    {
+      int subtype;
+      int size;
+      int count;
+    };
  
-  if (dict_get_documents (dict) != NULL)
-    sys_error (r, _("Multiple type 6 (document) records."));
+  static const struct extension_record_type types[] =
+    {
+      /* Implemented record types. */
+      { EXT_INTEGER,      4, 8 },
+      { EXT_FLOAT,        8, 3 },
+      { EXT_MRSETS,       1, 0 },
+      { EXT_PRODUCT_INFO, 1, 0 },
+      { EXT_DISPLAY,      4, 0 },
+      { EXT_LONG_NAMES,   1, 0 },
+      { EXT_LONG_STRINGS, 1, 0 },
+      { EXT_NCASES,       8, 2 },
+      { EXT_FILE_ATTRS,   1, 0 },
+      { EXT_VAR_ATTRS,    1, 0 },
+      { EXT_MRSETS2,      1, 0 },
+      { EXT_ENCODING,     1, 0 },
+      { EXT_LONG_LABELS,  1, 0 },
+      { EXT_LONG_MISSING, 1, 0 },
+
+      /* Ignored record types. */
+      { EXT_VAR_SETS,     0, 0 },
+      { EXT_DATE,         0, 0 },
+      { EXT_DATA_ENTRY,   0, 0 },
+      { EXT_DATAVIEW,     0, 0 },
+    };
  
-  line_cnt = read_int32 (r);
-  if (line_cnt <= 0)
-    sys_error (r, _("Number of document lines (%d) "
-                    "must be greater than 0."), line_cnt);
+  const struct extension_record_type *type;
+  struct sfm_extension_record *record;
+  size_t n_bytes;
  
-  documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
-  read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
-  if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
-    dict_set_documents (dict, documents);
-  else
-    sys_error (r, _("Document line contains null byte."));
-  pool_free (r->pool, documents);
+  *recordp = NULL;
+  record = pool_malloc (r->pool, sizeof *record);
+  if (!read_extension_record_header (r, subtype, record))
+    return false;
+  n_bytes = record->count * record->size;
+
+  for (type = types; type < &types[sizeof types / sizeof *types]; type++)
+    if (subtype == type->subtype)
+      {
+        if (type->size > 0 && record->size != type->size)
+          sys_warn (r, record->pos,
+                    _("Record type 7, subtype %d has bad size %u "
+                      "(expected %d)."), subtype, record->size, type->size);
+        else if (type->count > 0 && record->count != type->count)
+          sys_warn (r, record->pos,
+                    _("Record type 7, subtype %d has bad count %u "
+                      "(expected %d)."), subtype, record->count, type->count);
+        else if (type->count == 0 && type->size == 0)
+          {
+            /* Ignore this record. */
+          }
+        else
+          {
+            char *data = pool_malloc (r->pool, n_bytes + 1);
+            data[n_bytes] = '\0';
+
+            record->data = data;
+            if (!read_bytes (r, record->data, n_bytes))
+              return false;
+            *recordp = record;
+            return true;
+          }
+
+        goto skip;
+      }
+
+  sys_warn (r, record->pos,
+            _("Unrecognized record type 7, subtype %d.  For help, please "
+              "send this file to %s and mention that you were using %s."),
+            subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
+
+skip:
+  return skip_bytes (r, n_bytes);
+}
+
+static bool
+skip_extension_record (struct sfm_reader *r, int subtype)
+{
+  struct sfm_extension_record record;
+
+  return (read_extension_record_header (r, subtype, &record)
+          && skip_bytes (r, record.count * record.size));
  }
  
-/* Read a type 7 extension record. */
  static void
-read_extension_record (struct sfm_reader *r, struct dictionary *dict)
+parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
+              struct any_read_info *info, struct dictionary *dict)
  {
-  int subtype = read_int32 (r);
-  size_t size = read_int32 (r);
-  size_t count = read_int32 (r);
-  size_t bytes = size * count;
+  const char *dict_encoding = dict_get_encoding (dict);
+  struct substring product;
+  struct substring label;
+  char *fixed_label;
+
+  /* Convert file label to UTF-8 and put it into DICT. */
+  label = recode_substring_pool ("UTF-8", dict_encoding,
+                                 ss_cstr (header->file_label), r->pool);
+  ss_trim (&label, ss_cstr (" "));
+  label.string[label.length] = '\0';
+  fixed_label = fix_line_ends (label.string);
+  dict_set_label (dict, fixed_label);
+  free (fixed_label);
+
+  /* Put creation date and time in UTF-8 into INFO. */
+  info->creation_date = recode_string ("UTF-8", dict_encoding,
+                                       header->creation_date, -1);
+  info->creation_time = recode_string ("UTF-8", dict_encoding,
+                                       header->creation_time, -1);
+
+  /* Put product name into INFO, dropping eye-catcher string if present. */
+  product = recode_substring_pool ("UTF-8", dict_encoding,
+                                   ss_cstr (header->eye_catcher), r->pool);
+  ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
+  ss_trim (&product, ss_cstr (" "));
+  info->product = ss_xstrdup (product);
+}
  
-  /* Check that SIZE * COUNT + 1 doesn't overflow.  Adding 1
-     allows an extra byte for a null terminator, used by some
-     extension processing routines. */
-  if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
-    sys_error (r, "Record type 7 subtype %d too large.", subtype);
+/* Parses the N_VAR_RECS variable (type 2) records in VAR_RECS and
+   adds the corresponding variables to DICT.
+   Also skips past the continuation records that follow long string
+   variables. */
+static bool
+parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
+                        struct sfm_var_record *var_recs, size_t n_var_recs)
+{
+  const char *dict_encoding = dict_get_encoding (dict);
+  struct sfm_var_record *rec;
+  int n_warnings = 0;
  
-  switch (subtype)
+  for (rec = var_recs; rec < &var_recs[n_var_recs]; )
      {
-    case 3:
-      read_machine_int32_info (r, size, count);
-      return;
+      struct variable *var;
+      size_t n_values;
+      char *name;
+      size_t i;
  
-    case 4:
-      read_machine_flt64_info (r, size, count);
-      return;
+      name = recode_string_pool ("UTF-8", dict_encoding,
+                                 rec->name, -1, r->pool);
+      name[strcspn (name, " ")] = '\0';
  
-    case 5:
-      /* Variable sets information.  We don't use these yet.
-         They only apply to GUIs; see VARSETS on the APPLY
-         DICTIONARY command in SPSS documentation. */
-      break;
+      if (!dict_id_is_valid (dict, name, false)
+          || name[0] == '$' || name[0] == '#')
+        {
+          sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
+          return false;
+        }
  
-    case 6:
-      /* DATE variable information.  We don't use it yet, but we
-         should. */
-      break;
+      if (rec->width < 0 || rec->width > 255)
+        {
+          sys_error (r, rec->pos,
+                     _("Bad width %d for variable %s."), rec->width, name);
+          return false;
+        }
  
-    case 7:
-      /* Unknown purpose. */
-      break;
+      var = rec->var = dict_create_var (dict, name, rec->width);
+      if (var == NULL)
+        {
+          char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
+          sys_warn (r, rec->pos, _("Renaming variable with duplicate name "
+                                   "`%s' to `%s'."),
+                    name, new_name);
+          var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
+          var_set_short_name (var, 0, new_name);
+          free (new_name);
+        }
  
-    case 11:
-      read_display_parameters (r, size, count, dict);
-      return;
+      /* Set the short name the same as the long name (even if we renamed
+         it). */
+      var_set_short_name (var, 0, var_get_name (var));
  
-    case 13:
-      read_long_var_name_map (r, size, count, dict);
-      return;
+      /* Get variable label, if any. */
+      if (rec->label)
+        {
+          char *utf8_label;
  
-    case 14:
-      read_long_string_map (r, size, count, dict);
-      return;
+          utf8_label = recode_string_pool ("UTF-8", dict_encoding,
+                                           rec->label, -1, r->pool);
+          var_set_label (var, utf8_label);
+        }
  
-    case 16:
-      /* New in SPSS v14?  Unknown purpose.  */
-      break;
+      /* Set missing values. */
+      if (rec->missing_value_code != 0)
+        {
+          int width = var_get_width (var);
+          struct missing_values mv;
  
-    case 17:
-      /* Text field that defines variable attributes.  New in
-         SPSS 14. */
-      break;
+          mv_init_pool (r->pool, &mv, width);
+          if (var_is_numeric (var))
+            {
+              bool has_range = rec->missing_value_code < 0;
+              int n_discrete = (has_range
+                                ? rec->missing_value_code == -3
+                                : rec->missing_value_code);
+              int ofs = 0;
  
-    default:
-      sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
-      break;
+              if (has_range)
+                {
+                  double low = parse_float (r, rec->missing, 0);
+                  double high = parse_float (r, rec->missing, 8);
+
+                  /* Deal with SPSS 21 change in representation. */
+                  if (low == SYSMIS)
+                    low = LOWEST;
+
+                  mv_add_range (&mv, low, high);
+                  ofs += 16;
+                }
+
+              for (i = 0; i < n_discrete; i++)
+                {
+                  mv_add_num (&mv, parse_float (r, rec->missing, ofs));
+                  ofs += 8;
+                }
+            }
+          else
+            for (i = 0; i < rec->missing_value_code; i++)
+              mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
+          var_set_missing_values (var, &mv);
+        }
+
+      /* Set formats. */
+      parse_format_spec (r, rec->pos + 12, rec->print_format,
+                         PRINT_FORMAT, var, &n_warnings);
+      parse_format_spec (r, rec->pos + 16, rec->write_format,
+                         WRITE_FORMAT, var, &n_warnings);
+
+      /* Account for values.
+         Skip long string continuation records, if any. */
+      n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
+      for (i = 1; i < n_values; i++)
+        if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
+          {
+            sys_error (r, rec->pos, _("Missing string continuation record."));
+            return false;
+          }
+      rec += n_values;
+    }
+
+  return true;
+}
+
+/* Translates the format spec from sysfile format to internal
+   format. */
+static void
+parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
+                   enum which_format which, struct variable *v,
+                   int *n_warnings)
+{
+  const int max_warnings = 8;
+  uint8_t raw_type = format >> 16;
+  uint8_t w = format >> 8;
+  uint8_t d = format;
+  struct fmt_spec f;
+  bool ok;
+
+  f.w = w;
+  f.d = d;
+
+  msg_disable ();
+  ok = (fmt_from_io (raw_type, &f.type)
+        && fmt_check_output (&f)
+        && fmt_check_width_compat (&f, var_get_width (v)));
+  msg_enable ();
+
+  if (ok)
+    {
+      if (which == PRINT_FORMAT)
+        var_set_print_format (v, &f);
+      else
+        var_set_write_format (v, &f);
+    }
+  else if (format == 0)
+    {
+      /* Actually observed in the wild.  No point in warning about it. */
      }
+  else if (++*n_warnings <= max_warnings)
+    {
+      if (which == PRINT_FORMAT)
+        sys_warn (r, pos, _("Variable %s with width %d has invalid print "
+                            "format 0x%x."),
+                  var_get_name (v), var_get_width (v), format);
+      else
+        sys_warn (r, pos, _("Variable %s with width %d has invalid write "
+                            "format 0x%x."),
+                  var_get_name (v), var_get_width (v), format);
  
-  skip_bytes (r, bytes);
+      if (*n_warnings == max_warnings)
+        sys_warn (r, -1, _("Suppressing further invalid format warnings."));
+    }
  }
  
-/* Read record type 7, subtype 3. */
  static void
-read_machine_int32_info (struct sfm_reader *r, size_t size, size_t count)
+parse_document (struct dictionary *dict, struct sfm_document_record *record)
  {
-  int version_major UNUSED = read_int32 (r);
-  int version_minor UNUSED = read_int32 (r);
-  int version_revision UNUSED = read_int32 (r);
-  int machine_code UNUSED = read_int32 (r);
-  int float_representation = read_int32 (r);
-  int compression_code UNUSED = read_int32 (r);
-  int integer_representation = read_int32 (r);
-  int character_code UNUSED = read_int32 (r);
+  const char *p;
  
-  int expected_float_format;
-  int expected_integer_format;
+  for (p = record->documents;
+       p < record->documents + DOC_LINE_LENGTH * record->n_lines;
+       p += DOC_LINE_LENGTH)
+    {
+      struct substring line;
+
+      line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
+                                    ss_buffer (p, DOC_LINE_LENGTH), NULL);
+      ss_rtrim (&line, ss_cstr (" "));
+      line.string[line.length] = '\0';
+
+      dict_add_document_line (dict, line.string, false);
+
+      ss_dealloc (&line);
+    }
+}
+
+/* Parses record type 7, subtype 3. */
+static bool
+parse_machine_integer_info (struct sfm_reader *r,
+                            const struct sfm_extension_record *record,
+                            struct any_read_info *info)
+{
+  int float_representation, expected_float_format;
+  int integer_representation, expected_integer_format;
  
-  if (size != 4 || count != 8)
-    sys_error (r, _("Bad size (%u) or count (%u) field on record type 7, "
-                    "subtype 3."),
-               (unsigned int) size, (unsigned int) count);
+  /* Save version info. */
+  info->version_major = parse_int (r, record->data, 0);
+  info->version_minor = parse_int (r, record->data, 4);
+  info->version_revision = parse_int (r, record->data, 8);
  
    /* Check floating point format. */
+  float_representation = parse_int (r, record->data, 16);
    if (r->float_format == FLOAT_IEEE_DOUBLE_BE
        || r->float_format == FLOAT_IEEE_DOUBLE_LE)
      expected_float_format = 1;
@@ -803,11 +1634,16 @@ read_machine_int32_info (struct sfm_reader *r, size_t size, size_t count)
    else
      NOT_REACHED ();
    if (float_representation != expected_float_format)
-    sys_error (r, _("Floating-point representation indicated by "
-                    "system file (%d) differs from expected (%d)."),
-               r->float_format, expected_float_format);
+    {
+      sys_error (r, record->pos,
+                 _("Floating-point representation indicated by "
+                   "system file (%d) differs from expected (%d)."),
+                 float_representation, expected_float_format);
+      return false;
+    }
  
    /* Check integer format. */
+  integer_representation = parse_int (r, record->data, 24);
    if (r->integer_format == INTEGER_MSB_FIRST)
      expected_integer_format = 1;
    else if (r->integer_format == INTEGER_LSB_FIRST)
@@ -815,462 +1651,1088 @@ read_machine_int32_info (struct sfm_reader *r, size_t size, size_t count)
    else
      NOT_REACHED ();
    if (integer_representation != expected_integer_format)
-    {
-      static const char *endian[] = {N_("little-endian"), N_("big-endian")};
-      sys_warn (r, _("Integer format indicated by system file (%s) "
-                     "differs from expected (%s)."),
-                gettext (endian[integer_representation == 1]),
-                gettext (endian[expected_integer_format == 1]));
-    }
+    sys_warn (r, record->pos,
+              _("Integer format indicated by system file (%d) "
+                "differs from expected (%d)."),
+              integer_representation, expected_integer_format);
+
+  return true;
  }
  
-/* Read record type 7, subtype 4. */
+/* Parses record type 7, subtype 4. */
  static void
-read_machine_flt64_info (struct sfm_reader *r, size_t size, size_t count)
+parse_machine_float_info (struct sfm_reader *r,
+                          const struct sfm_extension_record *record)
  {
-  double sysmis = read_flt64 (r);
-  double highest = read_flt64 (r);
-  double lowest = read_flt64 (r);
-
-  if (size != 8 || count != 3)
-    sys_error (r, _("Bad size (%u) or count (%u) on extension 4."),
-               (unsigned int) size, (unsigned int) count);
+  double sysmis = parse_float (r, record->data, 0);
+  double highest = parse_float (r, record->data, 8);
+  double lowest = parse_float (r, record->data, 16);
  
    if (sysmis != SYSMIS)
-    sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
+    sys_warn (r, record->pos,
+              _("File specifies unexpected value %g (%a) as %s, "
+                "instead of %g (%a)."),
+              sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
+
    if (highest != HIGHEST)
-    sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
-  if (lowest != LOWEST)
-    sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
+    sys_warn (r, record->pos,
+              _("File specifies unexpected value %g (%a) as %s, "
+                "instead of %g (%a)."),
+              highest, highest, "HIGHEST", HIGHEST, HIGHEST);
+
+  /* SPSS before version 21 used a unique value just bigger than SYSMIS as
+     LOWEST.  SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
+     appears in a context (missing values) where SYSMIS cannot. */
+  if (lowest != LOWEST && lowest != SYSMIS)
+    sys_warn (r, record->pos,
+              _("File specifies unexpected value %g (%a) as %s, "
+                "instead of %g (%a) or %g (%a)."),
+              lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
  }
  
-/* Read record type 7, subtype 11, which specifies how variables
-   should be displayed in GUI environments. */
+/* Parses record type 7, subtype 10. */
  static void
-read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
-                         struct dictionary *dict)
+parse_extra_product_info (struct sfm_reader *r,
+                          const struct sfm_extension_record *record,
+                          struct any_read_info *info)
  {
-  const size_t n_vars = count / 3 ;
-  bool warned = false;
-  int i;
+  struct text_record *text;
+
+  text = open_text_record (r, record, true);
+  info->product_ext = fix_line_ends (text_get_all (text));
+  close_text_record (r, text);
+}
  
-  if (count % 3 || n_vars != dict_get_var_cnt (dict))
-    sys_error (r, _("Bad size (%u) or count (%u) on extension 11."),
-               (unsigned int) size, (unsigned int) count);
+/* Parses record type 7, subtype 7 or 19. */
+static void
+parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
+              size_t *allocated_mrsets)
+{
+  struct text_record *text;
  
-  for (i = 0; i < n_vars; ++i)
+  text = open_text_record (r, record, false);
+  for (;;)
      {
-      int measure = read_int32 (r);
-      int width = read_int32 (r);
-      int align = read_int32 (r);
-      struct variable *v = dict_get_var (dict, i);
+      struct sfm_mrset *mrset;
+      size_t allocated_vars;
+      char delimiter;
  
-      /* spss v14 sometimes seems to set string variables' measure to zero */
-      if ( 0 == measure && var_is_alpha (v) ) measure = 1;
+      /* Skip extra line feeds if present. */
+      while (text_match (text, '\n'))
+        continue;
  
+      if (r->n_mrsets >= *allocated_mrsets)
+        r->mrsets = pool_2nrealloc (r->pool, r->mrsets, allocated_mrsets,
+                                    sizeof *r->mrsets);
+      mrset = &r->mrsets[r->n_mrsets];
+      memset(mrset, 0, sizeof *mrset);
  
-      if (measure < 1 || measure > 3 || align < 0 || align > 2)
+      mrset->name = text_get_token (text, ss_cstr ("="), NULL);
+      if (mrset->name == NULL)
+        break;
+
+      if (text_match (text, 'C'))
          {
-          if (!warned)
-            sys_warn (r, _("Invalid variable display parameters.  "
-                           "Default parameters substituted."));
-          warned = true;
-          continue;
+          mrset->type = MRSET_MC;
+          if (!text_match (text, ' '))
+            {
+              sys_warn (r, record->pos,
+                        _("Missing space following `%c' at offset %zu "
+                          "in MRSETS record."), 'C', text_pos (text));
+              break;
+            }
+        }
+      else if (text_match (text, 'D'))
+        {
+          mrset->type = MRSET_MD;
+          mrset->cat_source = MRSET_VARLABELS;
+        }
+      else if (text_match (text, 'E'))
+        {
+          char *number;
+
+          mrset->type = MRSET_MD;
+          mrset->cat_source = MRSET_COUNTEDVALUES;
+          if (!text_match (text, ' '))
+            {
+              sys_warn (r, record->pos,
+                        _("Missing space following `%c' at offset %zu "
+                          "in MRSETS record."), 'E',  text_pos (text));
+              break;
+            }
+
+          number = text_get_token (text, ss_cstr (" "), NULL);
+          if (!strcmp (number, "11"))
+            mrset->label_from_var_label = true;
+          else if (strcmp (number, "1"))
+            sys_warn (r, record->pos,
+                      _("Unexpected label source value following `E' "
+                        "at offset %zu in MRSETS record."),
+                      text_pos (text));
+        }
+      else
+        {
+          sys_warn (r, record->pos,
+                    _("Missing `C', `D', or `E' at offset %zu "
+                      "in MRSETS record."),
+                    text_pos (text));
+          break;
          }
  
-      var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
-                           : measure == 2 ? MEASURE_ORDINAL
-                           : MEASURE_SCALE));
-      var_set_display_width (v, width);
-      var_set_alignment (v, (align == 0 ? ALIGN_LEFT
-                             : align == 1 ? ALIGN_RIGHT
-                             : ALIGN_CENTRE));
+      if (mrset->type == MRSET_MD)
+        {
+          mrset->counted = text_parse_counted_string (r, text);
+          if (mrset->counted == NULL)
+            break;
+        }
+
+      mrset->label = text_parse_counted_string (r, text);
+      if (mrset->label == NULL)
+        break;
+
+      allocated_vars = 0;
+      do
+        {
+          const char *var;
+
+          var = text_get_token (text, ss_cstr (" \n"), &delimiter);
+          if (var == NULL)
+            {
+              if (delimiter != '\n')
+                sys_warn (r, record->pos,
+                          _("Missing new-line parsing variable names "
+                            "at offset %zu in MRSETS record."),
+                          text_pos (text));
+              break;
+            }
+
+          if (mrset->n_vars >= allocated_vars)
+            mrset->vars = pool_2nrealloc (r->pool, mrset->vars,
+                                          &allocated_vars,
+                                          sizeof *mrset->vars);
+          mrset->vars[mrset->n_vars++] = var;
+        }
+      while (delimiter != '\n');
+
+      r->n_mrsets++;
      }
+  close_text_record (r, text);
  }
  
-/* Reads record type 7, subtype 13, which gives the long name
-   that corresponds to each short name.  Modifies variable names
-   in DICT accordingly.  */
  static void
-read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
-                        struct dictionary *dict)
+decode_mrsets (struct sfm_reader *r, struct dictionary *dict)
  {
-  struct variable_to_value_map *map;
-  struct variable *var;
-  char *long_name;
-  int warning_cnt = 0;
+  const struct sfm_mrset *s;
  
-  map = open_variable_to_value_map (r, size * count);
-  while (read_variable_to_value_map (r, dict, map, &var, &long_name,
-                                     &warning_cnt))
+  for (s = r->mrsets; s < &r->mrsets[r->n_mrsets]; s++)
      {
-      char **short_names;
-      size_t short_name_cnt;
+      struct stringi_set var_names;
+      struct mrset *mrset;
+      char *name;
+      int width;
        size_t i;
  
-      /* Validate long name. */
-      if (!var_is_valid_name (long_name, false))
+      name = recode_string ("UTF-8", r->encoding, s->name, -1);
+      if (!mrset_is_valid_name (name, dict_get_encoding (dict), false))
          {
-          sys_warn (r, _("Long variable mapping from %s to invalid "
-                         "variable name `%s'."),
-                    var_get_name (var), long_name);
+          sys_warn (r, -1, _("Invalid multiple response set name `%s'."),
+                    name);
+          free (name);
            continue;
          }
  
-      /* Identify any duplicates. */
-      if (strcasecmp (var_get_short_name (var, 0), long_name)
-          && dict_lookup_var (dict, long_name) != NULL)
+      mrset = xzalloc (sizeof *mrset);
+      mrset->name = name;
+      mrset->type = s->type;
+      mrset->cat_source = s->cat_source;
+      mrset->label_from_var_label = s->label_from_var_label;
+      if (s->label[0] != '\0')
+        mrset->label = recode_string ("UTF-8", r->encoding, s->label, -1);
+
+      stringi_set_init (&var_names);
+      mrset->vars = xmalloc (s->n_vars * sizeof *mrset->vars);
+      width = INT_MAX;
+      for (i = 0; i < s->n_vars; i++)
          {
-          sys_warn (r, _("Duplicate long variable name `%s' "
-                         "within system file."), long_name);
-          continue;
-        }
+          struct variable *var;
+          char *var_name;
  
-      /* Renaming a variable may clear its short names, but we
-         want to retain them, so we save them and re-set them
-         afterward. */
-      short_name_cnt = var_get_short_name_cnt (var);
-      short_names = xnmalloc (short_name_cnt, sizeof *short_names);
-      for (i = 0; i < short_name_cnt; i++) 
+          var_name = recode_string ("UTF-8", r->encoding, s->vars[i], -1);
+
+          var = dict_lookup_var (dict, var_name);
+          if (var == NULL)
+            {
+              free (var_name);
+              continue;
+            }
+          if (!stringi_set_insert (&var_names, var_name))
+            {
+              sys_warn (r, -1,
+                        _("MRSET %s contains duplicate variable name %s."),
+                        mrset->name, var_name);
+              free (var_name);
+              continue;
+            }
+          free (var_name);
+
+          if (mrset->label == NULL && mrset->label_from_var_label
+              && var_has_label (var))
+            mrset->label = xstrdup (var_get_label (var));
+
+          if (mrset->n_vars
+              && var_get_type (var) != var_get_type (mrset->vars[0]))
+            {
+              sys_warn (r, -1,
+                        _("MRSET %s contains both string and "
+                          "numeric variables."), mrset->name);
+              continue;
+            }
+          width = MIN (width, var_get_width (var));
+
+          mrset->vars[mrset->n_vars++] = var;
+        }
+
+      if (mrset->n_vars < 2)
+        {
+          if (mrset->n_vars == 0)
+            sys_warn (r, -1, _("MRSET %s has no variables."), mrset->name);
+          else
+            sys_warn (r, -1, _("MRSET %s has only one variable."),
+                      mrset->name);
+          mrset_destroy (mrset);
+         stringi_set_destroy (&var_names);
+          continue;
+        }
+
+      if (mrset->type == MRSET_MD)
+        {
+          mrset->width = width;
+          value_init (&mrset->counted, width);
+          if (width == 0)
+            mrset->counted.f = c_strtod (s->counted, NULL);
+          else
+            value_copy_str_rpad (&mrset->counted, width,
+                                 (const uint8_t *) s->counted, ' ');
+        }
+
+      dict_add_mrset (dict, mrset);
+      stringi_set_destroy (&var_names);
+    }
+}
+
+/* Parses record type 7, subtype 11, which specifies how variables
+   should be displayed in GUI environments. */
+static void
+parse_display_parameters (struct sfm_reader *r,
+                         const struct sfm_extension_record *record,
+                         struct dictionary *dict)
+{
+  bool includes_width;
+  bool warned = false;
+  size_t n_vars;
+  size_t ofs;
+  size_t i;
+
+  n_vars = dict_get_var_cnt (dict);
+  if (record->count == 3 * n_vars)
+    includes_width = true;
+  else if (record->count == 2 * n_vars)
+    includes_width = false;
+  else
+    {
+      sys_warn (r, record->pos,
+                _("Extension 11 has bad count %u (for %zu variables)."),
+                record->count, n_vars);
+      return;
+    }
+
+  ofs = 0;
+  for (i = 0; i < n_vars; ++i)
+    {
+      struct variable *v = dict_get_var (dict, i);
+      int measure, width, align;
+
+      measure = parse_int (r, record->data, ofs);
+      ofs += 4;
+
+      if (includes_width)
+        {
+          width = parse_int (r, record->data, ofs);
+          ofs += 4;
+        }
+      else
+        width = 0;
+
+      align = parse_int (r, record->data, ofs);
+      ofs += 4;
+
+      /* SPSS sometimes seems to set variables' measure to zero. */
+      if (0 == measure)
+        measure = 1;
+
+      if (measure < 1 || measure > 3 || align < 0 || align > 2)
          {
-          const char *s = var_get_short_name (var, i);
-          short_names[i] = s != NULL ? xstrdup (s) : NULL;
+          if (!warned)
+            sys_warn (r, record->pos,
+                      _("Invalid variable display parameters for variable "
+                        "%zu (%s).  Default parameters substituted."),
+                      i, var_get_name (v));
+          warned = true;
+          continue;
          }
  
-      /* Set long name. */
-      dict_rename_var (dict, var, long_name);
+      var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
+                           : measure == 2 ? MEASURE_ORDINAL
+                           : MEASURE_SCALE));
+      var_set_alignment (v, (align == 0 ? ALIGN_LEFT
+                             : align == 1 ? ALIGN_RIGHT
+                             : ALIGN_CENTRE));
+
+      /* Older versions (SPSS 9.0) sometimes set the display
+        width to zero.  This causes confusion in the GUI, so
+        only set the width if it is nonzero. */
+      if (width > 0)
+        var_set_display_width (v, width);
+    }
+}
+
+static void
+rename_var_and_save_short_names (struct sfm_reader *r, off_t pos,
+                                 struct dictionary *dict,
+                                 struct variable *var, const char *new_name)
+{
+  size_t n_short_names;
+  char **short_names;
+  size_t i;
  
-      /* Restore short names. */
-      for (i = 0; i < short_name_cnt; i++) 
+  /* Renaming a variable may clear its short names, but we
+     want to retain them, so we save them and re-set them
+     afterward. */
+  n_short_names = var_get_short_name_cnt (var);
+  short_names = xnmalloc (n_short_names, sizeof *short_names);
+  for (i = 0; i < n_short_names; i++)
+    {
+      const char *s = var_get_short_name (var, i);
+      short_names[i] = s != NULL ? xstrdup (s) : NULL;
+    }
+
+  /* Set long name. */
+  if (!dict_try_rename_var (dict, var, new_name))
+    sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name);
+
+  /* Restore short names. */
+  for (i = 0; i < n_short_names; i++)
+    {
+      var_set_short_name (var, i, short_names[i]);
+      free (short_names[i]);
+    }
+  free (short_names);
+}
+
+/* Parses record type 7, subtype 13, which gives the long name that corresponds
+   to each short name.  Modifies variable names in DICT accordingly.
+
+   RECORD may be null, meaning the file has no such record; every variable in
+   DICT is then renamed to the lowercase form of its current name.  Otherwise
+   each (variable, long name) pair in the record is applied in turn; a long
+   name that fails dict_id_is_valid() or starts with `$' or `#' is skipped
+   with a warning.  Short names are kept across every rename. */
+static void
+parse_long_var_name_map (struct sfm_reader *r,
+                         const struct sfm_extension_record *record,
+                         struct dictionary *dict)
+{
+  struct text_record *text;
+  struct variable *var;
+  char *long_name;
+
+  if (record == NULL)
+    {
+      /* There are no long variable names.  Use the short variable names,
+         converted to lowercase, as the long variable names. */
+      size_t i;
+
+      for (i = 0; i < dict_get_var_cnt (dict); i++)
+       {
+         struct variable *var = dict_get_var (dict, i);
+          char *new_name;
+
+          new_name = utf8_to_lower (var_get_name (var));
+          rename_var_and_save_short_names (r, -1, dict, var, new_name);
+          free (new_name);
+       }
+
+      return;
+    }
+
+  /* Rename each of the variables, one by one.  (In a correctly constructed
+     system file, this cannot create any intermediate duplicate variable names,
+     because all of the new variable names are longer than any of the old
+     variable names and thus there cannot be any overlaps.) */
+  text = open_text_record (r, record, true);
+  while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
+    {
+      /* Validate long name. */
+      if (!dict_id_is_valid (dict, long_name, false)
+          || long_name[0] == '$' || long_name[0] == '#')
          {
-          var_set_short_name (var, i, short_names[i]);
-          free (short_names[i]);
+          sys_warn (r, record->pos,
+                    _("Long variable mapping from %s to invalid "
+                      "variable name `%s'."),
+                    var_get_name (var), long_name);
+          continue;
          }
+
+      rename_var_and_save_short_names (r, record->pos, dict, var, long_name);
      }
-  close_variable_to_value_map (r, map);
-  r->has_long_var_names = true;
+  close_text_record (r, text);
  }
  
  /* Reads record type 7, subtype 14, which gives the real length
     of each very long string.  Rearranges DICT accordingly. */
-static void
-read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
-                      struct dictionary *dict)
+static bool
+parse_long_string_map (struct sfm_reader *r,
+                       const struct sfm_extension_record *record,
+                       struct dictionary *dict)
  {
-  struct variable_to_value_map *map;
+  struct text_record *text;
    struct variable *var;
    char *length_s;
-  int warning_cnt = 0;
-
-  r->has_vls = true;
  
-  map = open_variable_to_value_map (r, size * count);
-  while (read_variable_to_value_map (r, dict, map, &var, &length_s,
-                                     &warning_cnt))
+  text = open_text_record (r, record, true);
+  while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
      {
-      long length, remaining_length;
-      size_t idx;
+      size_t idx = var_get_dict_index (var);
+      long int length;
+      int segment_cnt;
+      int i;
  
        /* Get length. */
        length = strtol (length_s, NULL, 10);
-      if (length < MIN_VERY_LONG_STRING || length == LONG_MAX)
+      if (length < 1 || length > MAX_STRING)
          {
-          sys_warn (r, _("%s listed as string of length %s "
-                         "in length table."),
+          sys_warn (r, record->pos,
+                    _("%s listed as string of invalid length %s "
+                      "in very long string record."),
                      var_get_name (var), length_s);
            continue;
          }
  
-      /* Group multiple variables into single variable
-         and delete all but the first. */
-      remaining_length = length;
-      for (idx = var_get_dict_index (var); remaining_length > 0; idx++)
-        if (idx < dict_get_var_cnt (dict))
-          remaining_length -= MIN (var_get_width (dict_get_var (dict, idx)),
-                                   EFFECTIVE_LONG_STRING_LENGTH);
-        else
-          sys_error (r, _("Very long string %s overflows dictionary."),
+      /* Check segments. */
+      segment_cnt = sfm_width_to_segments (length);
+      if (segment_cnt == 1)
+        {
+          sys_warn (r, record->pos,
+                    _("%s listed in very long string record with width %s, "
+                      "which requires only one segment."),
+                    var_get_name (var), length_s);
+          continue;
+        }
+      if (idx + segment_cnt > dict_get_var_cnt (dict))
+        {
+          sys_error (r, record->pos,
+                     _("Very long string %s overflows dictionary."),
                       var_get_name (var));
-      dict_delete_consecutive_vars (dict,
-                                    var_get_dict_index (var) + 1,
-                                    idx - var_get_dict_index (var) - 1);
+          return false;
+        }
  
-      /* Assign all the length to the first variable. */
+      /* Get the short names from the segments and check their
+         lengths. */
+      for (i = 0; i < segment_cnt; i++)
+        {
+          struct variable *seg = dict_get_var (dict, idx + i);
+          int alloc_width = sfm_segment_alloc_width (length, i);
+          int width = var_get_width (seg);
+
+          if (i > 0)
+            var_set_short_name (var, i, var_get_short_name (seg, 0));
+          if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
+            {
+              sys_error (r, record->pos,
+                         _("Very long string with width %ld has segment %d "
+                           "of width %d (expected %d)."),
+                         length, i, width, alloc_width);
+              return false;
+            }
+        }
+      dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
        var_set_width (var, length);
      }
-  close_variable_to_value_map (r, map);
+  close_text_record (r, text);
    dict_compact_values (dict);
+
+  return true;
  }
  
-/* Reads value labels from sysfile H and inserts them into the
-   associated dictionary. */
-static void
-read_value_labels (struct sfm_reader *r,
-                   struct dictionary *dict, struct variable **var_by_value_idx)
+static bool
+parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
+                    const struct sfm_var_record *var_recs, size_t n_var_recs,
+                    const struct sfm_value_label_record *record)
  {
-  struct pool *subpool;
+  struct variable **vars;
+  char **utf8_labels;
+  size_t i;
+
+  utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
+  for (i = 0; i < record->n_labels; i++)
+    utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
+                                         record->labels[i].label, -1,
+                                         r->pool);
  
-  struct label
+  vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
+  for (i = 0; i < record->n_vars; i++)
      {
-      char raw_value[8];        /* Value as uninterpreted bytes. */
-      union value value;        /* Value. */
-      char *label;              /* Null-terminated label string. */
-    };
+      vars[i] = lookup_var_by_index (r, record->pos,
+                                     var_recs, n_var_recs, record->vars[i]);
+      if (vars[i] == NULL)
+        return false;
+    }
  
-  struct label *labels = NULL;
-  int label_cnt;               /* Number of labels. */
+  for (i = 1; i < record->n_vars; i++)
+    if (var_get_type (vars[i]) != var_get_type (vars[0]))
+      {
+        sys_error (r, record->pos,
+                   _("Variables associated with value label are not all of "
+                     "identical type.  Variable %s is %s, but variable "
+                     "%s is %s."),
+                   var_get_name (vars[0]),
+                   var_is_numeric (vars[0]) ? _("numeric") : _("string"),
+                   var_get_name (vars[i]),
+                   var_is_numeric (vars[i]) ? _("numeric") : _("string"));
+        return false;
+      }
  
-  struct variable **var = NULL;        /* Associated variables. */
-  int var_cnt;                 /* Number of associated variables. */
+  for (i = 0; i < record->n_vars; i++)
+    {
+      struct variable *var = vars[i];
+      int width;
+      size_t j;
  
-  int i;
+      width = var_get_width (var);
+      if (width > 8)
+        {
+          sys_error (r, record->pos,
+                     _("Value labels may not be added to long string "
+                       "variables (e.g. %s) using records types 3 and 4."),
+                     var_get_name (var));
+          return false;
+        }
  
-  subpool = pool_create_subpool (r->pool);
+      for (j = 0; j < record->n_labels; j++)
+        {
+          struct sfm_value_label *label = &record->labels[j];
+          union value value;
  
-  /* Read the type 3 record and record its contents.  We can't do
-     much with the data yet because we don't know whether it is
-     of numeric or string type. */
+          value_init (&value, width);
+          if (width == 0)
+            value.f = parse_float (r, label->value, 0);
+          else
+            memcpy (value_str_rw (&value, width), label->value, width);
  
-  /* Read number of labels. */
-  label_cnt = read_int32 (r);
+          if (!var_add_value_label (var, &value, utf8_labels[j]))
+            {
+              if (r->written_by_readstat)
+                {
+                  /* Ignore the problem.  ReadStat is buggy and emits value
+                     labels whose values are longer than string variables'
+                     widths, that are identical in the actual width of the
+                     variable, e.g. both values "ABC123" and "ABC456" for a
+                     string variable with width 3. */
+                }
+              else if (var_is_numeric (var))
+                sys_warn (r, record->pos,
+                          _("Duplicate value label for %g on %s."),
+                          value.f, var_get_name (var));
+              else
+                sys_warn (r, record->pos,
+                          _("Duplicate value label for `%.*s' on %s."),
+                          width, value_str (&value, width),
+                          var_get_name (var));
+            }
+
+          value_destroy (&value, width);
+        }
+    }
+
+  pool_free (r->pool, vars);
+  for (i = 0; i < record->n_labels; i++)
+    pool_free (r->pool, utf8_labels[i]);
+  pool_free (r->pool, utf8_labels);
+
+  return true;
+}
  
-  if (label_cnt >= INT32_MAX / sizeof *labels)
+static struct variable *
+lookup_var_by_index (struct sfm_reader *r, off_t offset,
+                     const struct sfm_var_record *var_recs, size_t n_var_recs,
+                     int idx)
+{
+  const struct sfm_var_record *rec;
+
+  if (idx < 1 || idx > n_var_recs)
      {
-      sys_warn (r, _("Invalid number of labels: %d.  Ignoring labels."),
-                label_cnt);
-      label_cnt = 0;
+      sys_error (r, offset,
+                 _("Variable index %d not in valid range 1...%zu."),
+                 idx, n_var_recs);
+      return NULL;
      }
  
-  /* Read each value/label tuple into labels[]. */
-  labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
-  for (i = 0; i < label_cnt; i++)
+  rec = &var_recs[idx - 1];
+  if (rec->var == NULL)
      {
-      struct label *label = labels + i;
-      unsigned char label_len;
-      size_t padded_len;
+      sys_error (r, offset,
+                 _("Variable index %d refers to long string continuation."),
+                 idx);
+      return NULL;
+    }
  
-      /* Read value. */
-      read_bytes (r, label->raw_value, sizeof label->raw_value);
+  return rec->var;
+}
  
-      /* Read label length. */
-      read_bytes (r, &label_len, sizeof label_len);
-      padded_len = ROUND_UP (label_len + 1, 8);
+/* Parses a set of custom attributes from TEXT into ATTRS.
+   ATTRS may be a null pointer, in which case the attributes are
+   read but discarded. */
+static void
+parse_attributes (struct sfm_reader *r, struct text_record *text,
+                  struct attrset *attrs)
+{
+  do
+    {
+      struct attribute *attr;
+      char *key;
+      int index;
  
-      /* Read label, padding. */
-      label->label = pool_alloc (subpool, padded_len + 1);
-      read_bytes (r, label->label, padded_len - 1);
-      label->label[label_len] = 0;
-    }
+      /* Parse the key. */
+      key = text_get_token (text, ss_cstr ("("), NULL);
+      if (key == NULL)
+        return;
  
-  /* Now, read the type 4 record that has the list of variables
-     to which the value labels are to be applied. */
+      attr = attribute_create (key);
+      for (index = 1; ; index++)
+        {
+          /* Parse the value. */
+          char *value;
+          size_t length;
  
-  /* Read record type of type 4 record. */
-  if (read_int32 (r) != 4)
-    sys_error (r, _("Variable index record (type 4) does not immediately "
-                    "follow value label record (type 3) as it should."));
+          value = text_get_token (text, ss_cstr ("\n"), NULL);
+          if (value == NULL)
+            {
+              text_warn (r, text, _("Error parsing attribute value %s[%d]."),
+                         key, index);
+              break;
+            }
  
-  /* Read number of variables associated with value label from type 4
-     record. */
-  var_cnt = read_int32 (r);
-  if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
-    sys_error (r, _("Number of variables associated with a value label (%d) "
-                    "is not between 1 and the number of variables (%u)."),
-               var_cnt, (unsigned int) dict_get_var_cnt (dict));
-
-  /* Read the list of variables. */
-  var = pool_nalloc (subpool, var_cnt, sizeof *var);
-  for (i = 0; i < var_cnt; i++)
-    {
-      var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int32 (r));
-      if (var_is_long_string (var[i]))
-        sys_error (r, _("Value labels are not allowed on long string "
-                        "variables (%s)."), var_get_name (var[i]));
-    }
-
-  /* Type check the variables. */
-  for (i = 1; i < var_cnt; i++)
-    if (var_get_type (var[i]) != var_get_type (var[0]))
-      sys_error (r, _("Variables associated with value label are not all of "
-                      "identical type.  Variable %s is %s, but variable "
-                      "%s is %s."),
-                 var_get_name (var[0]),
-                 var_is_numeric (var[0]) ? _("numeric") : _("string"),
-                 var_get_name (var[i]),
-                 var_is_numeric (var[i]) ? _("numeric") : _("string"));
-
-  /* Fill in labels[].value, now that we know the desired type. */
-  for (i = 0; i < label_cnt; i++)
-    {
-      struct label *label = labels + i;
-
-      if (var_is_alpha (var[0]))
-        buf_copy_rpad (label->value.s, sizeof label->value.s,
-                       label->raw_value, sizeof label->raw_value);
+          length = strlen (value);
+          if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
+            {
+              value[length - 1] = '\0';
+              attribute_add_value (attr, value + 1);
+            }
+          else
+            {
+              text_warn (r, text,
+                         _("Attribute value %s[%d] is not quoted: %s."),
+                         key, index, value);
+              attribute_add_value (attr, value);
+            }
+
+          /* Was this the last value for this attribute? */
+          if (text_match (text, ')'))
+            break;
+        }
+      if (attrs != NULL)
+        {
+          if (!attrset_try_add (attrs, attr))
+            {
+              text_warn (r, text, _("Duplicate attribute %s."),
+                         attribute_get_name (attr));
+              attribute_destroy (attr);
+            }
+        }
        else
-        label->value.f = flt64_to_double (r, (uint8_t *) label->raw_value);
+        attribute_destroy (attr);
+    }
+  while (!text_match (text, '/'));
+}
+
+/* Reads record type 7, subtype 17, which lists custom
+   attributes on the data file.  Opens RECORD as a text record, hands it to
+   parse_attributes() together with DICT's own attribute set (from
+   dict_get_attributes()), and then closes the text record. */
+static void
+parse_data_file_attributes (struct sfm_reader *r,
+                            const struct sfm_extension_record *record,
+                            struct dictionary *dict)
+{
+  struct text_record *text = open_text_record (r, record, true);
+  parse_attributes (r, text, dict_get_attributes (dict));
+  close_text_record (r, text);
+}
+
+/* Parses record type 7, subtype 18, which lists custom
+   attributes on individual variables.
+
+   Repeatedly reads a variable name from RECORD's text (passing ":" to
+   text_read_variable_name(), presumably as the name's delimiter) and then
+   parses the attribute set that follows it.  When the lookup leaves VAR
+   null, parse_attributes() is still called, with a null attribute set, so
+   that the attributes are consumed from the text but not stored. */
+static void
+parse_variable_attributes (struct sfm_reader *r,
+                           const struct sfm_extension_record *record,
+                           struct dictionary *dict)
+{
+  struct text_record *text;
+  struct variable *var;
+
+  text = open_text_record (r, record, true);
+  while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
+    parse_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
+  close_text_record (r, text);
+}
+
+/* Sets the role of every variable in DICT that carries a "$@Role"
+   attribute.  The attribute's first value is converted with atoi() and
+   mapped as 0 = input, 1 = target, 2 = both, 3 = none, 4 = partition,
+   5 = split; any other number falls back to ROLE_INPUT.  Variables without
+   the attribute are left untouched.
+
+   Only the first variable with an invalid role is named in a warning; if
+   there were more, one further warning reports how many others there were.
+
+   NOTE(review): the result of attribute_get_value (attr, 0) is passed
+   straight to atoi().  If a "$@Role" attribute can exist with no values and
+   attribute_get_value() returns null in that case, this dereferences a null
+   pointer -- confirm against the attribute API. */
+static void
+assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
+{
+  size_t n_warnings = 0;
+  size_t i;
+
+  for (i = 0; i < dict_get_var_cnt (dict); i++)
+    {
+      struct variable *var = dict_get_var (dict, i);
+      struct attrset *attrs = var_get_attributes (var);
+      const struct attribute *attr = attrset_lookup (attrs, "$@Role");
+      if (attr != NULL)
+        {
+          int value = atoi (attribute_get_value (attr, 0));
+          enum var_role role;
+
+          switch (value)
+            {
+            case 0:
+              role = ROLE_INPUT;
+              break;
+
+            case 1:
+              role = ROLE_TARGET;
+              break;
+
+            case 2:
+              role = ROLE_BOTH;
+              break;
+
+            case 3:
+              role = ROLE_NONE;
+              break;
+
+            case 4:
+              role = ROLE_PARTITION;
+              break;
+
+            case 5:
+              role = ROLE_SPLIT;
+              break;
+
+            default:
+              role = ROLE_INPUT;
+              if (n_warnings++ == 0)
+                sys_warn (r, -1, _("Invalid role for variable %s."),
+                          var_get_name (var));
+            }
+
+          var_set_role (var, role);
+        }
      }
  
-  /* Assign the `value_label's to each variable. */
-  for (i = 0; i < var_cnt; i++)
+  if (n_warnings > 1)
+    sys_warn (r, -1, _("%zu other variables had invalid roles."),
+              n_warnings - 1);
+}
+
+/* Checks that LENGTH bytes starting at offset OFS lie within RECORD's data,
+   which is taken to be record->size * record->count bytes long.  Returns
+   true if they do.  Otherwise issues a warning that the extension record
+   ends unexpectedly, positioned at the end of the record, and returns false.
+
+   LENGTH is compared against the record size on its own before OFS + LENGTH
+   is evaluated, presumably so that a very large LENGTH is rejected without
+   relying on a sum that could wrap around. */
+static bool
+check_overflow (struct sfm_reader *r,
+                const struct sfm_extension_record *record,
+                size_t ofs, size_t length)
+{
+  size_t end = record->size * record->count;
+  if (length >= end || ofs + length > end)
      {
-      struct variable *v = var[i];
-      int j;
+      sys_warn (r, record->pos + end,
+                _("Extension record subtype %d ends unexpectedly."),
+                record->subtype);
+      return false;
+    }
+  return true;
+}
+
+static void
+parse_long_string_value_labels (struct sfm_reader *r,
+                                const struct sfm_extension_record *record,
+                                struct dictionary *dict)
+{
+  const char *dict_encoding = dict_get_encoding (dict);
+  size_t end = record->size * record->count;
+  size_t ofs = 0;
  
-      /* Add each label to the variable. */
-      for (j = 0; j < label_cnt; j++)
+  while (ofs < end)
+    {
+      char *var_name;
+      size_t n_labels, i;
+      struct variable *var;
+      union value value;
+      int var_name_len;
+      int width;
+
+      /* Parse variable name length. */
+      if (!check_overflow (r, record, ofs, 4))
+        return;
+      var_name_len = parse_int (r, record->data, ofs);
+      ofs += 4;
+
+      /* Parse variable name, width, and number of labels. */
+      if (!check_overflow (r, record, ofs, var_name_len)
+          || !check_overflow (r, record, ofs, var_name_len + 8))
+        return;
+      var_name = recode_string_pool ("UTF-8", dict_encoding,
+                                     (const char *) record->data + ofs,
+                                     var_name_len, r->pool);
+      width = parse_int (r, record->data, ofs + var_name_len);
+      n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
+      ofs += var_name_len + 8;
+
+      /* Look up 'var' and validate. */
+      var = dict_lookup_var (dict, var_name);
+      if (var == NULL)
+        sys_warn (r, record->pos + ofs,
+                  _("Ignoring long string value label record for "
+                    "unknown variable %s."), var_name);
+      else if (var_is_numeric (var))
+        {
+          sys_warn (r, record->pos + ofs,
+                    _("Ignoring long string value label record for "
+                      "numeric variable %s."), var_name);
+          var = NULL;
+        }
+      else if (width != var_get_width (var))
+        {
+          sys_warn (r, record->pos + ofs,
+                    _("Ignoring long string value label record for variable "
+                      "%s because the record's width (%d) does not match the "
+                      "variable's width (%d)."),
+                    var_name, width, var_get_width (var));
+          var = NULL;
+        }
+
+      /* Parse values. */
+      value_init_pool (r->pool, &value, width);
+      for (i = 0; i < n_labels; i++)
         {
-          struct label *label = &labels[j];
-          if (!var_add_value_label (v, &label->value, label->label))
+          size_t value_length, label_length;
+          bool skip = var == NULL;
+
+          /* Parse value length. */
+          if (!check_overflow (r, record, ofs, 4))
+            return;
+          value_length = parse_int (r, record->data, ofs);
+          ofs += 4;
+
+          /* Parse value. */
+          if (!check_overflow (r, record, ofs, value_length))
+            return;
+          if (!skip)
              {
-              if (var_is_numeric (var[0]))
-                sys_warn (r, _("Duplicate value label for %g on %s."),
-                          label->value.f, var_get_name (v));
+              if (value_length == width)
+                memcpy (value_str_rw (&value, width),
+                        (const uint8_t *) record->data + ofs, width);
                else
-                sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
-                          var_get_width (v), label->value.s,
-                          var_get_name (v));
+                {
+                  sys_warn (r, record->pos + ofs,
+                            _("Ignoring long string value label %zu for "
+                              "variable %s, with width %d, that has bad value "
+                              "width %zu."),
+                            i, var_get_name (var), width, value_length);
+                  skip = true;
+                }
              }
-       }
+          ofs += value_length;
+
+          /* Parse label length. */
+          if (!check_overflow (r, record, ofs, 4))
+            return;
+          label_length = parse_int (r, record->data, ofs);
+          ofs += 4;
+
+          /* Parse label. */
+          if (!check_overflow (r, record, ofs, label_length))
+            return;
+          if (!skip)
+            {
+              char *label;
+
+              label = recode_string_pool ("UTF-8", dict_encoding,
+                                          (const char *) record->data + ofs,
+                                          label_length, r->pool);
+              if (!var_add_value_label (var, &value, label))
+                sys_warn (r, record->pos + ofs,
+                          _("Duplicate value label for `%.*s' on %s."),
+                          width, value_str (&value, width),
+                          var_get_name (var));
+              pool_free (r->pool, label);
+            }
+          ofs += label_length;
+        }
      }
+}
  
-  pool_destroy (subpool);
+/* Parses the long string missing values held in extension RECORD and sets
+   them on the corresponding variables in DICT.
+
+   As read here, the record is a sequence of entries, each consisting of a
+   4-byte variable name length, the variable name, a 1-byte count of missing
+   values, and then, for each missing value, a 4-byte length followed by that
+   many bytes of value.  Every read is guarded by check_overflow(); if the
+   record is too short, parsing stops at that point and whatever has not yet
+   been applied is dropped.
+
+   Entries naming an unknown or numeric variable are still parsed, so that
+   the offset stays in step, but are ignored with a warning.  A count outside
+   1...3 only produces a warning; values past the third are consumed from the
+   record but never added. */
+static void
+parse_long_string_missing_values (struct sfm_reader *r,
+                                  const struct sfm_extension_record *record,
+                                  struct dictionary *dict)
+{
+  const char *dict_encoding = dict_get_encoding (dict);
+  size_t end = record->size * record->count;
+  size_t ofs = 0;
+
+  while (ofs < end)
+    {
+      struct missing_values mv;
+      char *var_name;
+      struct variable *var;
+      int n_missing_values;
+      int var_name_len;
+      size_t i;
+
+      /* Parse variable name length. */
+      if (!check_overflow (r, record, ofs, 4))
+        return;
+      var_name_len = parse_int (r, record->data, ofs);
+      ofs += 4;
+
+      /* Parse variable name.  The second check also covers the one-byte
+         count of missing values that follows the name. */
+      if (!check_overflow (r, record, ofs, var_name_len)
+          || !check_overflow (r, record, ofs, var_name_len + 1))
+        return;
+      var_name = recode_string_pool ("UTF-8", dict_encoding,
+                                     (const char *) record->data + ofs,
+                                     var_name_len, r->pool);
+      ofs += var_name_len;
+
+      /* Parse number of missing values. */
+      n_missing_values = ((const uint8_t *) record->data)[ofs];
+      if (n_missing_values < 1 || n_missing_values > 3)
+        sys_warn (r, record->pos + ofs,
+                  _("Long string missing values record says variable %s "
+                    "has %d missing values, but only 1 to 3 missing values "
+                    "are allowed."),
+                  var_name, n_missing_values);
+      ofs++;
+
+      /* Look up 'var' and validate. */
+      var = dict_lookup_var (dict, var_name);
+      if (var == NULL)
+        sys_warn (r, record->pos + ofs,
+                  _("Ignoring long string missing value record for "
+                    "unknown variable %s."), var_name);
+      else if (var_is_numeric (var))
+        {
+          sys_warn (r, record->pos + ofs,
+                    _("Ignoring long string missing value record for "
+                      "numeric variable %s."), var_name);
+          var = NULL;
+        }
+
+      /* Parse values. */
+      mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
+      for (i = 0; i < n_missing_values; i++)
+       {
+          size_t value_length;
+
+          /* Parse value length. */
+          if (!check_overflow (r, record, ofs, 4))
+            return;
+          value_length = parse_int (r, record->data, ofs);
+          ofs += 4;
+
+          /* Parse value. */
+          if (!check_overflow (r, record, ofs, value_length))
+            return;
+          if (var != NULL
+              && i < 3
+              && !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
+                              value_length))
+            sys_warn (r, record->pos + ofs,
+                      _("Ignoring long string missing value %zu for variable "
+                        "%s, with width %d, that has bad value width %zu."),
+                      i, var_get_name (var), var_get_width (var),
+                      value_length);
+          ofs += value_length;
+        }
+      if (var != NULL)
+        var_set_missing_values (var, &mv);
+    }
  }
  \f
  /* Case reader. */
  
-static void partial_record (struct sfm_reader *r)
-     NO_RETURN;
+static void partial_record (struct sfm_reader *);
  
  static void read_error (struct casereader *, const struct sfm_reader *);
  
-
  static bool read_case_number (struct sfm_reader *, double *);
-static bool read_case_string (struct sfm_reader *, char *, size_t);
+static int read_case_string (struct sfm_reader *, uint8_t *, size_t);
  static int read_opcode (struct sfm_reader *);
  static bool read_compressed_number (struct sfm_reader *, double *);
-static bool read_compressed_string (struct sfm_reader *, char *);
-static bool read_whole_strings (struct sfm_reader *, char *, size_t);
-
-/* Reads one case from READER's file into C.  Returns true only
-   if successful. */
-static bool
-sys_file_casereader_read (struct casereader *reader, void *r_,
-                          struct ccase *c)
+static int read_compressed_string (struct sfm_reader *, uint8_t *);
+static int read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
+static bool skip_whole_strings (struct sfm_reader *, size_t);
+
+/* Reads and returns one case from READER's file.  Returns a null
+   pointer if not successful. */
+static struct ccase *
+sys_file_casereader_read (struct casereader *reader, void *r_)
  {
    struct sfm_reader *r = r_;
-  if (r->error)
-    return false;
+  struct ccase *c;
+  int retval;
+  int i;
  
-  case_create (c, r->value_cnt);
-  if (setjmp (r->bail_out))
-    {
-      casereader_force_error (reader);
-      case_destroy (c);
-      return false;
-    }
+  if (r->error || !r->sfm_var_cnt)
+    return NULL;
  
-  if (!r->compressed && sizeof (double) == 8 && !r->has_vls)
-    {
-      /* Fast path.  Read the whole case directly. */
-      if (!try_read_bytes (r, case_data_all_rw (c),
-                           sizeof (union value) * r->flt64_cnt))
-        {
-          case_destroy (c);
-         if ( r->case_cnt != -1 )
-           read_error (reader, r);
-          return false;
-        }
+  c = case_create (r->proto);
  
-      /* Convert floating point numbers to native format if needed. */
-      if (r->float_format != FLOAT_NATIVE_DOUBLE)
-        {
-          int i;
-
-          for (i = 0; i < r->var_cnt; i++)
-            if (r->vars[i].width == 0)
-              {
-                double *d = &case_data_rw_idx (c, r->vars[i].case_index)->f;
-                float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d);
-              }
-        }
-      return true;
-    }
-  else
+  for (i = 0; i < r->sfm_var_cnt; i++)
      {
-      /* Slow path.  Convert from external to internal format. */
-      int i;
+      struct sfm_var *sv = &r->sfm_vars[i];
+      union value *v = case_data_rw_idx (c, sv->case_index);
  
-      for (i = 0; i < r->var_cnt; i++)
+      if (sv->var_width == 0)
+        retval = read_case_number (r, &v->f);
+      else
          {
-         struct sfm_var *sv = &r->vars[i];
-          union value *v = case_data_rw_idx (c, sv->case_index);
-
-          if (sv->width == 0)
+          uint8_t *s = value_str_rw (v, sv->var_width);
+          retval = read_case_string (r, s + sv->offset, sv->segment_width);
+          if (retval == 1)
              {
-              if (!read_case_number (r, &v->f))
-                goto eof;
-            }
-          else
-            {
-              /* Read the string data in segments up to 255 bytes
-                 at a time, packed into 8-byte units. */
-              const int max_chunk = MIN_VERY_LONG_STRING - 1;
-             int ofs, chunk_size;
-              for (ofs = 0; ofs < sv->width; ofs += chunk_size)
-                {
-                  chunk_size = MIN (max_chunk, sv->width - ofs);
-                  if (!read_case_string (r, v->s + ofs, chunk_size))
-                    {
-                      if (ofs)
-                        partial_record (r);
-                      goto eof;
-                    }
-                }
-
-              /* Very long strings have trailing wasted space
-                 that we must skip. */
-              if (sv->width >= MIN_VERY_LONG_STRING)
-                {
-                  int bytes_read = (sv->width / max_chunk * 256
-                                    + ROUND_UP (sv->width % max_chunk, 8));
-                  int total_bytes = sfm_width_to_bytes (sv->width);
-                  int excess_bytes = total_bytes - bytes_read;
-
-                  while (excess_bytes > 0)
-                    {
-                      char buffer[1024];
-                      size_t chunk = MIN (sizeof buffer, excess_bytes);
-                      if (!read_whole_strings (r, buffer, chunk))
-                        partial_record (r);
-                      excess_bytes -= chunk;
-                    }
-                }
+              retval = skip_whole_strings (r, ROUND_DOWN (sv->padding, 8));
+              if (retval == 0)
+                sys_error (r, r->pos, _("File ends in partial string value."));
              }
          }
-      return true;
  
-    eof:
-      case_destroy (c);
-      if (i != 0)
-        partial_record (r);
-      if ( r->case_cnt != -1 )
-       read_error (reader, r);
-      return false;
+      if (retval != 1)
+        goto eof;
      }
+  return c;
+
+eof:
+  if (i != 0)
+    partial_record (r);
+  if (r->case_cnt != -1)
+    read_error (reader, r);
+  case_unref (c);
+  return NULL;
  }
  
  /* Issues an error that R ends in a partial record. */
  static void
  partial_record (struct sfm_reader *r)
  {
-  sys_error (r, _("File ends in partial case."));
+  sys_error (r, r->pos, _("File ends in partial case."));
  }
  
+/* Issues an error that an unspecified error occurred reading SFM's file,
+   and forces casereader R into an error state. */
  static void
  read_error (struct casereader *r, const struct sfm_reader *sfm)
  {
-  msg (ME, _("Error reading case from file %s"), fh_get_name (sfm->fh));
+  msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
    casereader_force_error (r);
  }
  
@@ -1282,63 +2744,71 @@ read_error (struct casereader *r, const struct sfm_reader *sfm)
  static bool
  read_case_number (struct sfm_reader *r, double *d)
  {
-  if (!r->compressed)
+  if (r->compression == ANY_COMP_NONE)
      {
-      uint8_t flt64[8];
-      if (!try_read_bytes (r, flt64, sizeof flt64))
+      uint8_t number[8];
+      if (!try_read_bytes (r, number, sizeof number))
          return false;
-      *d = flt64_to_double (r, flt64);
+      float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
        return true;
      }
    else
      return read_compressed_number (r, d);
  }
  
-/* Reads LENGTH string bytes from R into S.
-   Always reads a multiple of 8 bytes; if LENGTH is not a
-   multiple of 8, then extra bytes are read and discarded without
-   being written to S.
-   Reads compressed strings if S is compressed.
-   Returns true if successful, false if end of file is
-   reached immediately. */
-static bool
-read_case_string (struct sfm_reader *r, char *s, size_t length)
+/* Reads LENGTH string bytes from R into S.  Always reads a multiple of 8
+   bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
+   discarded without being written to S.  Reads compressed strings if R is
+   compressed.  Returns 1 if successful, 0 if end of file is reached
+   immediately, or -1 for some kind of error. */
+static int
+read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
  {
    size_t whole = ROUND_DOWN (length, 8);
    size_t partial = length % 8;
  
    if (whole)
      {
-      if (!read_whole_strings (r, s, whole))
-        return false;
+      int retval = read_whole_strings (r, s, whole);
+      if (retval != 1)
+        return retval;
      }
  
    if (partial)
      {
-      char bounce[8];
-      if (!read_whole_strings (r, bounce, sizeof bounce))
+      uint8_t bounce[8];
+      int retval = read_whole_strings (r, bounce, sizeof bounce);
+      if (retval == -1)
+        return -1;
+      else if (!retval)
          {
            if (whole)
-            partial_record (r);
-          return false;
+            {
+              partial_record (r);
+              return -1;
+            }
+          return 0;
          }
        memcpy (s + whole, bounce, partial);
      }
  
-  return true;
+  return 1;
  }
  
  /* Reads and returns the next compression opcode from R. */
  static int
  read_opcode (struct sfm_reader *r)
  {
-  assert (r->compressed);
+  assert (r->compression != ANY_COMP_NONE);
    for (;;)
      {
        int opcode;
        if (r->opcode_idx >= sizeof r->opcodes)
          {
-          if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
+
+          int retval = try_read_compressed_bytes (r, r->opcodes,
+                                                  sizeof r->opcodes);
+          if (retval != 1)
              return -1;
            r->opcode_idx = 0;
          }
@@ -1363,11 +2833,18 @@ read_compressed_number (struct sfm_reader *r, double *d)
        return false;
  
      case 253:
-      *d = read_flt64 (r);
-      break;
+      return read_compressed_float (r, d);
  
      case 254:
-      sys_error (r, _("Compressed data is corrupt."));
+      float_convert (r->float_format, "        ", FLOAT_NATIVE_DOUBLE, d);
+      if (!r->corruption_warning)
+        {
+          r->corruption_warning = true;
+          sys_warn (r, r->pos,
+                    _("Possible compressed data corruption: "
+                      "compressed spaces appear in numeric field."));
+        }
+      break;
  
      case 255:
        *d = SYSMIS;
@@ -1381,380 +2858,935 @@ read_compressed_number (struct sfm_reader *r, double *d)
    return true;
  }
  
-/* Reads a compressed 8-byte string segment from R and stores it
-   in DST.
-   Returns true if successful, false if end of file is
-   reached immediately. */
-static bool
-read_compressed_string (struct sfm_reader *r, char *dst)
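+/* Reads a compressed 8-byte string segment from R and stores it in DST.
+   Returns 1 if successful, 0 if no opcode could be read or the opcode is 252,
+   or -1 if the 8 literal bytes that follow opcode 253 cannot be read. */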
+static int
+read_compressed_string (struct sfm_reader *r, uint8_t *dst)
  {
-  switch (read_opcode (r))
+  int opcode;
+  int retval;
+
+  opcode = read_opcode (r);
+  switch (opcode)
      {
      case -1:
      case 252:
-      return false;
+      return 0;
  
      case 253:
-      read_bytes (r, dst, 8);
-      break;
+      retval = read_compressed_bytes (r, dst, 8);
+      return retval == 1 ? 1 : -1;
  
      case 254:
        memset (dst, ' ', 8);
-      break;
+      return 1;
  
      default:
-      sys_error (r, _("Compressed data is corrupt."));
+      {
+        double value = opcode - r->bias;
+        float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
+        if (value == 0.0)
+          {
+            /* This has actually been seen "in the wild".  The submitter of the
+               file showed that the contents decoded as spaces, but they
+               were at the end of the field so it's possible that the null
+               bytes just acted as null terminators. */
+          }
+        else if (!r->corruption_warning)
+          {
+            r->corruption_warning = true;
+            sys_warn (r, r->pos,
+                      _("Possible compressed data corruption: "
+                        "string contains compressed integer (opcode %d)."),
+                      opcode);
+          }
+      }
+      return 1;
      }
-
-  return true;
  }
  
-/* Reads LENGTH string bytes from R into S.
-   LENGTH must be a multiple of 8.
-   Reads compressed strings if S is compressed.
-   Returns true if successful, false if end of file is
-   reached immediately. */
-static bool
-read_whole_strings (struct sfm_reader *r, char *s, size_t length)
+/* Reads LENGTH string bytes from R into S.  LENGTH must be a multiple of 8.
+   Reads compressed strings if R is compressed.  Returns 1 if successful, 0 if
+   end of file is reached immediately, or -1 for some kind of error. */
+static int
+read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
  {
    assert (length % 8 == 0);
-  if (!r->compressed)
+  if (r->compression == ANY_COMP_NONE)
      return try_read_bytes (r, s, length);
    else
      {
        size_t ofs;
+
        for (ofs = 0; ofs < length; ofs += 8)
-        if (!read_compressed_string (r, s + ofs))
-          {
-            if (ofs != 0)
-              partial_record (r);
-            return false;
+        {
+          int retval = read_compressed_string (r, s + ofs);
+          if (retval != 1)
+            {
+              if (ofs != 0)
+                {
+                  partial_record (r);
+                  return -1;
+                }
+              return retval;
+            }
            }
-      return true;
+      return 1;
      }
  }
+
+/* Skips LENGTH string bytes from R.
+   LENGTH must be a multiple of 8.
+   (LENGTH must also be less than 1024, but that's only because the
+   current caller never needs more than that many bytes.)
+   Returns true if successful, false if end of file is
+   reached immediately.  NOTE(review): a -1 error result from
+   read_whole_strings also converts to true here. */
+static bool
+skip_whole_strings (struct sfm_reader *r, size_t length)
+{
+  uint8_t buffer[1024];
+  assert (length < sizeof buffer);
+  return read_whole_strings (r, buffer, length);
+}
  \f
-/* Creates and returns a table that can be used for translating a value
-   index into a case to a "struct variable *" for DICT.  Multiple
-   system file fields reference variables this way.
-
-   This table must be created before processing the very long
-   string extension record, because that record causes some
-   values to be deleted from the case and the dictionary to be
-   compacted. */
-static struct variable **
-make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
-{
-  struct variable **var_by_value_idx;
-  int value_idx = 0;
-  int i;
+/* Helpers for reading records that contain structured text
+   strings. */
  
-  var_by_value_idx = pool_nmalloc (r->pool,
-                                   r->flt64_cnt, sizeof *var_by_value_idx);
-  for (i = 0; i < dict_get_var_cnt (dict); i++)
-    {
-      struct variable *v = dict_get_var (dict, i);
-      int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
-      int j;
+/* Maximum number of warnings to issue for a single text
+   record. */
+#define MAX_TEXT_WARNINGS 5
  
-      var_by_value_idx[value_idx++] = v;
-      for (j = 1; j < nv; j++)
-        var_by_value_idx[value_idx++] = NULL;
-    }
-  assert (value_idx == r->flt64_cnt);
+/* State. */
+struct text_record
+  {
+    struct substring buffer;    /* Record contents. */
+    off_t start;                /* Starting offset in file. */
+    size_t pos;                 /* Current position in buffer. */
+    int n_warnings;             /* Number of warnings issued or suppressed. */
+    bool recoded;               /* Recoded into UTF-8? */
+  };
  
-  return var_by_value_idx;
+static struct text_record *
+open_text_record (struct sfm_reader *r,
+                  const struct sfm_extension_record *record,
+                  bool recode_to_utf8)
+{
+  struct text_record *text;
+  struct substring raw;
+
+  text = pool_alloc (r->pool, sizeof *text);
+  raw = ss_buffer (record->data, record->size * record->count);
+  text->start = record->pos;
+  text->buffer = (recode_to_utf8
+                  ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
+                  : raw);
+  text->pos = 0;
+  text->n_warnings = 0;
+  text->recoded = recode_to_utf8;
+
+  return text;
  }
  
-/* Returns the "struct variable" corresponding to the given
-   1-basd VALUE_IDX in VAR_BY_VALUE_IDX.  Verifies that the index
-   is valid. */
-static struct variable *
-lookup_var_by_value_idx (struct sfm_reader *r,
-                         struct variable **var_by_value_idx, int value_idx)
+/* Closes TEXT, frees its storage, and issues a final warning
+   about suppressed warnings if necessary. */
+static void
+close_text_record (struct sfm_reader *r, struct text_record *text)
  {
-  struct variable *var;
+  if (text->n_warnings > MAX_TEXT_WARNINGS)
+    sys_warn (r, -1, _("Suppressed %d additional related warnings."),
+              text->n_warnings - MAX_TEXT_WARNINGS);
+  if (text->recoded)
+    pool_free (r->pool, ss_data (text->buffer));
+}
  
-  if (value_idx < 1 || value_idx > r->flt64_cnt)
-    sys_error (r, _("Variable index %d not in valid range 1...%d."),
-               value_idx, r->flt64_cnt);
+/* Reads a variable=value pair from TEXT.
+   Looks up the variable in DICT and stores it into *VAR.
+   Stores a null-terminated value into *VALUE. */
+static bool
+read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
+                             struct text_record *text,
+                             struct variable **var, char **value)
+{
+  for (;;)
+    {
+      if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
+        return false;
+
+      *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
+      if (*value == NULL)
+        return false;
  
-  var = var_by_value_idx[value_idx - 1];
-  if (var == NULL)
-    sys_error (r, _("Variable index %d refers to long string "
-                    "continuation."),
-               value_idx);
+      text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
+                            ss_buffer ("\t\0", 2));
  
-  return var;
+      if (*var != NULL)
+        return true;
+    }
  }
  
-/* Returns the variable in D with the given SHORT_NAME,
-   or a null pointer if there is none. */
-static struct variable *
-lookup_var_by_short_name (struct dictionary *d, const char *short_name)
+static bool
+text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
+                         struct text_record *text, struct substring delimiters,
+                         struct variable **var)
  {
-  struct variable *var;
-  size_t var_cnt;
-  size_t i;
+  char *name;
  
-  /* First try looking up by full name.  This often succeeds. */
-  var = dict_lookup_var (d, short_name);
-  if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
-    return var;
+  name = text_get_token (text, delimiters, NULL);
+  if (name == NULL)
+    return false;
  
-  /* Iterate through the whole dictionary as a fallback. */
-  var_cnt = dict_get_var_cnt (d);
-  for (i = 0; i < var_cnt; i++)
-    {
-      var = dict_get_var (d, i);
-      if (!strcasecmp (var_get_short_name (var, 0), short_name))
-        return var;
-    }
+  *var = dict_lookup_var (dict, name);
+  if (*var != NULL)
+    return true;
  
-  return NULL;
+  text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
+             name);
+  return false;
  }
-\f
-/* Helpers for reading records that contain "variable=value"
-   pairs. */
  
-/* State. */
-struct variable_to_value_map
-  {
-    struct substring buffer;    /* Record contents. */
-    size_t pos;                 /* Current position in buffer. */
-  };
  
-/* Reads SIZE bytes into a "variable=value" map for R,
-   and returns the map. */
-static struct variable_to_value_map *
-open_variable_to_value_map (struct sfm_reader *r, size_t size)
+static bool
+text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
+                      struct text_record *text, struct substring delimiters,
+                      struct variable **var)
  {
-  struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map);
-  char *buffer = pool_malloc (r->pool, size + 1);
-  read_bytes (r, buffer, size);
-  map->buffer = ss_buffer (buffer, size);
-  map->pos = 0;
-  return map;
+  char *short_name = text_get_token (text, delimiters, NULL);
+  if (short_name == NULL)
+    return false;
+
+  *var = dict_lookup_var (dict, short_name);
+  if (*var == NULL)
+    text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
+               short_name);
+  return true;
  }
  
-/* Closes MAP and frees its storage.
-   Not really needed, because the pool will free the map anyway,
-   but can be used to free it earlier. */
+/* Displays a warning for the current file position, limiting the
+   number to MAX_TEXT_WARNINGS for TEXT. */
  static void
-close_variable_to_value_map (struct sfm_reader *r,
-                             struct variable_to_value_map *map)
+text_warn (struct sfm_reader *r, struct text_record *text,
+           const char *format, ...)
  {
-  pool_free (r->pool, ss_data (map->buffer));
+  if (text->n_warnings++ < MAX_TEXT_WARNINGS)
+    {
+      va_list args;
+
+      va_start (args, format);
+      sys_msg (r, text->start + text->pos, MW, format, args);
+      va_end (args);
+    }
  }
  
-/* Reads the next variable=value pair from MAP.
-   Looks up the variable in DICT and stores it into *VAR.
-   Stores a null-terminated value into *VALUE. */
-static bool
-read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict,
-                            struct variable_to_value_map *map,
-                            struct variable **var, char **value,
-                            int *warning_cnt)
+static char *
+text_get_token (struct text_record *text, struct substring delimiters,
+                char *delimiter)
+{
+  struct substring token;
+  char *end;
+
+  if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
+    return NULL;
+
+  end = &ss_data (token)[ss_length (token)];
+  if (delimiter != NULL)
+    *delimiter = *end;
+  *end = '\0';
+  return ss_data (token);
+}
+
+/* Reads an integer value expressed in decimal, then a space, then a string that
+   consists of exactly as many bytes as specified by the integer, then a space,
+   from TEXT.  Returns the string, null-terminated, as a subset of TEXT's
+   buffer (so the caller should not free the string). */
+static const char *
+text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
  {
-  int max_warnings = 5;
+  size_t start;
+  size_t n;
+  char *s;
  
-  for (;;)
+  start = text->pos;
+  n = 0;
+  while (text->pos < text->buffer.length)
+    {
+      int c = text->buffer.string[text->pos];
+      if (c < '0' || c > '9')
+        break;
+      n = (n * 10) + (c - '0');
+      text->pos++;
+    }
+  if (text->pos >= text->buffer.length || start == text->pos)
      {
-      struct substring short_name_ss, value_ss;
+      sys_warn (r, text->start,
+                _("Expecting digit at offset %zu in MRSETS record."),
+                text->pos);
+      return NULL;
+    }
  
-      if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss)
-          || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos,
-                           &value_ss))
-        {
-          if (*warning_cnt > max_warnings)
-            sys_warn (r, _("Suppressed %d additional variable map warnings."),
-                      *warning_cnt - max_warnings);
-          return false;
-        }
+  if (!text_match (text, ' '))
+    {
+      sys_warn (r, text->start,
+                _("Expecting space at offset %zu in MRSETS record."),
+                text->pos);
+      return NULL;
+    }
  
-      map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX),
-                           ss_buffer ("\t\0", 2));
+  if (text->pos + n > text->buffer.length)
+    {
+      sys_warn (r, text->start,
+                _("%zu-byte string starting at offset %zu "
+                  "exceeds record length %zu."),
+                n, text->pos, text->buffer.length);
+      return NULL;
+    }
  
-      ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0';
-      *var = lookup_var_by_short_name (dict, ss_data (short_name_ss));
-      if (*var == NULL)
-        {
-          if (++*warning_cnt <= 5)
-            sys_warn (r, _("Variable map refers to unknown variable %s."),
-                      ss_data (short_name_ss));
-          continue;
-        }
+  s = &text->buffer.string[text->pos];
+  if (s[n] != ' ')
+    {
+      sys_warn (r, text->start,
+                _("Expecting space at offset %zu following %zu-byte string."),
+                text->pos + n, n);
+      return NULL;
+    }
+  s[n] = '\0';
+  text->pos += n + 1;
+  return s;
+}
  
-      ss_data (value_ss)[ss_length (value_ss)] = '\0';
-      *value = ss_data (value_ss);
+static bool
+text_match (struct text_record *text, char c)
+{
+  if (text->pos >= text->buffer.length)
+    return false;
  
+  if (text->buffer.string[text->pos] == c)
+    {
+      text->pos++;
        return true;
      }
+  else
+    return false;
+}
+
+/* Returns the current byte offset (as converted to UTF-8, if it was converted)
+   inside the TEXT's string. */
+static size_t
+text_pos (const struct text_record *text)
+{
+  return text->pos;
+}
+
+static const char *
+text_get_all (const struct text_record *text)
+{
+  return text->buffer.string;
  }
  \f
  /* Messages. */
  
  /* Displays a corruption message. */
  static void
-sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
+sys_msg (struct sfm_reader *r, off_t offset,
+         int class, const char *format, va_list args)
  {
    struct msg m;
    struct string text;
  
    ds_init_empty (&text);
-  ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
-                 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
+  if (offset >= 0)
+    ds_put_format (&text, _("`%s' near offset 0x%llx: "),
+                   fh_get_file_name (r->fh), (long long int) offset);
+  else
+    ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
    ds_put_vformat (&text, format, args);
  
    m.category = msg_class_to_category (class);
    m.severity = msg_class_to_severity (class);
-  m.where.file_name = NULL;
-  m.where.line_number = 0;
+  m.file_name = NULL;
+  m.first_line = 0;
+  m.last_line = 0;
+  m.first_column = 0;
+  m.last_column = 0;
    m.text = ds_cstr (&text);
  
    msg_emit (&m);
  }
  
-/* Displays a warning for the current file position. */
+/* Displays a warning for offset OFFSET in the file. */
  static void
-sys_warn (struct sfm_reader *r, const char *format, ...)
+sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
  {
    va_list args;
  
    va_start (args, format);
-  sys_msg (r, MW, format, args);
+  sys_msg (r, offset, MW, format, args);
    va_end (args);
  }
  
-/* Displays an error for the current file position,
-   marks it as in an error state,
-   and aborts reading it using longjmp. */
+/* Displays an error for offset OFFSET in the file and marks R as being in an
+   error state. */
  static void
-sys_error (struct sfm_reader *r, const char *format, ...)
+sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
  {
    va_list args;
  
    va_start (args, format);
-  sys_msg (r, ME, format, args);
+  sys_msg (r, offset, ME, format, args);
    va_end (args);
  
    r->error = true;
-  longjmp (r->bail_out, 1);
  }
  \f
  /* Reads BYTE_CNT bytes into BUF.
-   Returns true if exactly BYTE_CNT bytes are successfully read.
-   Aborts if an I/O error or a partial read occurs.
-   If EOF_IS_OK, then an immediate end-of-file causes false to be
-   returned; otherwise, immediate end-of-file causes an abort
-   too. */
-static inline bool
+   Returns 1 if exactly BYTE_CNT bytes are successfully read.
+   Returns -1 and reports an error if an I/O error or a partial read occurs, or
+   if EOF_IS_OK is false and end-of-file is encountered immediately.
+   Returns 0 for an immediate end-of-file if EOF_IS_OK is true. */
+static inline int
  read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
-                   void *buf, size_t byte_cnt)
+                     void *buf, size_t byte_cnt)
  {
    size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
+  r->pos += bytes_read;
    if (bytes_read == byte_cnt)
-    return true;
+    return 1;
    else if (ferror (r->file))
-    sys_error (r, _("System error: %s."), strerror (errno));
+    {
+      sys_error (r, r->pos, _("System error: %s."), strerror (errno));
+      return -1;
+    }
    else if (!eof_is_ok || bytes_read != 0)
-    sys_error (r, _("Unexpected end of file."));
+    {
+      sys_error (r, r->pos, _("Unexpected end of file."));
+      return -1;
+    }
    else
-    return false;
+    return 0;
  }
  
  /* Reads BYTE_CNT into BUF.
-   Aborts upon I/O error or if end-of-file is encountered. */
-static void
+   Returns true if successful.
+   Returns false upon I/O error or if end-of-file is encountered. */
+static bool
  read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
  {
-  read_bytes_internal (r, false, buf, byte_cnt);
+  return read_bytes_internal (r, false, buf, byte_cnt) == 1;
  }
  
  /* Reads BYTE_CNT bytes into BUF.
-   Returns true if exactly BYTE_CNT bytes are successfully read.
-   Returns false if an immediate end-of-file is encountered.
-   Aborts if an I/O error or a partial read occurs. */
-static bool
+   Returns 1 if exactly BYTE_CNT bytes are successfully read.
+   Returns 0 if an immediate end-of-file is encountered.
+   Returns -1 if an I/O error or a partial read occurs. */
+static int
  try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
  {
    return read_bytes_internal (r, true, buf, byte_cnt);
  }
  
-/* Reads a 32-bit signed integer from R and returns its value in
+/* Reads a 32-bit signed integer from R and stores its value in host format in
+   *X.  Returns true if successful, otherwise false. */
+static bool
+read_int (struct sfm_reader *r, int *x)
+{
+  uint8_t integer[4];
+  if (read_bytes (r, integer, sizeof integer) != 1)
+    return false;
+  *x = integer_get (r->integer_format, integer, sizeof integer);
+  return true;
+}
+
+static bool
+read_uint (struct sfm_reader *r, unsigned int *x)
+{
+  bool ok;
+  int y;
+
+  ok = read_int (r, &y);
+  *x = y;
+  return ok;
+}
+
+/* Reads a 64-bit signed integer from R and stores its value in *X, in
     host format. */
-static int32_t
-read_int32 (struct sfm_reader *r)
+static bool
+read_int64 (struct sfm_reader *r, long long int *x)
  {
-  uint8_t int32[4];
-  read_bytes (r, int32, sizeof int32);
-  return int32_to_native (r, int32);
+  uint8_t integer[8];
+  if (read_bytes (r, integer, sizeof integer) != 1)
+    return false;
+  *x = integer_get (r->integer_format, integer, sizeof integer);
+  return true;
+}
+
+/* Reads a 64-bit unsigned integer from R and stores its value in host format
+   in *X.  Returns true if successful, otherwise false. */
+static bool
+read_uint64 (struct sfm_reader *r, unsigned long long int *x)
+{
+  long long int y;
+  bool ok;
+
+  ok = read_int64 (r, &y);
+  *x = y;
+  return ok;
+}
+
+static int
+parse_int (const struct sfm_reader *r, const void *data, size_t ofs)
+{
+  return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
  }
  
-/* Reads a 64-bit floating-point number from R and returns its
-   value in host format. */
  static double
-read_flt64 (struct sfm_reader *r)
+parse_float (const struct sfm_reader *r, const void *data, size_t ofs)
  {
-  uint8_t flt64[8];
-  read_bytes (r, flt64, sizeof flt64);
-  return flt64_to_double (r, flt64);
+  return float_get_double (r->float_format, (const uint8_t *) data + ofs);
  }
  
  /* Reads exactly SIZE - 1 bytes into BUFFER
     and stores a null byte into BUFFER[SIZE - 1]. */
-static void
+static bool
  read_string (struct sfm_reader *r, char *buffer, size_t size)
  {
+  bool ok;
+
    assert (size > 0);
-  read_bytes (r, buffer, size - 1);
-  buffer[size - 1] = '\0';
+  ok = read_bytes (r, buffer, size - 1);
+  if (ok)
+    buffer[size - 1] = '\0';
+  return ok;
  }
  
  /* Skips BYTES bytes forward in R. */
-static void
+static bool
  skip_bytes (struct sfm_reader *r, size_t bytes)
  {
    while (bytes > 0)
      {
        char buffer[1024];
        size_t chunk = MIN (sizeof buffer, bytes);
-      read_bytes (r, buffer, chunk);
+      if (!read_bytes (r, buffer, chunk))
+        return false;
        bytes -= chunk;
      }
+
+  return true;
+}
+
+/* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
+   been replaced by LFs.
+
+   (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
+   files that use CR-only line ends in the file label and extra product
+   info.) */
+static char *
+fix_line_ends (const char *s)
+{
+  char *dst, *d;
+
+  d = dst = xmalloc (strlen (s) + 1);
+  while (*s != '\0')
+    {
+      if (*s == '\r')
+        {
+          s++;
+          if (*s == '\n')
+            s++;
+          *d++ = '\n';
+        }
+      else
+        *d++ = *s++;
+    }
+  *d = '\0';
+
+  return dst;
  }
  \f
-/* Returns the value of the 32-bit signed integer at INT32,
-   converted from the format used by R to the host format. */
-static int32_t
-int32_to_native (const struct sfm_reader *r, const uint8_t int32[4])
-{
-  int32_t x;
-  if (r->integer_format == INTEGER_NATIVE)
-    memcpy (&x, int32, sizeof x);
+static bool
+read_ztrailer (struct sfm_reader *r,
+               long long int zheader_ofs,
+               long long int ztrailer_len);
+
+static void *
+zalloc (voidpf pool_, uInt items, uInt size)
+{
+  struct pool *pool = pool_;
+
+  return (!size || xalloc_oversized (items, size)
+          ? Z_NULL
+          : pool_malloc (pool, items * size));
+}
+
+static void
+zfree (voidpf pool_, voidpf address)
+{
+  struct pool *pool = pool_;
+
+  pool_free (pool, address);
+}
+
+static bool
+read_zheader (struct sfm_reader *r)
+{
+  off_t pos = r->pos;
+  long long int zheader_ofs;
+  long long int ztrailer_ofs;
+  long long int ztrailer_len;
+
+  if (!read_int64 (r, &zheader_ofs)
+      || !read_int64 (r, &ztrailer_ofs)
+      || !read_int64 (r, &ztrailer_len))
+    return false;
+
+  if (zheader_ofs != pos)
+    {
+      sys_error (r, pos, _("Wrong ZLIB data header offset %#llx "
+                           "(expected %#llx)."),
+                 zheader_ofs, (long long int) pos);
+      return false;
+    }
+
+  if (ztrailer_ofs < r->pos)
+    {
+      sys_error (r, pos, _("Impossible ZLIB trailer offset 0x%llx."),
+                 ztrailer_ofs);
+      return false;
+    }
+
+  if (ztrailer_len < 24 || ztrailer_len % 24)
+    {
+      sys_error (r, pos, _("Invalid ZLIB trailer length %lld."), ztrailer_len);
+      return false;
+    }
+
+  r->ztrailer_ofs = ztrailer_ofs;
+  if (!read_ztrailer (r, zheader_ofs, ztrailer_len))
+    return false;
+
+  if (r->zin_buf == NULL)
+    {
+      r->zin_buf = pool_malloc (r->pool, ZIN_BUF_SIZE);
+      r->zout_buf = pool_malloc (r->pool, ZOUT_BUF_SIZE);
+      r->zstream.next_in = NULL;
+      r->zstream.avail_in = 0;
+    }
+
+  r->zstream.zalloc = zalloc;
+  r->zstream.zfree = zfree;
+  r->zstream.opaque = r->pool;
+
+  return open_zstream (r);
+}
+
+static void
+seek (struct sfm_reader *r, off_t offset)
+{
+  if (fseeko (r->file, offset, SEEK_SET))
+    sys_error (r, 0, _("%s: seek failed (%s)."),
+               fh_get_file_name (r->fh), strerror (errno));
+  r->pos = offset;
+}
+
+/* Performs some additional consistency checks on the ZLIB compressed data
+   trailer. */
+static bool
+read_ztrailer (struct sfm_reader *r,
+               long long int zheader_ofs,
+               long long int ztrailer_len)
+{
+  long long int expected_uncmp_ofs;
+  long long int expected_cmp_ofs;
+  long long int bias;
+  long long int zero;
+  unsigned int block_size;
+  unsigned int n_blocks;
+  unsigned int i;
+  struct stat s;
+
+  if (fstat (fileno (r->file), &s))
+    {
+      sys_error (r, 0, _("%s: stat failed (%s)."),
+                 fh_get_file_name (r->fh), strerror (errno));
+      return false;
+    }
+
+  if (!S_ISREG (s.st_mode))
+    {
+      /* We can't seek to the trailer and then back to the data in this file,
+         so skip doing extra checks. */
+      return true;
+    }
+
+  if (r->ztrailer_ofs + ztrailer_len != s.st_size)
+    sys_warn (r, r->pos,
+              _("End of ZLIB trailer (0x%llx) is not file size (0x%llx)."),
+              r->ztrailer_ofs + ztrailer_len, (long long int) s.st_size);
+
+  seek (r, r->ztrailer_ofs);
+
+  /* Read fixed header from ZLIB data trailer. */
+  if (!read_int64 (r, &bias))
+    return false;
+  if (-bias != r->bias)
+    {
+      sys_error (r, r->pos, _("ZLIB trailer bias (%lld) differs from "
+                              "file header bias (%.2f)."),
+                 -bias, r->bias);
+      return false;
+    }
+
+  if (!read_int64 (r, &zero))
+    return false;
+  if (zero != 0)
+    sys_warn (r, r->pos,
+              _("ZLIB trailer \"zero\" field has nonzero value %lld."), zero);
+
+  if (!read_uint (r, &block_size))
+    return false;
+  if (block_size != ZBLOCK_SIZE)
+    sys_warn (r, r->pos,
+              _("ZLIB trailer specifies unexpected %u-byte block size."),
+              block_size);
+
+  if (!read_uint (r, &n_blocks))
+    return false;
+  if (n_blocks != (ztrailer_len - 24) / 24)
+    {
+      sys_error (r, r->pos,
+                 _("%lld-byte ZLIB trailer specifies %u data blocks (expected "
+                   "%lld)."),
+                 ztrailer_len, n_blocks, (ztrailer_len - 24) / 24);
+      return false;
+    }
+
+  expected_uncmp_ofs = zheader_ofs;
+  expected_cmp_ofs = zheader_ofs + 24;
+  for (i = 0; i < n_blocks; i++)
+    {
+      off_t desc_ofs = r->pos;
+      unsigned long long int uncompressed_ofs;
+      unsigned long long int compressed_ofs;
+      unsigned int uncompressed_size;
+      unsigned int compressed_size;
+
+      if (!read_uint64 (r, &uncompressed_ofs)
+          || !read_uint64 (r, &compressed_ofs)
+          || !read_uint (r, &uncompressed_size)
+          || !read_uint (r, &compressed_size))
+        return false;
+
+      if (uncompressed_ofs != expected_uncmp_ofs)
+        {
+          sys_error (r, desc_ofs,
+                     _("ZLIB block descriptor %u reported uncompressed data "
+                       "offset %#llx, when %#llx was expected."),
+                     i, uncompressed_ofs, expected_uncmp_ofs);
+          return false;
+        }
+
+      if (compressed_ofs != expected_cmp_ofs)
+        {
+          sys_error (r, desc_ofs,
+                     _("ZLIB block descriptor %u reported compressed data "
+                       "offset %#llx, when %#llx was expected."),
+                     i, compressed_ofs, expected_cmp_ofs);
+          return false;
+        }
+
+      if (i < n_blocks - 1)
+        {
+          if (uncompressed_size != block_size)
+            sys_warn (r, desc_ofs,
+                      _("ZLIB block descriptor %u reported block size %#x, "
+                        "when %#x was expected."),
+                      i, uncompressed_size, block_size);
+        }
+      else
+        {
+          if (uncompressed_size > block_size)
+            sys_warn (r, desc_ofs,
+                      _("ZLIB block descriptor %u reported block size %#x, "
+                        "when at most %#x was expected."),
+                      i, uncompressed_size, block_size);
+        }
+
+      /* http://www.zlib.net/zlib_tech.html says that the maximum expansion
+         from compression, with worst-case parameters, is 13.5% plus 11 bytes.
+         This code checks for an expansion of more than 14.3% plus 11
+         bytes.  */
+      if (compressed_size > uncompressed_size + uncompressed_size / 7 + 11)
+        {
+          sys_error (r, desc_ofs,
+                     _("ZLIB block descriptor %u reports compressed size %u "
+                       "and uncompressed size %u."),
+                     i, compressed_size, uncompressed_size);
+          return false;
+        }
+
+      expected_uncmp_ofs += uncompressed_size;
+      expected_cmp_ofs += compressed_size;
+    }
+
+  if (expected_cmp_ofs != r->ztrailer_ofs)
+    {
+      sys_error (r, r->pos, _("ZLIB trailer is at offset %#llx but %#llx "
+                              "would be expected from block descriptors."),
+                 r->ztrailer_ofs, expected_cmp_ofs);
+      return false;
+    }
+
+  seek (r, zheader_ofs + 24);
+  return true;
+}
+
+/* Prepares R for inflating a new ZLIB stream: marks the buffer of
+   already-inflated output as empty and initializes R->zstream with
+   inflateInit().
+
+   Returns true if successful.  If inflateInit() returns anything other
+   than Z_OK, calls sys_error() and returns false. */
+static bool
+open_zstream (struct sfm_reader *r)
+{
+  int error;
+
+  r->zout_pos = r->zout_end = 0;
+  error = inflateInit (&r->zstream);
+  if (error != Z_OK)
+    {
+      /* ZLIB leaves 'msg' null when it has no specific message (the
+         usual case for an initialization failure), so fall back to the
+         generic text for the error code instead of handing a null
+         pointer to "%s". */
+      const char *msg = (r->zstream.msg != NULL
+                         ? r->zstream.msg : zError (error));
+      sys_error (r, r->pos, _("ZLIB initialization failed (%s)."), msg);
+      return false;
+    }
+  return true;
+}
+
+/* Releases the inflater state held in R->zstream with inflateEnd().
+
+   Returns true if successful.  If inflateEnd() returns anything other
+   than Z_OK, calls sys_error() and returns false. */
+static bool
+close_zstream (struct sfm_reader *r)
+{
+  int error;
+
+  error = inflateEnd (&r->zstream);
+  if (error != Z_OK)
+    {
+      /* ZLIB leaves 'msg' null when it has no specific message, so fall
+         back to the generic text for the error code instead of handing
+         a null pointer to "%s". */
+      const char *msg = (r->zstream.msg != NULL
+                         ? r->zstream.msg : zError (error));
+      sys_error (r, r->pos, _("Inconsistency at end of ZLIB stream (%s)."),
+                 msg);
+      return false;
+    }
+  return true;
+}
+
+/* Copies BYTE_CNT bytes of inflated data from R into BUF_, reading and
+   inflating more compressed input as needed.  Compressed input is read
+   with try_read_bytes() in pieces of at most ZIN_BUF_SIZE bytes and
+   never past R->ztrailer_ofs.  When inflate() reports the end of one
+   ZLIB stream without producing output, the stream is closed and
+   reopened so that inflation continues with the following stream.
+
+   Returns 1 if BYTE_CNT bytes were stored into BUF_ (trivially so if
+   BYTE_CNT is 0), 0 if more input was needed but R->pos had already
+   reached R->ztrailer_ofs, or -1 if ZLIB reported an error, which is
+   passed to sys_error().  A return value other than 1 from
+   try_read_bytes() is passed back unchanged.
+
+   NOTE(review): 0 is also returned when some, but not all, of the
+   requested bytes were copied before the input ran out; confirm that
+   callers do not mistake that for a clean end of data. */
+static int
+read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt)
+{
+  uint8_t *buf = buf_;
+
+  if (byte_cnt == 0)
+    return 1;
+
+  for (;;)
+    {
+      int error;
+
+      /* Use already inflated data if there is any. */
+      if (r->zout_pos < r->zout_end)
+        {
+          unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos);
+          memcpy (buf, &r->zout_buf[r->zout_pos], n);
+          r->zout_pos += n;
+          byte_cnt -= n;
+          buf += n;
+
+          if (byte_cnt == 0)
+            return 1;
+        }
+
+      /* We need to inflate some more data.
+         Get some more input data if we don't have any. */
+      if (r->zstream.avail_in == 0)
+        {
+          unsigned int n = MIN (ZIN_BUF_SIZE, r->ztrailer_ofs - r->pos);
+          if (n == 0)
+            return 0;
+          else
+            {
+              int retval = try_read_bytes (r, r->zin_buf, n);
+              if (retval != 1)
+                return retval;
+              r->zstream.avail_in = n;
+              r->zstream.next_in = r->zin_buf;
+            }
+        }
+
+      /* Inflate the (remaining) input data. */
+      r->zstream.avail_out = ZOUT_BUF_SIZE;
+      r->zstream.next_out = r->zout_buf;
+      error = inflate (&r->zstream, Z_SYNC_FLUSH);
+      r->zout_pos = 0;
+      r->zout_end = r->zstream.next_out - r->zout_buf;
+      if (r->zout_end == 0)
+        {
+          if (error != Z_STREAM_END)
+            {
+              /* Not every inflate() failure sets 'msg', so fall back to
+                 the generic text for the error code instead of handing
+                 a null pointer to "%s". */
+              const char *msg = (r->zstream.msg != NULL
+                                 ? r->zstream.msg : zError (error));
+              sys_error (r, r->pos, _("ZLIB stream inconsistency (%s)."),
+                         msg);
+              return -1;
+            }
+          else if (!close_zstream (r) || !open_zstream (r))
+            return -1;
+        }
+      else
+        {
+          /* Process the output data and ignore 'error' for now.  ZLIB will
+             present it to us again on the next inflate() call. */
+        }
+    }
+}
+
+/* Reads BYTE_CNT bytes from R into BUF.  If R's compression is
+   ANY_COMP_SIMPLE the bytes are read with read_bytes(); otherwise they
+   are inflated with read_bytes_zlib(), and a return value of 0 from it
+   is additionally reported with sys_error() as an unexpected end of
+   the compressed data.
+
+   Returns whatever the underlying read function returned.  In the ZLIB
+   case that may be a negative value as well as 0 or 1, so callers
+   should not test the result with "!" alone. */
+static int
+read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+{
+  if (r->compression == ANY_COMP_SIMPLE)
+    return read_bytes (r, buf, byte_cnt);
    else
-    x = integer_get (r->integer_format, int32, sizeof x);
-  return x;
+    {
+      int retval = read_bytes_zlib (r, buf, byte_cnt);
+      if (retval == 0)
+        sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data."));
+      return retval;
+    }
  }
  
-/* Returns the value of the 64-bit floating point number at
-   FLT64, converted from the format used by R to the host
-   format. */
-static double
-flt64_to_double (const struct sfm_reader *r, const uint8_t flt64[8])
+/* Reads BYTE_CNT bytes from R into BUF.  If R's compression is
+   ANY_COMP_SIMPLE the bytes are read with try_read_bytes(); otherwise
+   they are inflated with read_bytes_zlib().  Unlike
+   read_compressed_bytes(), this function does not itself report an
+   error when read_bytes_zlib() returns 0.
+
+   Returns whatever the underlying read function returned. */
+static int
+try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
  {
-  double x;
-  if (r->float_format == FLOAT_NATIVE_DOUBLE)
-    memcpy (&x, flt64, sizeof x);
+  if (r->compression == ANY_COMP_SIMPLE)
+    return try_read_bytes (r, buf, byte_cnt);
    else
-    float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x);
-  return x;
+    return read_bytes_zlib (r, buf, byte_cnt);
+}
+
+/* Reads a 64-bit floating-point number from R with
+   read_compressed_bytes() and stores its value, converted to host
+   format by float_get_double(), in *D.
+
+   Returns true if successful.  Returns false, leaving *D untouched, if
+   the bytes could not be read. */
+static bool
+read_compressed_float (struct sfm_reader *r, double *d)
+{
+  uint8_t number[8];
+
+  /* read_compressed_bytes() can pass along a negative error value from
+     read_bytes_zlib().  Testing with "!" would mistake that for success
+     and decode an uninitialized buffer, so treat every result that is
+     not positive as a failure. */
+  if (read_compressed_bytes (r, number, sizeof number) <= 0)
+    return false;
+
+  *d = float_get_double (r->float_format, number);
+  return true;
  }
  \f
-static struct casereader_class sys_file_casereader_class =
+/* Casereader class for system files.  It supplies
+   sys_file_casereader_read and sys_file_casereader_destroy and leaves
+   the remaining two members null. */
+static const struct casereader_class sys_file_casereader_class =
    {
      sys_file_casereader_read,
      sys_file_casereader_destroy,
      NULL,
      NULL,
    };
+
+/* any_reader class for SPSS system files: a display name followed by
+   the sfm_* functions that implement detecting, opening, closing,
+   decoding, and getting strings from such a file.  (N_() presumably
+   only marks the name for later translation -- confirm.) */
+const struct any_reader_class sys_file_reader_class =
+  {
+    N_("SPSS System File"),
+    sfm_detect,
+    sfm_open,
+    sfm_close,
+    sfm_decode,
+    sfm_get_strings,
+  };