lexer: Remove unused function lex_put_back_id().

[pspp-builds.git] / src / data / sys-file-reader.c
diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c

index 9589747ea0bc2775d98c20372a804ea9a2ff2214..c9c843dfaf4dd83cee88f7dd510284fa37e516a7 100644 (file)
--- a/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@ -1,25 +1,23 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
  
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
  
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
  
     You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  
  #include <config.h>
  
-#include <data/sys-file-reader.h>
-#include <data/sys-file-private.h>
+#include "data/sys-file-reader.h"
+#include "data/sys-file-private.h"
  
  #include <errno.h>
  #include <float.h>
@@ -27,34 +25,36 @@
  #include <setjmp.h>
  #include <stdlib.h>
  
-#include <libpspp/alloc.h>
-#include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/compiler.h>
-#include <libpspp/magic.h>
-#include <libpspp/misc.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
-#include <libpspp/hash.h>
-#include <libpspp/array.h>
-
-#include <data/case.h>
-#include <data/casereader-provider.h>
-#include <data/casereader.h>
-#include <data/dictionary.h>
-#include <data/file-handle-def.h>
-#include <data/file-name.h>
-#include <data/format.h>
-#include <data/missing-values.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <data/value.h>
-
-#include "c-ctype.h"
-#include "inttostr.h"
-#include "minmax.h"
-#include "unlocked-io.h"
-#include "xsize.h"
+#include "data/attributes.h"
+#include "data/case.h"
+#include "data/casereader-provider.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/file-handle-def.h"
+#include "data/file-name.h"
+#include "data/format.h"
+#include "data/missing-values.h"
+#include "data/mrset.h"
+#include "data/short-names.h"
+#include "data/value-labels.h"
+#include "data/value.h"
+#include "data/variable.h"
+#include "libpspp/array.h"
+#include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+#include "libpspp/str.h"
+#include "libpspp/stringi-set.h"
+
+#include "gl/c-ctype.h"
+#include "gl/inttostr.h"
+#include "gl/minmax.h"
+#include "gl/unlocked-io.h"
+#include "gl/xalloc.h"
+#include "gl/xsize.h"
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
@@ -69,34 +69,29 @@ struct sfm_reader
  
      /* File state. */
      struct file_handle *fh;     /* File handle. */
+    struct fh_lock *lock;       /* Mutual exclusion for file handle. */
      FILE *file;                 /* File stream. */
      bool error;                 /* I/O or corruption error? */
-    size_t value_cnt;           /* Number of "union value"s in struct case. */
+    struct caseproto *proto;    /* Format of output cases. */
  
      /* File format. */
      enum integer_format integer_format; /* On-disk integer format. */
      enum float_format float_format; /* On-disk floating point format. */
-    int flt64_cnt;             /* Number of 8-byte units per case. */
-    struct sfm_var *vars;       /* Variables. */
-    size_t var_cnt;             /* Number of variables. */
+    int oct_cnt;               /* Number of 8-byte units per case. */
+    struct sfm_var *sfm_vars;   /* Variables. */
+    size_t sfm_var_cnt;         /* Number of variables. */
+    casenumber case_cnt;        /* Number of cases */
      bool has_long_var_names;    /* File has a long variable name map */
-    bool has_vls;               /* File has one or more very long strings? */
  
      /* Decompression. */
      bool compressed;           /* File is compressed? */
      double bias;               /* Compression bias, usually 100.0. */
      uint8_t opcodes[8];         /* Current block of opcodes. */
      size_t opcode_idx;          /* Next opcode to interpret, 8 if none left. */
+    bool corruption_warning;    /* Warned about possible corruption? */
    };
  
-/* A variable in a system file. */
-struct sfm_var 
-  {
-    int width;                  /* 0=numeric, otherwise string width. */
-    int case_index;             /* Index into case. */
-  };
-
-static struct casereader_class sys_file_casereader_class;
+static const struct casereader_class sys_file_casereader_class;
  
  static bool close_reader (struct sfm_reader *);
  
@@ -105,50 +100,66 @@ static struct variable **make_var_by_value_idx (struct sfm_reader *,
  static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
                                                   struct variable **,
                                                   int value_idx);
+static struct variable *lookup_var_by_short_name (struct dictionary *,
+                                                  const char *short_name);
  
+static void sys_msg (struct sfm_reader *r, int class,
+                     const char *format, va_list args)
+     PRINTF_FORMAT (3, 0);
  static void sys_warn (struct sfm_reader *, const char *, ...)
       PRINTF_FORMAT (2, 3);
-
  static void sys_error (struct sfm_reader *, const char *, ...)
       PRINTF_FORMAT (2, 3)
       NO_RETURN;
  
  static void read_bytes (struct sfm_reader *, void *, size_t);
  static bool try_read_bytes (struct sfm_reader *, void *, size_t);
-static int32_t read_int32 (struct sfm_reader *);
-static double read_flt64 (struct sfm_reader *);
+static int read_int (struct sfm_reader *);
+static double read_float (struct sfm_reader *);
  static void read_string (struct sfm_reader *, char *, size_t);
  static void skip_bytes (struct sfm_reader *, size_t);
  
-static int32_t int32_to_native (const struct sfm_reader *, const uint8_t[4]);
-static double flt64_to_double (const struct sfm_reader *, const uint8_t[8]);
-
-static struct variable_to_value_map *open_variable_to_value_map (
-  struct sfm_reader *, size_t size);
-static void close_variable_to_value_map (struct sfm_reader *r,
-                                         struct variable_to_value_map *);
-static bool read_variable_to_value_map (struct sfm_reader *,
-                                        struct dictionary *,
-                                        struct variable_to_value_map *,
-                                        struct variable **var, char **value,
-                                        int *warning_cnt);
+static struct text_record *open_text_record (struct sfm_reader *, size_t size);
+static void close_text_record (struct sfm_reader *r,
+                               struct text_record *);
+static bool read_variable_to_value_pair (struct sfm_reader *,
+                                         struct dictionary *,
+                                         struct text_record *,
+                                         struct variable **var, char **value);
+static void text_warn (struct sfm_reader *r, struct text_record *text,
+                       const char *format, ...)
+  PRINTF_FORMAT (3, 4);
+static char *text_get_token (struct text_record *,
+                             struct substring delimiters, char *delimiter);
+static bool text_match (struct text_record *, char c);
+static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
+                                     struct text_record *,
+                                     struct substring delimiters,
+                                     struct variable **);
+static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
+                                  struct text_record *,
+                                  struct substring delimiters,
+                                  struct variable **);
+static const char *text_parse_counted_string (struct sfm_reader *,
+                                              struct text_record *);
+static size_t text_pos (const struct text_record *);
  
  static bool close_reader (struct sfm_reader *r);
  \f
  /* Dictionary reader. */
  
-enum which_format 
+enum which_format
    {
      PRINT_FORMAT,
      WRITE_FORMAT
    };
  
  static void read_header (struct sfm_reader *, struct dictionary *,
-                         int *weight_idx, int *claimed_flt64_cnt,
+                         int *weight_idx, int *claimed_oct_cnt,
                           struct sfm_read_info *);
  static void read_variable_record (struct sfm_reader *, struct dictionary *,
                                    int *format_warning_cnt);
-static void parse_format_spec (struct sfm_reader *, uint32_t,
+static void parse_format_spec (struct sfm_reader *, unsigned int,
                                 enum which_format, struct variable *,
                                 int *format_warning_cnt);
  static void setup_weight (struct sfm_reader *, int weight_idx,
@@ -158,11 +169,17 @@ static void read_documents (struct sfm_reader *, struct dictionary *);
  static void read_value_labels (struct sfm_reader *, struct dictionary *,
                                 struct variable **var_by_value_idx);
  
-static void read_extension_record (struct sfm_reader *, struct dictionary *);
-static void read_machine_int32_info (struct sfm_reader *,
-                                     size_t size, size_t count);
-static void read_machine_flt64_info (struct sfm_reader *,
+static void read_extension_record (struct sfm_reader *, struct dictionary *,
+                                   struct sfm_read_info *);
+static void read_machine_integer_info (struct sfm_reader *,
+                                       size_t size, size_t count,
+                                       struct sfm_read_info *,
+                                      struct dictionary *
+                                      );
+static void read_machine_float_info (struct sfm_reader *,
                                       size_t size, size_t count);
+static void read_mrsets (struct sfm_reader *, size_t size, size_t count,
+                         struct dictionary *);
  static void read_display_parameters (struct sfm_reader *,
                                       size_t size, size_t count,
                                       struct dictionary *);
@@ -172,7 +189,71 @@ static void read_long_var_name_map (struct sfm_reader *,
  static void read_long_string_map (struct sfm_reader *,
                                    size_t size, size_t count,
                                    struct dictionary *);
+static void read_data_file_attributes (struct sfm_reader *,
+                                       size_t size, size_t count,
+                                       struct dictionary *);
+static void read_variable_attributes (struct sfm_reader *,
+                                      size_t size, size_t count,
+                                      struct dictionary *);
+static void read_long_string_value_labels (struct sfm_reader *,
+                                          size_t size, size_t count,
+                                          struct dictionary *);
+
+/* Recodes DICT's variable names, variable labels and value labels from the dictionary encoding (or the default encoding if DICT has none) to UTF-8. */
+static void
+recode_strings (struct dictionary *dict)
+{
+  int i;
+
+  const char *enc = dict_get_encoding (dict);
  
+  if ( NULL == enc)
+    enc = get_default_encoding ();
+
+  for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
+    {
+      /* Recode the variable name; rename only if the recoded text differs and is not already used by another variable. */
+      struct variable *var = dict_get_var (dict, i);
+      const char *native_name = var_get_name (var);
+      char *utf8_name = recode_string (UTF8, enc, native_name, -1);
+      if ( 0 != strcmp (utf8_name, native_name))
+       {
+         if ( NULL == dict_lookup_var (dict, utf8_name))
+           dict_rename_var (dict, var, utf8_name);
+         else  /* Recoded name is already taken: warn and leave the variable's name unchanged. */
+           msg (MW,
+            _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name);
+    }
+
+      free (utf8_name);
+
+      /* Recode the variable label, if any; the temporary recoded copy is freed once it has been set. */
+      if (var_has_label (var))
+       {
+         char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
+         var_set_label (var, utf8_label);
+         free (utf8_label);
+       }
+
+      if (var_has_value_labels (var))  /* Replace each value label with its recoded text. */
+       {
+         const struct val_lab *vl = NULL;
+         const struct val_labs *vlabs = var_get_value_labels (var);
+
+         for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
+           {
+             const union value *val = val_lab_get_value (vl);
+             const char *label = val_lab_get_label (vl);
+             char *new_label = NULL;
+
+             new_label = recode_string (UTF8, enc, label, -1);
+
+             var_replace_value_label (var, val, new_label);
+             free (new_label);
+           }
+       }
+    }
+}
  
  /* Opens the system file designated by file handle FH for
     reading.  Reads the system file's dictionary into *DICT.
@@ -180,55 +261,61 @@ static void read_long_string_map (struct sfm_reader *,
     system file. */
  struct casereader *
  sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
-                 struct sfm_read_info *info)
+                 struct sfm_read_info *volatile info)
  {
    struct sfm_reader *volatile r = NULL;
    struct variable **var_by_value_idx;
+  struct sfm_read_info local_info;
    int format_warning_cnt = 0;
    int weight_idx;
-  int claimed_flt64_cnt;
+  int claimed_oct_cnt;
    int rec_type;
-  size_t i;
-
-  if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
-    return NULL;
  
    *dict = dict_create ();
  
    /* Create and initialize reader. */
    r = pool_create_container (struct sfm_reader, pool);
-  r->fh = fh;
-  r->file = fn_open (fh_get_file_name (fh), "rb");
+  r->fh = fh_ref (fh);
+  r->lock = NULL;
+  r->file = NULL;
    r->error = false;
-  r->flt64_cnt = 0;
-  r->has_vls = false;
+  r->oct_cnt = 0;
    r->has_long_var_names = false;
    r->opcode_idx = sizeof r->opcodes;
+  r->corruption_warning = false;
  
-  if (setjmp (r->bail_out)) 
-    {
-      close_reader (r);
-      dict_destroy (*dict);
-      *dict = NULL;
-      return NULL;
-    }
+  /* TRANSLATORS: this fragment will be interpolated into
+     messages in fh_lock() that identify types of files. */
+  r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
+  if (r->lock == NULL)
+    goto error;
  
+  r->file = fn_open (fh_get_file_name (fh), "rb");
    if (r->file == NULL)
      {
-      msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
+      msg (ME, _("Error opening `%s' for reading as a system file: %s."),
             fh_get_file_name (r->fh), strerror (errno));
-      longjmp (r->bail_out, 1);
+      goto error;
      }
  
+  /* Initialize info. */
+  if (info == NULL)
+    info = &local_info;
+  memset (info, 0, sizeof *info);
+
+  if (setjmp (r->bail_out))
+    goto error;
+
+
    /* Read header. */
-  read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info);
+  read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
  
    /* Read all the variable definition records. */
-  rec_type = read_int32 (r);
+  rec_type = read_int (r);
    while (rec_type == 2)
      {
-      read_variable_record (r, *dict, &format_warning_cnt); 
-      rec_type = read_int32 (r);
+      read_variable_record (r, *dict, &format_warning_cnt);
+      rec_type = read_int (r);
      }
  
    /* Figure out the case format. */
@@ -236,7 +323,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
    setup_weight (r, weight_idx, var_by_value_idx, *dict);
  
    /* Read all the rest of the dictionary records. */
-  while (rec_type != 999) 
+  while (rec_type != 999)
      {
        switch (rec_type)
          {
@@ -252,13 +339,13 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
            break;
  
          case 7:
-          read_extension_record (r, *dict);
+          read_extension_record (r, *dict, info);
            break;
  
          default:
            sys_error (r, _("Unrecognized record type %d."), rec_type);
          }
-      rec_type = read_int32 (r);
+      rec_type = read_int (r);
      }
  
  
@@ -268,8 +355,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
        for (i = 0; i < dict_get_var_cnt (*dict); i++)
         {
           struct variable *var = dict_get_var (*dict, i);
-         char short_name [SHORT_NAME_LEN + 1];
-         char long_name [SHORT_NAME_LEN + 1];
+         char short_name[SHORT_NAME_LEN + 1];
+         char long_name[SHORT_NAME_LEN + 1];
  
           strcpy (short_name, var_get_name (var));
  
@@ -280,38 +367,46 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
              name, but we want to retain it, so re-set it
              explicitly. */
           dict_rename_var (*dict, var, long_name);
-         var_set_short_name (var, short_name);
+         var_set_short_name (var, 0, short_name);
         }
  
        r->has_long_var_names = true;
      }
  
-  /* Read record 999 data, which is just filler. */
-  read_int32 (r);
+  recode_strings (*dict);
  
-  if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt)
+  /* Read record 999 data, which is just filler. */
+  read_int (r);
+
+  /* Warn if the actual amount of data per case differs from the
+     amount that the header claims.  SPSS version 13 gets this
+     wrong when very long strings are involved, so don't warn in
+     that case. */
+  if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
+      && info->version_major != 13)
      sys_warn (r, _("File header claims %d variable positions but "
                     "%d were read from file."),
-              claimed_flt64_cnt, r->flt64_cnt);
+              claimed_oct_cnt, r->oct_cnt);
  
    /* Create an index of dictionary variable widths for
       sfm_read_case to use.  We cannot use the `struct variable's
       from the dictionary we created, because the caller owns the
       dictionary and may destroy or modify its variables. */
-  r->var_cnt = dict_get_var_cnt (*dict);
-  r->vars = pool_nalloc (r->pool, r->var_cnt, sizeof *r->vars);
-  for (i = 0; i < r->var_cnt; i++) 
-    {
-      struct variable *v = dict_get_var (*dict, i);
-      struct sfm_var *sv = &r->vars[i];
-      sv->width = var_get_width (v);
-      sv->case_index = var_get_case_index (v); 
-    }
+  sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
+  pool_register (r->pool, free, r->sfm_vars);
+  r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
  
    pool_free (r->pool, var_by_value_idx);
-  r->value_cnt = dict_get_next_value_idx (*dict);
-  return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+  return casereader_create_sequential
+    (NULL, r->proto,
+     r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
                                         &sys_file_casereader_class, r);
+
+error:
+  close_reader (r);
+  dict_destroy (*dict);
+  *dict = NULL;
+  return NULL;
  }
  
  /* Closes a system file after we're done with it.
@@ -327,17 +422,17 @@ close_reader (struct sfm_reader *r)
  
    if (r->file)
      {
-      if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) 
+      if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
          {
-          msg (ME, _("Error closing system file \"%s\": %s."),
+          msg (ME, _("Error closing system file `%s': %s."),
                 fh_get_file_name (r->fh), strerror (errno));
            r->error = true;
          }
        r->file = NULL;
      }
  
-  if (r->fh != NULL)
-    fh_close (r->fh, "system file", "rs");
+  fh_unlock (r->lock);
+  fh_unref (r->fh);
  
    error = r->error;
    pool_destroy (r->pool);
@@ -347,7 +442,7 @@ close_reader (struct sfm_reader *r)
  
  /* Destroys READER. */
  static void
-sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) 
+sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
  {
    struct sfm_reader *r = r_;
    close_reader (r);
@@ -356,14 +451,14 @@ sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
  /* Returns true if FILE is an SPSS system file,
     false otherwise. */
  bool
-sfm_detect (FILE *file) 
+sfm_detect (FILE *file)
  {
    char rec_type[5];
  
    if (fread (rec_type, 4, 1, file) != 1)
      return false;
    rec_type[4] = '\0';
-  
+
    return !strcmp ("$FL2", rec_type);
  }
  \f
@@ -371,28 +466,28 @@ sfm_detect (FILE *file)
     Sets DICT's file label to the system file's label.
     Sets *WEIGHT_IDX to 0 if the system file is unweighted,
     or to the value index of the weight variable otherwise.
-   Sets *CLAIMED_FLT64_CNT to the number of values that the file
-   claims to have (although it is not always correct).
-   If INFO is non-null, initializes *INFO with header
-   information. */   
+   Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
+   per case that the file claims to have (although it is not
+   always correct).
+   Initializes INFO with header information. */
  static void
  read_header (struct sfm_reader *r, struct dictionary *dict,
-             int *weight_idx, int *claimed_flt64_cnt,
+             int *weight_idx, int *claimed_oct_cnt,
               struct sfm_read_info *info)
  {
    char rec_type[5];
    char eye_catcher[61];
    uint8_t raw_layout_code[4];
-  int case_cnt;
    uint8_t raw_bias[8];
    char creation_date[10];
    char creation_time[9];
    char file_label[65];
    struct substring file_label_ss;
+  struct substring product;
  
    read_string (r, rec_type, sizeof rec_type);
    read_string (r, eye_catcher, sizeof eye_catcher);
-  
+
    if (strcmp ("$FL2", rec_type) != 0)
      sys_error (r, _("This is not an SPSS system file."));
  
@@ -406,26 +501,38 @@ read_header (struct sfm_reader *r, struct dictionary *dict,
            && r->integer_format != INTEGER_LSB_FIRST))
      sys_error (r, _("This is not an SPSS system file."));
  
-  *claimed_flt64_cnt = read_int32 (r);
-  if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16)
-    *claimed_flt64_cnt = -1;
+  *claimed_oct_cnt = read_int (r);
+  if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
+    *claimed_oct_cnt = -1;
  
-  r->compressed = read_int32 (r) != 0;
+  r->compressed = read_int (r) != 0;
  
-  *weight_idx = read_int32 (r);
+  *weight_idx = read_int (r);
+
+  r->case_cnt = read_int (r);
+  if ( r->case_cnt > INT_MAX / 2)
+    r->case_cnt = -1;
  
-  case_cnt = read_int32 (r);
-  if (case_cnt < -1 || case_cnt > INT_MAX / 2)
-    case_cnt = -1;
  
    /* Identify floating-point format and obtain compression bias. */
    read_bytes (r, raw_bias, sizeof raw_bias);
    if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
      {
-      sys_warn (r, _("Compression bias (%g) is not the usual "
-                     "value of 100, or system file uses unrecognized "
-                     "floating-point format."),
-                r->bias);
+      uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+      if (memcmp (raw_bias, zero_bias, 8))
+        sys_warn (r, _("Compression bias is not the usual "
+                       "value of 100, or system file uses unrecognized "
+                       "floating-point format."));
+      else
+        {
+          /* Some software is known to write all-zeros to this
+             field.  Such software also writes floating-point
+             numbers in the format that we expect by default
+             (it seems that all software most likely does, in
+             reality), so don't warn in this case. */
+        }
+
        if (r->integer_format == INTEGER_MSB_FIRST)
          r->float_format = FLOAT_IEEE_DOUBLE_BE;
        else
@@ -437,32 +544,27 @@ read_header (struct sfm_reader *r, struct dictionary *dict,
    read_string (r, creation_time, sizeof creation_time);
    read_string (r, file_label, sizeof file_label);
    skip_bytes (r, 3);
-  
+
    file_label_ss = ss_cstr (file_label);
    ss_trim (&file_label_ss, ss_cstr (" "));
-  if (!ss_is_empty (file_label_ss)) 
+  if (!ss_is_empty (file_label_ss))
      {
        ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
        dict_set_label (dict, ss_data (file_label_ss));
      }
  
-  if (info)
-    {
-      struct substring product;
-
-      strcpy (info->creation_date, creation_date);
-      strcpy (info->creation_time, creation_time);
-      info->integer_format = r->integer_format;
-      info->float_format = r->float_format;
-      info->compressed = r->compressed;
-      info->case_cnt = case_cnt;
-
-      product = ss_cstr (eye_catcher);
-      ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
-      ss_trim (&product, ss_cstr (" "));
-      str_copy_buf_trunc (info->product, sizeof info->product,
-                          ss_data (product), ss_length (product));
-    }
+  strcpy (info->creation_date, creation_date);
+  strcpy (info->creation_time, creation_time);
+  info->integer_format = r->integer_format;
+  info->float_format = r->float_format;
+  info->compressed = r->compressed;
+  info->case_cnt = r->case_cnt;
+
+  product = ss_cstr (eye_catcher);
+  ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
+  ss_trim (&product, ss_cstr (" "));
+  str_copy_buf_trunc (info->product, sizeof info->product,
+                      ss_data (product), ss_length (product));
  }
  
  /* Reads a variable (type 2) record from R and adds the
@@ -483,98 +585,97 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict,
    struct variable *var;
    int nv;
  
-  width = read_int32 (r);
-  has_variable_label = read_int32 (r);
-  missing_value_code = read_int32 (r);
-  print_format = read_int32 (r);
-  write_format = read_int32 (r);
+  width = read_int (r);
+  has_variable_label = read_int (r);
+  missing_value_code = read_int (r);
+  print_format = read_int (r);
+  write_format = read_int (r);
    read_string (r, name, sizeof name);
    name[strcspn (name, " ")] = '\0';
  
    /* Check variable name. */
    if (name[0] == '$' || name[0] == '#')
-    sys_error (r, "Variable name begins with invalid character `%c'.",
+    sys_error (r, _("Variable name begins with invalid character `%c'."),
                 name[0]);
    if (!var_is_plausible_name (name, false))
      sys_error (r, _("Invalid variable name `%s'."), name);
  
    /* Create variable. */
    if (width < 0 || width > 255)
-    sys_error (r, _("Bad variable width %d."), width);
+    sys_error (r, _("Bad width %d for variable %s."), width, name);
    var = dict_create_var (dict, name, width);
-  if (var == NULL) 
+  if (var == NULL)
      sys_error (r,
                 _("Duplicate variable name `%s' within system file."),
                 name);
  
-  /* Set the short name the same as the long name */
-  var_set_short_name (var, var_get_name (var));
+  /* Set the short name the same as the long name. */
+  var_set_short_name (var, 0, var_get_name (var));
  
    /* Get variable label, if any. */
    if (has_variable_label != 0 && has_variable_label != 1)
      sys_error (r, _("Variable label indicator field is not 0 or 1."));
    if (has_variable_label == 1)
      {
-      size_t len;
+      size_t len, read_len;
        char label[255 + 1];
  
-      len = read_int32 (r);
-      if (len >= sizeof label)
-        sys_error (r, _("Variable %s has label of invalid length %u."),
-                   name, (unsigned int) len);
-      read_string (r, label, len + 1);
+      len = read_int (r);
+
+      /* Read up to 255 bytes of label. */
+      read_len = MIN (sizeof label - 1, len);
+      read_string (r, label, read_len + 1);
        var_set_label (var, label);
-      
+
+      /* Skip unread label bytes. */
+      skip_bytes (r, len - read_len);
+
+      /* Skip label padding up to multiple of 4 bytes. */
        skip_bytes (r, ROUND_UP (len, 4) - len);
      }
  
    /* Set missing values. */
-  if (missing_value_code < -3 || missing_value_code > 3
-      || missing_value_code == -1)
-    sys_error (r, _("Missing value indicator field is not "
-                    "-3, -2, 0, 1, 2, or 3."));
    if (missing_value_code != 0)
      {
        struct missing_values mv;
-      mv_init (&mv, var_get_width (var));
-      if (var_is_numeric (var)) 
+      int i;
+
+      mv_init_pool (r->pool, &mv, var_get_width (var));
+      if (var_is_numeric (var))
          {
-          if (missing_value_code > 0)
-            {
-              int i;
-              for (i = 0; i < missing_value_code; i++)
-                mv_add_num (&mv, read_flt64 (r));
-            }
-          else
+          if (missing_value_code < -3 || missing_value_code > 3
+              || missing_value_code == -1)
+            sys_error (r, _("Numeric missing value indicator field is not "
+                            "-3, -2, 0, 1, 2, or 3."));
+          if (missing_value_code < 0)
              {
-              double low = read_flt64 (r);
-              double high = read_flt64 (r);
-              mv_add_num_range (&mv, low, high);
-              if (missing_value_code == -3)
-                mv_add_num (&mv, read_flt64 (r));
+              double low = read_float (r);
+              double high = read_float (r);
+              mv_add_range (&mv, low, high);
+              missing_value_code = -missing_value_code - 2;
              }
+          for (i = 0; i < missing_value_code; i++)
+            mv_add_num (&mv, read_float (r));
          }
-      else if (var_get_width (var) <= MAX_SHORT_STRING)
+      else
          {
-          if (missing_value_code > 0)
+          int mv_width = MAX (width, 8);
+          union value value;
+
+          if (missing_value_code < 1 || missing_value_code > 3)
+            sys_error (r, _("String missing value indicator field is not "
+                            "0, 1, 2, or 3."));
+
+          value_init (&value, mv_width);
+          value_set_missing (&value, mv_width);
+          for (i = 0; i < missing_value_code; i++)
              {
-              int i;
-              for (i = 0; i < missing_value_code; i++)
-                {
-                  char string[9];
-                  read_string (r, string, sizeof string);
-                  mv_add_str (&mv, string); 
-                }
+              uint8_t *s = value_str_rw (&value, mv_width);
+              read_bytes (r, s, 8);
+              mv_add_str (&mv, s);
              }
-          else 
-            sys_error (r, _("String variable %s may not have missing "
-                            "values specified as a range."),
-                       name);
+          value_destroy (&value, mv_width);
          }
-      else /* var->width > MAX_SHORT_STRING */
-        sys_error (r, _("Long string variable %s may not have missing "
-                        "values."),
-                   name);
        var_set_missing_values (var, &mv);
      }
  
@@ -585,7 +686,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict,
    /* Account for values.
       Skip long string continuation records, if any. */
    nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
-  r->flt64_cnt += nv;
+  r->oct_cnt += nv;
    if (width > 8)
      {
        int i;
@@ -593,21 +694,21 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict,
        for (i = 1; i < nv; i++)
          {
            /* Check for record type 2 and width -1. */
-          if (read_int32 (r) != 2 || read_int32 (r) != -1)
+          if (read_int (r) != 2 || read_int (r) != -1)
              sys_error (r, _("Missing string continuation record."));
  
            /* Skip and ignore remaining continuation data. */
-          has_variable_label = read_int32 (r);
-          missing_value_code = read_int32 (r);
-          print_format = read_int32 (r);
-          write_format = read_int32 (r);
+          has_variable_label = read_int (r);
+          missing_value_code = read_int (r);
+          print_format = read_int (r);
+          write_format = read_int (r);
            read_string (r, name, sizeof name);
  
            /* Variable label fields on continuation records have
               been spotted in system files created by "SPSS Power
               Macintosh Release 6.1". */
-          if (has_variable_label) 
-            skip_bytes (r, ROUND_UP (read_int32 (r), 4));
+          if (has_variable_label)
+            skip_bytes (r, ROUND_UP (read_int (r), 4));
          }
      }
  }
@@ -615,7 +716,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict,
  /* Translates the format spec from sysfile format to internal
     format. */
  static void
-parse_format_spec (struct sfm_reader *r, uint32_t s,
+parse_format_spec (struct sfm_reader *r, unsigned int s,
                     enum which_format which, struct variable *v,
                     int *format_warning_cnt)
  {
@@ -624,19 +725,19 @@ parse_format_spec (struct sfm_reader *r, uint32_t s,
    uint8_t raw_type = s >> 16;
    uint8_t w = s >> 8;
    uint8_t d = s;
-  
+
    bool ok;
-  
+
    if (!fmt_from_io (raw_type, &f.type))
-    sys_error (r, _("Unknown variable format %d."), (int) raw_type);
+    sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
    f.w = w;
    f.d = d;
  
    msg_disable ();
    ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
    msg_enable ();
-  
-  if (ok) 
+
+  if (ok)
      {
        if (which == PRINT_FORMAT)
          var_set_print_format (v, &f);
@@ -662,7 +763,7 @@ parse_format_spec (struct sfm_reader *r, uint32_t s,
     nonzero. */
  static void
  setup_weight (struct sfm_reader *r, int weight_idx,
-              struct variable **var_by_value_idx, struct dictionary *dict) 
+              struct variable **var_by_value_idx, struct dictionary *dict)
  {
    if (weight_idx != 0)
      {
@@ -687,7 +788,7 @@ read_documents (struct sfm_reader *r, struct dictionary *dict)
    if (dict_get_documents (dict) != NULL)
      sys_error (r, _("Multiple type 6 (document) records."));
  
-  line_cnt = read_int32 (r);
+  line_cnt = read_int (r);
    if (line_cnt <= 0)
      sys_error (r, _("Number of document lines (%d) "
                      "must be greater than 0."), line_cnt);
@@ -703,11 +804,12 @@ read_documents (struct sfm_reader *r, struct dictionary *dict)
  
  /* Read a type 7 extension record. */
  static void
-read_extension_record (struct sfm_reader *r, struct dictionary *dict)
+read_extension_record (struct sfm_reader *r, struct dictionary *dict,
+                       struct sfm_read_info *info)
  {
-  int subtype = read_int32 (r);
-  size_t size = read_int32 (r);
-  size_t count = read_int32 (r);
+  int subtype = read_int (r);
+  size_t size = read_int (r);
+  size_t count = read_int (r);
    size_t bytes = size * count;
  
    /* Check that SIZE * COUNT + 1 doesn't overflow.  Adding 1
@@ -719,11 +821,11 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict)
    switch (subtype)
      {
      case 3:
-      read_machine_int32_info (r, size, count);
+      read_machine_integer_info (r, size, count, info, dict);
        return;
  
      case 4:
-      read_machine_flt64_info (r, size, count);
+      read_machine_float_info (r, size, count);
        return;
  
      case 5:
@@ -736,11 +838,16 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict)
        /* DATE variable information.  We don't use it yet, but we
           should. */
        break;
-               
+
      case 7:
-      /* Unknown purpose. */
+    case 19:
+      read_mrsets (r, size, count, dict);
+      return;
+
+    case 8:
+      /* Used by the SPSS Data Entry software. */
        break;
-      
+
      case 11:
        read_display_parameters (r, size, count, dict);
        return;
@@ -754,16 +861,36 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict)
        return;
  
      case 16:
-      /* New in SPSS v14?  Unknown purpose.  */
+      /* Extended number of cases.  Not important. */
        break;
  
      case 17:
-      /* Text field that defines variable attributes.  New in
-         SPSS 14. */
-      break;
-      
+      read_data_file_attributes (r, size, count, dict);
+      return;
+
+    case 18:
+      read_variable_attributes (r, size, count, dict);
+      return;
+
+    case 20:
+      /* New in SPSS 16.  Contains a single string that describes
+         the character encoding, e.g. "windows-1252". */
+      {
+       char *encoding = pool_calloc (r->pool, size, count + 1);
+       read_string (r, encoding, count + 1);
+       dict_set_encoding (dict, encoding);
+       return;
+      }
+
+    case 21:
+      /* New in SPSS 16.  Encodes value labels for long string
+         variables. */
+      read_long_string_value_labels (r, size, count, dict);
+      return;
+
      default:
-      sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
+      sys_warn (r, _("Unrecognized record type 7, subtype %d.  Please send a copy of this file, and the syntax which created it to %s"),
+               subtype, PACKAGE_BUGREPORT);
        break;
      }
  
@@ -772,24 +899,31 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict)
  
  /* Read record type 7, subtype 3. */
  static void
-read_machine_int32_info (struct sfm_reader *r, size_t size, size_t count)
+read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
+                           struct sfm_read_info *info,
+                          struct dictionary *dict)
  {
-  int version_major UNUSED = read_int32 (r);
-  int version_minor UNUSED = read_int32 (r);
-  int version_revision UNUSED = read_int32 (r);
-  int machine_code UNUSED = read_int32 (r);
-  int float_representation = read_int32 (r);
-  int compression_code UNUSED = read_int32 (r);
-  int integer_representation = read_int32 (r);
-  int character_code UNUSED = read_int32 (r);
+  int version_major = read_int (r);
+  int version_minor = read_int (r);
+  int version_revision = read_int (r);
+  int machine_code UNUSED = read_int (r);
+  int float_representation = read_int (r);
+  int compression_code UNUSED = read_int (r);
+  int integer_representation = read_int (r);
+  int character_code = read_int (r);
  
    int expected_float_format;
    int expected_integer_format;
  
    if (size != 4 || count != 8)
-    sys_error (r, _("Bad size (%u) or count (%u) field on record type 7, "
+    sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
                      "subtype 3."),
-               (unsigned int) size, (unsigned int) count);
+                size, count);
+
+  /* Save version info. */
+  info->version_major = version_major;
+  info->version_minor = version_minor;
+  info->version_revision = version_revision;
  
    /* Check floating point format. */
    if (r->float_format == FLOAT_IEEE_DOUBLE_BE
@@ -815,32 +949,233 @@ read_machine_int32_info (struct sfm_reader *r, size_t size, size_t count)
      NOT_REACHED ();
    if (integer_representation != expected_integer_format)
      {
-      static const char *endian[] = {N_("little-endian"), N_("big-endian")};
+      static const char *const endian[] = {N_("Little Endian"), N_("Big Endian")};
        sys_warn (r, _("Integer format indicated by system file (%s) "
                       "differs from expected (%s)."),
                  gettext (endian[integer_representation == 1]),
                  gettext (endian[expected_integer_format == 1]));
      }
+
+
+  /*
+    Record type 7, subtype 20 provides a much more reliable way
+    of setting the encoding.
+    The character_code is used as a fallback only.
+  */
+  if ( NULL == dict_get_encoding (dict))
+    {
+      switch (character_code)
+       {
+       case 1:
+         dict_set_encoding (dict, "EBCDIC-US");
+         break;
+       case 2:
+       case 3:
+         /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+            respectively.  However, many files in the wild are known
+            to have character code 2 yet contain data that is
+            clearly not ASCII.
+            Therefore we ignore these values.
+         */
+         return;
+       case 4:
+         dict_set_encoding (dict, "MS_KANJI");
+         break;
+       case 65000:
+         dict_set_encoding (dict, "UTF-7");
+         break;
+       case 65001:
+         dict_set_encoding (dict, "UTF-8");
+         break;
+       default:
+         {
+           char enc[100];
+           snprintf (enc, 100, "CP%d", character_code);
+           dict_set_encoding (dict, enc);
+         }
+         break;
+       };
+    }
  }
  
  /* Read record type 7, subtype 4. */
  static void
-read_machine_flt64_info (struct sfm_reader *r, size_t size, size_t count)
+read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
  {
-  double sysmis = read_flt64 (r);
-  double highest = read_flt64 (r);
-  double lowest = read_flt64 (r);
+  double sysmis = read_float (r);
+  double highest = read_float (r);
+  double lowest = read_float (r);
  
    if (size != 8 || count != 3)
-    sys_error (r, _("Bad size (%u) or count (%u) on extension 4."),
-               (unsigned int) size, (unsigned int) count);
+    sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
+               size, count);
  
    if (sysmis != SYSMIS)
-    sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
+    sys_warn (r, _("File specifies unexpected value %g as %s."),
+              sysmis, "SYSMIS");
+
    if (highest != HIGHEST)
-    sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
+    sys_warn (r, _("File specifies unexpected value %g as %s."),
+              highest, "HIGHEST");
+
    if (lowest != LOWEST)
-    sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
+    sys_warn (r, _("File specifies unexpected value %g as %s."),
+              lowest, "LOWEST");
+}
+
+/* Read record type 7, subtype 7 or 19. */
+static void
+read_mrsets (struct sfm_reader *r, size_t size, size_t count,
+             struct dictionary *dict)
+{
+  struct text_record *text;
+  struct mrset *mrset;
+
+  text = open_text_record (r, size * count);
+  for (;;)
+    {
+      const char *name, *label, *counted;
+      struct stringi_set var_names;
+      size_t allocated_vars;
+      char delimiter;
+      int width;
+
+      mrset = xzalloc (sizeof *mrset);
+
+      name = text_get_token (text, ss_cstr ("="), NULL);
+      if (name == NULL)
+        break;
+      mrset->name = xstrdup (name);
+
+      if (text_match (text, 'C'))
+        {
+          mrset->type = MRSET_MC;
+          if (!text_match (text, ' '))
+            {
+              sys_warn (r, _("Missing space following `%c' at offset %zu "
+                             "in MRSETS record"), 'C', text_pos (text));
+              break;
+            }
+        }
+      else if (text_match (text, 'D'))
+        {
+          mrset->type = MRSET_MD;
+          mrset->cat_source = MRSET_VARLABELS;
+        }
+      else if (text_match (text, 'E'))
+        {
+          char *number;
+
+          mrset->type = MRSET_MD;
+          mrset->cat_source = MRSET_COUNTEDVALUES;
+          if (!text_match (text, ' '))
+            {
+              sys_warn (r, _("Missing space following `%c' at offset %zu "
+                             "in MRSETS record"), 'E',  text_pos (text));
+              break;
+            }
+
+          number = text_get_token (text, ss_cstr (" "), NULL);
+          if (!strcmp (number, "11"))
+            mrset->label_from_var_label = true;
+          else if (strcmp (number, "1"))
+            sys_warn (r, _("Unexpected label source value `%s' "
+                           "following `E' at offset %zu in MRSETS record"),
+                      number, text_pos (text));
+        }
+      else
+        {
+          sys_warn (r, _("Missing `C', `D', or `E' at offset %zu "
+                         "in MRSETS record."),
+                    text_pos (text));
+          break;
+        }
+
+      if (mrset->type == MRSET_MD)
+        {
+          counted = text_parse_counted_string (r, text);
+          if (counted == NULL)
+            break;
+        }
+
+      label = text_parse_counted_string (r, text);
+      if (label == NULL)
+        break;
+      mrset->label = label[0] != '\0' ? xstrdup (label) : NULL;
+
+      stringi_set_init (&var_names);
+      allocated_vars = 0;
+      width = INT_MAX;
+      do
+        {
+          struct variable *var;
+          const char *var_name;
+
+          var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
+          if (var_name == NULL)
+            {
+              sys_warn (r, _("Missing new-line parsing variable names "
+                             "at offset %zu in MRSETS record."),
+                        text_pos (text));
+              break;
+            }
+
+          var = lookup_var_by_short_name (dict, var_name);
+          if (var == NULL)
+            continue;
+          if (!stringi_set_insert (&var_names, var_name))
+            {
+              sys_warn (r, _("Duplicate variable name %s "
+                             "at offset %zu in MRSETS record."),
+                        var_name, text_pos (text));
+              continue;
+            }
+
+          if (mrset->label == NULL && mrset->label_from_var_label
+              && var_has_label (var))
+            mrset->label = xstrdup (var_get_label (var));
+
+          if (mrset->n_vars
+              && var_get_type (var) != var_get_type (mrset->vars[0]))
+            {
+              sys_warn (r, _("MRSET %s contains both string and "
+                             "numeric variables."), name);
+              continue;
+            }
+          width = MIN (width, var_get_width (var));
+
+          if (mrset->n_vars >= allocated_vars)
+            mrset->vars = x2nrealloc (mrset->vars, &allocated_vars,
+                                      sizeof *mrset->vars);
+          mrset->vars[mrset->n_vars++] = var;
+        }
+      while (delimiter != '\n');
+
+      if (mrset->n_vars < 2)
+        {
+          sys_warn (r, _("MRSET %s has only %zu variables."), mrset->name,
+                    mrset->n_vars);
+          mrset_destroy (mrset);
+          continue;
+        }
+
+      if (mrset->type == MRSET_MD)
+        {
+          mrset->width = width;
+          value_init (&mrset->counted, width);
+          if (width == 0)
+            mrset->counted.f = strtod (counted, NULL);
+          else
+            value_copy_str_rpad (&mrset->counted, width,
+                                 (const uint8_t *) counted, ' ');
+        }
+
+      dict_add_mrset (dict, mrset);
+      mrset = NULL;
+      stringi_set_destroy (&var_names);
+    }
+  mrset_destroy (mrset);
+  close_text_record (r, text);
  }
  
  /* Read record type 7, subtype 11, which specifies how variables
@@ -849,30 +1184,50 @@ static void
  read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
                           struct dictionary *dict)
  {
-  const size_t n_vars = count / 3 ;
+  size_t n_vars;
+  bool includes_width;
    bool warned = false;
-  int i;
+  size_t i;
+
+  if (size != 4)
+    {
+      sys_warn (r, _("Bad size %zu on extension 11."), size);
+      skip_bytes (r, size * count);
+      return;
+    }
  
-  if (count % 3 || n_vars != dict_get_var_cnt (dict)) 
-    sys_error (r, _("Bad size (%u) or count (%u) on extension 11."),
-               (unsigned int) size, (unsigned int) count);
+  n_vars = dict_get_var_cnt (dict);
+  if (count == 3 * n_vars)
+    includes_width = true;
+  else if (count == 2 * n_vars)
+    includes_width = false;
+  else
+    {
+      sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
+                count, n_vars);
+      skip_bytes (r, size * count);
+      return;
+    }
  
-  for (i = 0; i < n_vars; ++i) 
+  for (i = 0; i < n_vars; ++i)
      {
-      int measure = read_int32 (r);
-      int width = read_int32 (r);
-      int align = read_int32 (r);
        struct variable *v = dict_get_var (dict, i);
+      int measure = read_int (r);
+      int width = includes_width ? read_int (r) : 0;
+      int align = read_int (r);
  
-      /* spss v14 sometimes seems to set string variables' measure to zero */
-      if ( 0 == measure && var_is_alpha (v) ) measure = 1;
-
+      /* SPSS 14 sometimes seems to set string variables' measure
+         to zero. */
+      if (0 == measure && var_is_alpha (v))
+        measure = 1;
  
        if (measure < 1 || measure > 3 || align < 0 || align > 2)
          {
            if (!warned)
-            sys_warn (r, _("Invalid variable display parameters.  "
-                           "Default parameters substituted."));
+            sys_warn (r, _("Invalid variable display parameters "
+                           "for variable %zu (%s).  "
+                           "Default parameters substituted."),
+                      i, var_get_name (v));
            warned = true;
            continue;
          }
@@ -880,10 +1235,15 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
        var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
                             : measure == 2 ? MEASURE_ORDINAL
                             : MEASURE_SCALE));
-      var_set_display_width (v, width);
        var_set_alignment (v, (align == 0 ? ALIGN_LEFT
                               : align == 1 ? ALIGN_RIGHT
                               : ALIGN_CENTRE));
+
+      /* Older versions (SPSS 9.0) sometimes set the display
+        width to zero.  This causes confusion in the GUI, so
+        only set the width if it is nonzero. */
+      if (width > 0)
+        var_set_display_width (v, width);
      }
  }
  
@@ -894,17 +1254,16 @@ static void
  read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
                          struct dictionary *dict)
  {
-  struct variable_to_value_map *map;
+  struct text_record *text;
    struct variable *var;
    char *long_name;
-  int warning_cnt = 0;
-  
-  map = open_variable_to_value_map (r, size * count);
-  while (read_variable_to_value_map (r, dict, map, &var, &long_name,
-                                     &warning_cnt))
+
+  text = open_text_record (r, size * count);
+  while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
      {
-      char short_name[SHORT_NAME_LEN + 1];
-      strcpy (short_name, var_get_short_name (var));
+      char **short_names;
+      size_t short_name_cnt;
+      size_t i;
  
        /* Validate long name. */
        if (!var_is_valid_name (long_name, false))
@@ -914,9 +1273,9 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
                      var_get_name (var), long_name);
            continue;
          }
-                      
+
        /* Identify any duplicates. */
-      if (strcasecmp (short_name, long_name)
+      if (strcasecmp (var_get_short_name (var, 0), long_name)
            && dict_lookup_var (dict, long_name) != NULL)
          {
            sys_warn (r, _("Duplicate long variable name `%s' "
@@ -924,13 +1283,29 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
            continue;
          }
  
-      /* Set long name.  Renaming a variable may clear the short
-         name, but we want to retain it, so re-set it
-         explicitly. */
+      /* Renaming a variable may clear its short names, but we
+         want to retain them, so we save them and re-set them
+         afterward. */
+      short_name_cnt = var_get_short_name_cnt (var);
+      short_names = xnmalloc (short_name_cnt, sizeof *short_names);
+      for (i = 0; i < short_name_cnt; i++)
+        {
+          const char *s = var_get_short_name (var, i);
+          short_names[i] = s != NULL ? xstrdup (s) : NULL;
+        }
+
+      /* Set long name. */
        dict_rename_var (dict, var, long_name);
-      var_set_short_name (var, short_name);
+
+      /* Restore short names. */
+      for (i = 0; i < short_name_cnt; i++)
+        {
+          var_set_short_name (var, i, short_names[i]);
+          free (short_names[i]);
+        }
+      free (short_names);
      }
-  close_variable_to_value_map (r, map);
+  close_text_record (r, text);
    r->has_long_var_names = true;
  }
  
@@ -940,48 +1315,60 @@ static void
  read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
                        struct dictionary *dict)
  {
-  struct variable_to_value_map *map;
+  struct text_record *text;
    struct variable *var;
    char *length_s;
-  int warning_cnt = 0;
  
-  r->has_vls = true;
-
-  map = open_variable_to_value_map (r, size * count);
-  while (read_variable_to_value_map (r, dict, map, &var, &length_s,
-                                     &warning_cnt))
+  text = open_text_record (r, size * count);
+  while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
      {
-      long length, remaining_length;
-      size_t idx;
+      size_t idx = var_get_dict_index (var);
+      long int length;
+      int segment_cnt;
+      int i;
  
        /* Get length. */
        length = strtol (length_s, NULL, 10);
-      if (length < MIN_VERY_LONG_STRING || length == LONG_MAX) 
+      if (length < 1 || length > MAX_STRING)
+        {
+          sys_warn (r, _("%s listed as string of invalid length %s "
+                         "in very length string record."),
+                    var_get_name (var), length_s);
+          continue;
+        }
+
+      /* Check segments. */
+      segment_cnt = sfm_width_to_segments (length);
+      if (segment_cnt == 1)
          {
-          sys_warn (r, _("%s listed as string of length %s "
-                         "in length table."),
+          sys_warn (r, _("%s listed in very long string record with width %s, "
+                         "which requires only one segment."),
                      var_get_name (var), length_s);
            continue;
          }
+      if (idx + segment_cnt > dict_get_var_cnt (dict))
+        sys_error (r, _("Very long string %s overflows dictionary."),
+                   var_get_name (var));
  
-      /* Group multiple variables into single variable
-         and delete all but the first. */
-      remaining_length = length;
-      for (idx = var_get_dict_index (var); remaining_length > 0; idx++)
-        if (idx < dict_get_var_cnt (dict)) 
-          remaining_length -= MIN (var_get_width (dict_get_var (dict, idx)),
-                                   EFFECTIVE_LONG_STRING_LENGTH);
-        else
-          sys_error (r, _("Very long string %s overflows dictionary."),
-                     var_get_name (var));
-      dict_delete_consecutive_vars (dict,
-                                    var_get_dict_index (var) + 1,
-                                    idx - var_get_dict_index (var) - 1);
-
-      /* Assign all the length to the first variable. */
+      /* Get the short names from the segments and check their
+         lengths. */
+      for (i = 0; i < segment_cnt; i++)
+        {
+          struct variable *seg = dict_get_var (dict, idx + i);
+          int alloc_width = sfm_segment_alloc_width (length, i);
+          int width = var_get_width (seg);
+
+          if (i > 0)
+            var_set_short_name (var, i, var_get_short_name (seg, 0));
+          if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
+            sys_error (r, _("Very long string with width %ld has segment %d "
+                            "of width %d (expected %d)"),
+                       length, i, width, alloc_width);
+        }
+      dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
        var_set_width (var, length);
      }
-  close_variable_to_value_map (r, map);
+  close_text_record (r, text);
    dict_compact_values (dict);
  }
  
@@ -992,10 +1379,10 @@ read_value_labels (struct sfm_reader *r,
                     struct dictionary *dict, struct variable **var_by_value_idx)
  {
    struct pool *subpool;
-  
-  struct label 
+
+  struct label
      {
-      char raw_value[8];        /* Value as uninterpreted bytes. */
+      uint8_t raw_value[8];        /* Value as uninterpreted bytes. */
        union value value;        /* Value. */
        char *label;              /* Null-terminated label string. */
      };
@@ -1005,6 +1392,7 @@ read_value_labels (struct sfm_reader *r,
  
    struct variable **var = NULL;        /* Associated variables. */
    int var_cnt;                 /* Number of associated variables. */
+  int max_width;                /* Maximum width of string variables. */
  
    int i;
  
@@ -1015,10 +1403,10 @@ read_value_labels (struct sfm_reader *r,
       of numeric or string type. */
  
    /* Read number of labels. */
-  label_cnt = read_int32 (r);
+  label_cnt = read_int (r);
  
-  if (label_cnt >= INT32_MAX / sizeof *labels)
-    {    
+  if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
+    {
        sys_warn (r, _("Invalid number of labels: %d.  Ignoring labels."),
                  label_cnt);
        label_cnt = 0;
@@ -1049,26 +1437,29 @@ read_value_labels (struct sfm_reader *r,
       to which the value labels are to be applied. */
  
    /* Read record type of type 4 record. */
-  if (read_int32 (r) != 4)
+  if (read_int (r) != 4)
      sys_error (r, _("Variable index record (type 4) does not immediately "
                      "follow value label record (type 3) as it should."));
  
    /* Read number of variables associated with value label from type 4
       record. */
-  var_cnt = read_int32 (r);
+  var_cnt = read_int (r);
    if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
      sys_error (r, _("Number of variables associated with a value label (%d) "
-                    "is not between 1 and the number of variables (%u)."),
-               var_cnt, (unsigned int) dict_get_var_cnt (dict));
+                    "is not between 1 and the number of variables (%zu)."),
+               var_cnt, dict_get_var_cnt (dict));
  
    /* Read the list of variables. */
    var = pool_nalloc (subpool, var_cnt, sizeof *var);
+  max_width = 0;
    for (i = 0; i < var_cnt; i++)
      {
-      var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int32 (r));
-      if (var_is_long_string (var[i]))
-        sys_error (r, _("Value labels are not allowed on long string "
-                        "variables (%s)."), var_get_name (var[i]));
+      var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
+      if (var_get_width (var[i]) > 8)
+        sys_error (r, _("Value labels may not be added to long string "
+                        "variables (e.g. %s) using records types 3 and 4."),
+                   var_get_name (var[i]));
+      max_width = MAX (max_width, var_get_width (var[i]));
      }
  
    /* Type check the variables. */
@@ -1083,17 +1474,18 @@ read_value_labels (struct sfm_reader *r,
                   var_is_numeric (var[i]) ? _("numeric") : _("string"));
  
    /* Fill in labels[].value, now that we know the desired type. */
-  for (i = 0; i < label_cnt; i++) 
+  for (i = 0; i < label_cnt; i++)
      {
        struct label *label = labels + i;
-      
+
+      value_init_pool (subpool, &label->value, max_width);
        if (var_is_alpha (var[0]))
-        buf_copy_rpad (label->value.s, sizeof label->value.s,
-                       label->raw_value, sizeof label->raw_value);
+        u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
+                       label->raw_value, sizeof label->raw_value, ' ');
        else
-        label->value.f = flt64_to_double (r, (uint8_t *) label->raw_value);
+        label->value.f = float_get_double (r->float_format, label->raw_value);
      }
-  
+
    /* Assign the `value_label's to each variable. */
    for (i = 0; i < var_cnt; i++)
      {
@@ -1104,135 +1496,282 @@ read_value_labels (struct sfm_reader *r,
        for (j = 0; j < label_cnt; j++)
         {
            struct label *label = &labels[j];
-          if (!var_add_value_label (v, &label->value, label->label)) 
+          if (!var_add_value_label (v, &label->value, label->label))
              {
                if (var_is_numeric (var[0]))
                  sys_warn (r, _("Duplicate value label for %g on %s."),
                            label->value.f, var_get_name (v));
                else
-                sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
-                          var_get_width (v), label->value.s,
-                          var_get_name (v)); 
+                sys_warn (r, _("Duplicate value label for `%.*s' on %s."),
+                          max_width, value_str (&label->value, max_width),
+                          var_get_name (v));
              }
         }
      }
  
    pool_destroy (subpool);
  }
+
+/* Parses custom attributes out of TEXT, one "key(value\n...)"
+   group at a time, and adds each of them to ATTRS.  When ATTRS is
+   a null pointer the attributes are still parsed, but each one is
+   destroyed instead of being kept.  Parsing stops when no further
+   key can be read or when a '/' follows an attribute. */
+static void
+read_attributes (struct sfm_reader *r, struct text_record *text,
+                 struct attrset *attrs)
+{
+  for (;;)
+    {
+      struct attribute *attr;
+      char *name;
+      int value_idx;
+
+      /* The attribute's name runs up to the opening parenthesis. */
+      name = text_get_token (text, ss_cstr ("("), NULL);
+      if (name == NULL)
+        return;
+      attr = attribute_create (name);
+
+      /* Each value occupies one new-line terminated token. */
+      value_idx = 1;
+      for (;;)
+        {
+          char *raw = text_get_token (text, ss_cstr ("\n"), NULL);
+          size_t raw_len;
+
+          if (raw == NULL)
+            {
+              text_warn (r, text, _("Error parsing attribute value %s[%d]"),
+                         name, value_idx);
+              break;
+            }
+
+          raw_len = strlen (raw);
+          if (raw_len < 2 || raw[0] != '\'' || raw[raw_len - 1] != '\'')
+            {
+              /* Not enclosed in single quotes: warn, but keep the
+                 value verbatim. */
+              text_warn (r, text,
+                         _("Attribute value %s[%d] is not quoted: %s"),
+                         name, value_idx, raw);
+              attribute_add_value (attr, raw);
+            }
+          else
+            {
+              /* Strip the enclosing single quotes. */
+              raw[raw_len - 1] = '\0';
+              attribute_add_value (attr, raw + 1);
+            }
+
+          /* A ')' closes the list of values for this attribute. */
+          if (text_match (text, ')'))
+            break;
+          value_idx++;
+        }
+
+      if (attrs == NULL)
+        attribute_destroy (attr);
+      else
+        attrset_add (attrs, attr);
+
+      /* A '/' after an attribute ends this set of attributes. */
+      if (text_match (text, '/'))
+        return;
+    }
+}
+
+/* Handles record type 7, subtype 17: the custom attributes
+   attached to the data file as a whole.  The record's
+   SIZE * COUNT bytes are loaded as a text record and parsed into
+   DICT's attribute set. */
+static void
+read_data_file_attributes (struct sfm_reader *r,
+                           size_t size, size_t count,
+                           struct dictionary *dict)
+{
+  struct text_record *record;
+  struct attrset *dict_attrs;
+
+  record = open_text_record (r, size * count);
+  dict_attrs = dict_get_attributes (dict);
+  read_attributes (r, record, dict_attrs);
+  close_text_record (r, record);
+}
+
+/* Skips N_LABELS value/label pairs in R.  Each pair consists of a
+   length followed by that many bytes of value, then a length
+   followed by that many bytes of label. */
+static void
+skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
+{
+  size_t remaining;
+
+  for (remaining = n_labels; remaining > 0; remaining--)
+    {
+      size_t n_bytes;
+
+      /* The value comes first... */
+      n_bytes = read_int (r);
+      skip_bytes (r, n_bytes);
+
+      /* ...followed by its label. */
+      n_bytes = read_int (r);
+      skip_bytes (r, n_bytes);
+    }
+}
+
+/* Reads a long string value label record, SIZE * COUNT bytes
+   long, from R and adds the value labels that it contains to the
+   matching string variables in D.
+
+   The record is a sequence of per-variable groups.  Each group
+   holds a variable name length, the name, the variable's width,
+   and a number of labels, followed by that many length-prefixed
+   value/label pairs.  A group is skipped, with a warning, if its
+   variable is unknown or numeric or if its width differs from the
+   variable's.
+
+   The name, value, and label lengths come straight from the file,
+   so each is rejected with sys_error() if it is negative, before
+   it can be used as a byte count. */
+static void
+read_long_string_value_labels (struct sfm_reader *r,
+                              size_t size, size_t count,
+                              struct dictionary *d)
+{
+  const off_t start = ftello (r->file);
+  while (ftello (r->file) - start < size * count)
+    {
+      char var_name[VAR_NAME_LEN + 1];
+      size_t n_labels, i;
+      struct variable *v;
+      union value value;
+      int var_name_len;
+      int width;
+
+      /* Read header. */
+      var_name_len = read_int (r);
+      if (var_name_len < 0)
+        sys_error (r, _("Variable name length in long string value label "
+                        "record (%d) is negative."),
+                   var_name_len);
+      if (var_name_len > VAR_NAME_LEN)
+        sys_error (r, _("Variable name length in long string value label "
+                        "record (%d) exceeds %d-byte limit."),
+                   var_name_len, VAR_NAME_LEN);
+      read_string (r, var_name, var_name_len + 1);
+      width = read_int (r);
+      n_labels = read_int (r);
+
+      v = dict_lookup_var (d, var_name);
+      if (v == NULL)
+        {
+          sys_warn (r, _("Ignoring long string value record for "
+                         "unknown variable %s."), var_name);
+          skip_long_string_value_labels (r, n_labels);
+          continue;
+        }
+      if (var_is_numeric (v))
+        {
+          sys_warn (r, _("Ignoring long string value record for "
+                         "numeric variable %s."), var_name);
+          skip_long_string_value_labels (r, n_labels);
+          continue;
+        }
+      if (width != var_get_width (v))
+        {
+          sys_warn (r, _("Ignoring long string value record for variable %s "
+                         "because the record's width (%d) does not match the "
+                         "variable's width (%d)"),
+                    var_name, width, var_get_width (v));
+          skip_long_string_value_labels (r, n_labels);
+          continue;
+        }
+
+      /* Read values. */
+      value_init_pool (r->pool, &value, width);
+      for (i = 0; i < n_labels; i++)
+       {
+          size_t value_length, label_length;
+          char label[256];
+          bool skip = false;
+          int raw_length;
+
+          /* Read value. */
+          raw_length = read_int (r);
+          if (raw_length < 0)
+            sys_error (r, _("Value length (%d) in long string value label "
+                            "record for variable %s is negative."),
+                       raw_length, var_get_name (v));
+          value_length = raw_length;
+          if (value_length == width)
+            read_bytes (r, value_str_rw (&value, width), width);
+          else
+            {
+              sys_warn (r, _("Ignoring long string value %zu for variable %s, "
+                             "with width %d, that has bad value width %zu."),
+                        i, var_get_name (v), width, value_length);
+              skip_bytes (r, value_length);
+              skip = true;
+            }
+
+          /* Read label.  A negative length must be rejected here:
+             converted to size_t, -1 would make LABEL_LENGTH + 1
+             wrap around to 0 below. */
+          raw_length = read_int (r);
+          if (raw_length < 0)
+            sys_error (r, _("Label length (%d) in long string value label "
+                            "record for variable %s is negative."),
+                       raw_length, var_get_name (v));
+          label_length = raw_length;
+          read_string (r, label, MIN (sizeof label, label_length + 1));
+          if (label_length >= sizeof label)
+            {
+              /* Skip and silently ignore label text after the
+                 first 255 bytes.  The maximum documented length
+                 of a label is 120 bytes so this is more than
+                 generous. */
+              skip_bytes (r, (label_length + 1) - sizeof label);
+            }
+
+          if (!skip && !var_add_value_label (v, &value, label))
+            sys_warn (r, _("Duplicate value label for `%.*s' on %s."),
+                      width, value_str (&value, width), var_get_name (v));
+        }
+    }
+}
+
+
+/* Handles record type 7, subtype 18: custom attributes attached
+   to individual variables.  The record's SIZE * COUNT bytes are
+   loaded as a text record, which is then consumed as repeated
+   "variable name, ':', attributes" entries until
+   text_read_variable_name() reports failure. */
+static void
+read_variable_attributes (struct sfm_reader *r,
+                          size_t size, size_t count,
+                          struct dictionary *dict)
+{
+  struct text_record *record = open_text_record (r, size * count);
+  struct variable *target;
+
+  while (text_read_variable_name (r, dict, record, ss_cstr (":"), &target))
+    {
+      /* With no variable to attach them to, the attributes are
+         parsed and thrown away. */
+      struct attrset *dest = NULL;
+      if (target != NULL)
+        dest = var_get_attributes (target);
+      read_attributes (r, record, dest);
+    }
+  close_text_record (r, record);
+}
+
  \f
  /* Case reader. */
  
  static void partial_record (struct sfm_reader *r)
       NO_RETURN;
+
+static void read_error (struct casereader *, const struct sfm_reader *);
+
  static bool read_case_number (struct sfm_reader *, double *);
-static bool read_case_string (struct sfm_reader *, char *, size_t);
+static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
  static int read_opcode (struct sfm_reader *);
  static bool read_compressed_number (struct sfm_reader *, double *);
-static bool read_compressed_string (struct sfm_reader *, char *);
-static bool read_whole_strings (struct sfm_reader *, char *, size_t);
-
-/* Reads one case from READER's file into C.  Returns true only
-   if successful. */
-static bool
-sys_file_casereader_read (struct casereader *reader, void *r_,
-                          struct ccase *c)
+static bool read_compressed_string (struct sfm_reader *, uint8_t *);
+static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
+static bool skip_whole_strings (struct sfm_reader *, size_t);
+
+/* Reads and returns one case from READER's file.  Returns a null
+   pointer if not successful. */
+static struct ccase *
+sys_file_casereader_read (struct casereader *reader, void *r_)
  {
    struct sfm_reader *r = r_;
+  struct ccase *volatile c;
+  int i;
+
    if (r->error)
-    return false;
+    return NULL;
  
-  case_create (c, r->value_cnt);
-  if (setjmp (r->bail_out)) 
+  c = case_create (r->proto);
+  if (setjmp (r->bail_out))
      {
        casereader_force_error (reader);
-      case_destroy (c);
-      return false; 
+      case_unref (c);
+      return NULL;
      }
  
-  if (!r->compressed && sizeof (double) == 8 && !r->has_vls) 
+  for (i = 0; i < r->sfm_var_cnt; i++)
      {
-      /* Fast path.  Read the whole case directly. */
-      if (!try_read_bytes (r, case_data_all_rw (c),
-                           sizeof (union value) * r->flt64_cnt)) 
-        {
-          case_destroy (c);
-          return false; 
-        }
+      struct sfm_var *sv = &r->sfm_vars[i];
+      union value *v = case_data_rw_idx (c, sv->case_index);
  
-      /* Convert floating point numbers to native format if needed. */
-      if (r->float_format != FLOAT_NATIVE_DOUBLE) 
+      if (sv->var_width == 0)
          {
-          int i;
-
-          for (i = 0; i < r->var_cnt; i++) 
-            if (r->vars[i].width == 0) 
-              {
-                double *d = &case_data_rw_idx (c, r->vars[i].case_index)->f;
-                float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d); 
-              }
+          if (!read_case_number (r, &v->f))
+            goto eof;
          }
-      return true;
-    }
-  else 
-    {
-      /* Slow path.  Convert from external to internal format. */
-      int i;
-
-      for (i = 0; i < r->var_cnt; i++)
+      else
          {
-         struct sfm_var *sv = &r->vars[i];
-          union value *v = case_data_rw_idx (c, sv->case_index);
-
-          if (sv->width == 0) 
-            {
-              if (!read_case_number (r, &v->f))
-                goto eof; 
-            }
-          else
-            {
-              /* Read the string data in segments up to 255 bytes
-                 at a time, packed into 8-byte units. */
-              const int max_chunk = MIN_VERY_LONG_STRING - 1;
-             int ofs, chunk_size;
-              for (ofs = 0; ofs < sv->width; ofs += chunk_size)
-                {
-                  chunk_size = MIN (max_chunk, sv->width - ofs);
-                  if (!read_case_string (r, v->s + ofs, chunk_size)) 
-                    {
-                      if (ofs)
-                        partial_record (r);
-                      goto eof; 
-                    }
-                }
-
-              /* Very long strings have trailing wasted space
-                 that we must skip. */
-              if (sv->width >= MIN_VERY_LONG_STRING) 
-                {
-                  int bytes_read = (sv->width / max_chunk * 256
-                                    + ROUND_UP (sv->width % max_chunk, 8));
-                  int total_bytes = sfm_width_to_bytes (sv->width);
-                  int excess_bytes = total_bytes - bytes_read;
-
-                  while (excess_bytes > 0) 
-                    {
-                      char buffer[1024];
-                      size_t chunk = MIN (sizeof buffer, excess_bytes);
-                      if (!read_whole_strings (r, buffer, chunk))
-                        partial_record (r);
-                      excess_bytes -= chunk;
-                    }
-                }
-            }
+          uint8_t *s = value_str_rw (v, sv->var_width);
+          if (!read_case_string (r, s + sv->offset, sv->segment_width))
+            goto eof;
+          if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
+            partial_record (r);
          }
-      return true; 
-
-    eof:
-      case_destroy (c);
-      if (i != 0)
-        partial_record (r);
-      return false;
      }
+  return c;
+
+eof:
+  case_unref (c);
+  if (i != 0)
+    partial_record (r);
+  if (r->case_cnt != -1)
+    read_error (reader, r);
+  return NULL;
  }
  
  /* Issues an error that R ends in a partial record. */
@@ -1242,20 +1781,29 @@ partial_record (struct sfm_reader *r)
    sys_error (r, _("File ends in partial case."));
  }
  
+/* Issues an error message saying that an unspecified error
+   occurred reading a case from SFM's file, and forces casereader
+   R into an error state. */
+static void
+read_error (struct casereader *r, const struct sfm_reader *sfm)
+{
+  msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
+  casereader_force_error (r);
+}
+
  /* Reads a number from R and stores its value in *D.
     If R is compressed, reads a compressed number;
     otherwise, reads a number in the regular way.
     Returns true if successful, false if end of file is
     reached immediately. */
  static bool
-read_case_number (struct sfm_reader *r, double *d) 
+read_case_number (struct sfm_reader *r, double *d)
  {
    if (!r->compressed)
      {
-      uint8_t flt64[8];
-      if (!try_read_bytes (r, flt64, sizeof flt64))
+      uint8_t number[8];
+      if (!try_read_bytes (r, number, sizeof number))
          return false;
-      *d = flt64_to_double (r, flt64);
+      float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
        return true;
      }
    else
@@ -1270,12 +1818,12 @@ read_case_number (struct sfm_reader *r, double *d)
     Returns true if successful, false if end of file is
     reached immediately. */
  static bool
-read_case_string (struct sfm_reader *r, char *s, size_t length) 
+read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
  {
    size_t whole = ROUND_DOWN (length, 8);
    size_t partial = length % 8;
-  
-  if (whole) 
+
+  if (whole)
      {
        if (!read_whole_strings (r, s, whole))
          return false;
@@ -1283,12 +1831,12 @@ read_case_string (struct sfm_reader *r, char *s, size_t length)
  
    if (partial)
      {
-      char bounce[8];
+      uint8_t bounce[8];
        if (!read_whole_strings (r, bounce, sizeof bounce))
          {
            if (whole)
              partial_record (r);
-          return false; 
+          return false;
          }
        memcpy (s + whole, bounce, partial);
      }
@@ -1298,13 +1846,13 @@ read_case_string (struct sfm_reader *r, char *s, size_t length)
  
  /* Reads and returns the next compression opcode from R. */
  static int
-read_opcode (struct sfm_reader *r) 
+read_opcode (struct sfm_reader *r)
  {
    assert (r->compressed);
    for (;;)
      {
        int opcode;
-      if (r->opcode_idx >= sizeof r->opcodes) 
+      if (r->opcode_idx >= sizeof r->opcodes)
          {
            if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
              return -1;
@@ -1323,7 +1871,7 @@ read_opcode (struct sfm_reader *r)
  static bool
  read_compressed_number (struct sfm_reader *r, double *d)
  {
-  int opcode = read_opcode (r); 
+  int opcode = read_opcode (r);
    switch (opcode)
      {
      case -1:
@@ -1331,11 +1879,18 @@ read_compressed_number (struct sfm_reader *r, double *d)
        return false;
  
      case 253:
-      *d = read_flt64 (r);
+      *d = read_float (r);
        break;
-      
+
      case 254:
-      sys_error (r, _("Compressed data is corrupt."));
+      float_convert (r->float_format, "        ", FLOAT_NATIVE_DOUBLE, d);
+      if (!r->corruption_warning)
+        {
+          r->corruption_warning = true;
+          sys_warn (r, _("Possible compressed data corruption: "
+                         "compressed spaces appear in numeric field."));
+        }
+      break;
  
      case 255:
        *d = SYSMIS;
@@ -1354,9 +1909,10 @@ read_compressed_number (struct sfm_reader *r, double *d)
     Returns true if successful, false if end of file is
     reached immediately. */
  static bool
-read_compressed_string (struct sfm_reader *r, char *dst)
+read_compressed_string (struct sfm_reader *r, uint8_t *dst)
  {
-  switch (read_opcode (r))
+  int opcode = read_opcode (r);
+  switch (opcode)
      {
      case -1:
      case 252:
@@ -1371,7 +1927,25 @@ read_compressed_string (struct sfm_reader *r, char *dst)
        break;
  
      default:
-      sys_error (r, _("Compressed data is corrupt."));
+      {
+        double value = opcode - r->bias;
+        float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
+        if (value == 0.0)
+          {
+            /* This has actually been seen "in the wild".  The submitter of the
+               file that showed this reported that the contents decoded as
+               spaces, but they were at the end of the field so it's possible
+               that the null bytes just acted as null terminators. */
+          }
+        else if (!r->corruption_warning)
+          {
+            r->corruption_warning = true;
+            sys_warn (r, _("Possible compressed data corruption: "
+                           "string contains compressed integer (opcode %d)"),
+                      opcode);
+          }
+      }
+      break;
      }
  
    return true;
@@ -1383,7 +1957,7 @@ read_compressed_string (struct sfm_reader *r, char *dst)
     Returns true if successful, false if end of file is
     reached immediately. */
  static bool
-read_whole_strings (struct sfm_reader *r, char *s, size_t length)
+read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
  {
    assert (length % 8 == 0);
    if (!r->compressed)
@@ -1392,7 +1966,7 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length)
      {
        size_t ofs;
        for (ofs = 0; ofs < length; ofs += 8)
-        if (!read_compressed_string (r, s + ofs)) 
+        if (!read_compressed_string (r, s + ofs))
            {
              if (ofs != 0)
                partial_record (r);
@@ -1401,6 +1975,20 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length)
        return true;
      }
  }
+
+/* Skips LENGTH string bytes from R by reading them into a scratch
+   buffer that is then discarded.
+   LENGTH must be a multiple of 8 (read_whole_strings() asserts
+   this).
+   LENGTH must not exceed 1024, the size of the scratch buffer,
+   but that's only because the current caller never needs more
+   than that many bytes.
+   Returns true if successful, false if end of file is
+   reached immediately. */
+static bool
+skip_whole_strings (struct sfm_reader *r, size_t length)
+{
+  uint8_t buffer[1024];
+
+  /* A LENGTH equal to the buffer size still fits, so only larger
+     values are rejected. */
+  assert (length <= sizeof buffer);
+  return read_whole_strings (r, buffer, length);
+}
  \f
  /* Creates and returns a table that can be used for translating a value
     index into a case to a "struct variable *" for DICT.  Multiple
@@ -1411,15 +1999,15 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length)
     values to be deleted from the case and the dictionary to be
     compacted. */
  static struct variable **
-make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) 
+make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
  {
    struct variable **var_by_value_idx;
    int value_idx = 0;
    int i;
  
    var_by_value_idx = pool_nmalloc (r->pool,
-                                   r->flt64_cnt, sizeof *var_by_value_idx);
-  for (i = 0; i < dict_get_var_cnt (dict); i++) 
+                                   r->oct_cnt, sizeof *var_by_value_idx);
+  for (i = 0; i < dict_get_var_cnt (dict); i++)
      {
        struct variable *v = dict_get_var (dict, i);
        int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
@@ -1429,7 +2017,7 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
        for (j = 1; j < nv; j++)
          var_by_value_idx[value_idx++] = NULL;
      }
-  assert (value_idx == r->flt64_cnt);
+  assert (value_idx == r->oct_cnt);
  
    return var_by_value_idx;
  }
@@ -1439,13 +2027,13 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
     is valid. */
  static struct variable *
  lookup_var_by_value_idx (struct sfm_reader *r,
-                         struct variable **var_by_value_idx, int value_idx) 
+                         struct variable **var_by_value_idx, int value_idx)
  {
    struct variable *var;
-  
-  if (value_idx < 1 || value_idx > r->flt64_cnt)
+
+  if (value_idx < 1 || value_idx > r->oct_cnt)
      sys_error (r, _("Variable index %d not in valid range 1...%d."),
-               value_idx, r->flt64_cnt);
+               value_idx, r->oct_cnt);
  
    var = var_by_value_idx[value_idx - 1];
    if (var == NULL)
@@ -1467,97 +2055,229 @@ lookup_var_by_short_name (struct dictionary *d, const char *short_name)
  
    /* First try looking up by full name.  This often succeeds. */
    var = dict_lookup_var (d, short_name);
-  if (var != NULL && !strcasecmp (var_get_short_name (var), short_name))
+  if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
      return var;
  
    /* Iterate through the whole dictionary as a fallback. */
    var_cnt = dict_get_var_cnt (d);
-  for (i = 0; i < var_cnt; i++) 
+  for (i = 0; i < var_cnt; i++)
      {
        var = dict_get_var (d, i);
-      if (!strcasecmp (var_get_short_name (var), short_name))
+      if (!strcasecmp (var_get_short_name (var, 0), short_name))
          return var;
      }
  
    return NULL;
  }
  \f
-/* Helpers for reading records that contain "variable=value"
-   pairs. */
+/* Helpers for reading records that contain structured text
+   strings. */
+
+/* Maximum number of warnings to issue for a single text
+   record. */
+#define MAX_TEXT_WARNINGS 5
  
  /* State. */
-struct variable_to_value_map 
+struct text_record
    {
      struct substring buffer;    /* Record contents. */
      size_t pos;                 /* Current position in buffer. */
+    int n_warnings;             /* Number of warnings issued or suppressed. */
    };
  
-/* Reads SIZE bytes into a "variable=value" map for R,
-   and returns the map. */
-static struct variable_to_value_map *
-open_variable_to_value_map (struct sfm_reader *r, size_t size) 
+/* Reads SIZE bytes into a text record for R,
+   and returns the new text record. */
+static struct text_record *
+open_text_record (struct sfm_reader *r, size_t size)
  {
-  struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map);
+  struct text_record *text = pool_alloc (r->pool, sizeof *text);
    char *buffer = pool_malloc (r->pool, size + 1);
    read_bytes (r, buffer, size);
-  map->buffer = ss_buffer (buffer, size);
-  map->pos = 0;
-  return map;
+  text->buffer = ss_buffer (buffer, size);
+  text->pos = 0;
+  text->n_warnings = 0;
+  return text;
  }
  
-/* Closes MAP and frees its storage.
-   Not really needed, because the pool will free the map anyway,
-   but can be used to free it earlier. */
+/* Closes TEXT, frees its storage, and issues a final warning
+   about suppressed warnings if necessary. */
  static void
-close_variable_to_value_map (struct sfm_reader *r,
-                             struct variable_to_value_map *map) 
+close_text_record (struct sfm_reader *r, struct text_record *text)
  {
-  pool_free (r->pool, ss_data (map->buffer));
+  if (text->n_warnings > MAX_TEXT_WARNINGS)
+    sys_warn (r, _("Suppressed %d additional related warnings."),
+              text->n_warnings - MAX_TEXT_WARNINGS);
+  pool_free (r->pool, ss_data (text->buffer));
  }
  
-/* Reads the next variable=value pair from MAP.
+/* Reads a variable=value pair from TEXT.
     Looks up the variable in DICT and stores it into *VAR.
     Stores a null-terminated value into *VALUE. */
  static bool
-read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict,
-                            struct variable_to_value_map *map,
-                            struct variable **var, char **value,
-                            int *warning_cnt) 
+read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
+                             struct text_record *text,
+                             struct variable **var, char **value)
  {
-  int max_warnings = 5;
-  
-  for (;;) 
+  for (;;)
      {
-      struct substring short_name_ss, value_ss;
-
-      if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss)
-          || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos,
-                           &value_ss)) 
-        {
-          if (*warning_cnt > max_warnings)
-            sys_warn (r, _("Suppressed %d additional variable map warnings."),
-                      *warning_cnt - max_warnings);
-          return false; 
-        }
+      if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
+        return false;
        
-      map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX),
-                           ss_buffer ("\t\0", 2));
+      *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
+      if (*value == NULL)
+        return false;
  
-      ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0';
-      *var = lookup_var_by_short_name (dict, ss_data (short_name_ss));
-      if (*var == NULL)
-        {
-          if (++*warning_cnt <= 5)
-            sys_warn (r, _("Variable map refers to unknown variable %s."),
-                      ss_data (short_name_ss));
-          continue;
-        }
+      text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
+                            ss_buffer ("\t\0", 2));
+
+      if (*var != NULL)
+        return true;
+    }
+}
+
+/* Reads a variable name from TEXT, ending at one of DELIMITERS,
+   and looks it up in DICT by its full name.  If the variable
+   exists, stores it in *VAR and returns true.  Returns false if
+   no token could be read, in which case *VAR is left untouched,
+   or if the name is unknown, in which case a warning is issued
+   and *VAR is set to a null pointer. */
+static bool
+text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
+                         struct text_record *text, struct substring delimiters,
+                         struct variable **var)
+{
+  char *token = text_get_token (text, delimiters, NULL);
+  bool found = false;
+
+  if (token != NULL)
+    {
+      *var = dict_lookup_var (dict, token);
+      found = *var != NULL;
+      if (!found)
+        text_warn (r, text,
+                   _("Dictionary record refers to unknown variable %s."),
+                   token);
+    }
+  return found;
+}
+
+
+/* Reads a short variable name from TEXT, ending at one of
+   DELIMITERS, and stores the variable in DICT that it identifies
+   in *VAR.  If there is no such variable, issues a warning and
+   stores a null pointer instead.  Returns false only if no token
+   could be read from TEXT, in which case *VAR is left
+   untouched. */
+static bool
+text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
+                      struct text_record *text, struct substring delimiters,
+                      struct variable **var)
+{
+  char *token = text_get_token (text, delimiters, NULL);
+
+  if (token != NULL)
+    {
+      struct variable *match = lookup_var_by_short_name (dict, token);
+
+      *var = match;
+      if (match == NULL)
+        text_warn (r, text,
+                   _("Dictionary record refers to unknown variable %s."),
+                   token);
+      return true;
+    }
+  return false;
+}
+
+/* Issues a warning for R's current file position, built from
+   FORMAT and the arguments that follow it, unless
+   MAX_TEXT_WARNINGS warnings have already been counted for TEXT.
+   The warning is counted against TEXT whether or not it is
+   displayed. */
+static void
+text_warn (struct sfm_reader *r, struct text_record *text,
+           const char *format, ...)
+{
+  int already_counted = text->n_warnings;
+  va_list ap;
+
+  text->n_warnings = already_counted + 1;
+  if (already_counted >= MAX_TEXT_WARNINGS)
+    return;
+
+  va_start (ap, format);
+  sys_msg (r, MW, format, ap);
+  va_end (ap);
+}
  
-      ss_data (value_ss)[ss_length (value_ss)] = '\0';
-      *value = ss_data (value_ss);
+/* Extracts the next token from TEXT, as delimited by DELIMITERS,
+   updating TEXT's position.  Returns a null pointer if
+   ss_tokenize() reports that there is no token.  Otherwise,
+   overwrites the byte just past the token with a null terminator,
+   first saving that byte into *DELIMITER if DELIMITER is nonnull,
+   and returns the token, which points into TEXT's buffer. */
+static char *
+text_get_token (struct text_record *text, struct substring delimiters,
+                char *delimiter)
+{
+  struct substring tok;
+  char *tok_start;
+  size_t tok_len;
+
+  if (!ss_tokenize (text->buffer, delimiters, &text->pos, &tok))
+    return NULL;
+
+  tok_start = ss_data (tok);
+  tok_len = ss_length (tok);
+  if (delimiter != NULL)
+    *delimiter = tok_start[tok_len];
+  tok_start[tok_len] = '\0';
+  return tok_start;
+}
+
+/* Reads an integer value expressed in decimal, then a space, then a string
+   that consists of exactly as many bytes as specified by the integer, then a
+   space, from TEXT.  Returns the string, null-terminated, as a subset of
+   TEXT's buffer (so the caller should not free the string).
+
+   Returns a null pointer, after issuing a warning, if TEXT does not have that
+   form.  Only bytes within the record (the first TEXT->buffer.length bytes of
+   the buffer) are examined, and a byte count too large for a size_t
+   saturates instead of wrapping around, so that it is reported as exceeding
+   the record length. */
+static const char *
+text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
+{
+  const size_t length = text->buffer.length;
+  size_t start;
+  size_t n;
+  char *s;
+
+  start = text->pos;
+  n = 0;
+  while (text->pos < length)
+    {
+      int c = text->buffer.string[text->pos];
+      if (c < '0' || c > '9')
+        break;
+      if (n > (SIZE_MAX - (c - '0')) / 10)
+        n = SIZE_MAX;           /* Saturate: N would overflow. */
+      else
+        n = (n * 10) + (c - '0');
+      text->pos++;
+    }
+  if (start == text->pos)
+    {
+      sys_warn (r, _("Expecting digit at offset %zu in MRSETS record."),
+                 text->pos);
+      return NULL;
+    }
+
+  /* The position is checked first so that no byte beyond the record is
+     compared against the space. */
+  if (text->pos >= length || !text_match (text, ' '))
+    {
+      sys_warn (r, _("Expecting space at offset %zu in MRSETS record."),
+                text->pos);
+      return NULL;
+    }
+
+  /* Written as a subtraction so that a huge N cannot make the sum wrap
+     around.  TEXT->pos cannot exceed LENGTH here. */
+  if (n > length - text->pos)
+    {
+      sys_warn (r, _("%zu-byte string starting at offset %zu "
+                     "exceeds record length %zu."),
+                n, text->pos, text->buffer.length);
+      return NULL;
+    }
+
+  /* The space that ends the string must itself lie within the record. */
+  s = &text->buffer.string[text->pos];
+  if (text->pos + n >= length || s[n] != ' ')
+    {
+      sys_warn (r,
+                _("Expecting space at offset %zu following %zu-byte string."),
+                text->pos + n, n);
+      return NULL;
+    }
+  s[n] = '\0';
+  text->pos += n + 1;
+  return s;
+}
  
+/* If the byte at TEXT's current position is C, advances past it
+   and returns true.  Otherwise leaves the position unchanged and
+   returns false.  A position at or beyond the end of TEXT's
+   record never matches, so no byte outside the record is
+   examined. */
+static bool
+text_match (struct text_record *text, char c)
+{
+  if (text->pos >= text->buffer.length)
+    return false;
+
+  if (text->buffer.string[text->pos] == c)
+    {
+      text->pos++;
        return true;
     }
+  else
+    return false;
+}
+
+/* Returns TEXT's current position, as a byte offset within its
+   buffer. */
+static size_t
+text_pos (const struct text_record *text)
+{
+  return text->pos;
   }
  \f
  /* Messages. */
@@ -1570,14 +2290,16 @@ sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
    struct string text;
  
    ds_init_empty (&text);
-  ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
-                 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
+  ds_put_format (&text, "`%s' near offset 0x%llx: ",
+                 fh_get_file_name (r->fh), (long long int) ftello (r->file));
    ds_put_vformat (&text, format, args);
  
    m.category = msg_class_to_category (class);
    m.severity = msg_class_to_severity (class);
    m.where.file_name = NULL;
    m.where.line_number = 0;
+  m.where.first_column = 0;
+  m.where.last_column = 0;
    m.text = ds_cstr (&text);
  
    msg_emit (&m);
@@ -1585,10 +2307,10 @@ sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
  
  /* Displays a warning for the current file position. */
  static void
-sys_warn (struct sfm_reader *r, const char *format, ...) 
+sys_warn (struct sfm_reader *r, const char *format, ...)
  {
    va_list args;
-  
+
    va_start (args, format);
    sys_msg (r, MW, format, args);
    va_end (args);
@@ -1598,10 +2320,10 @@ sys_warn (struct sfm_reader *r, const char *format, ...)
     marks it as in an error state,
     and aborts reading it using longjmp. */
  static void
-sys_error (struct sfm_reader *r, const char *format, ...) 
+sys_error (struct sfm_reader *r, const char *format, ...)
  {
    va_list args;
-  
+
    va_start (args, format);
    sys_msg (r, ME, format, args);
    va_end (args);
@@ -1651,28 +2373,28 @@ try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
  
  /* Reads a 32-bit signed integer from R and returns its value in
     host format. */
-static int32_t
-read_int32 (struct sfm_reader *r) 
+static int
+read_int (struct sfm_reader *r)
  {
-  uint8_t int32[4];
-  read_bytes (r, int32, sizeof int32);
-  return int32_to_native (r, int32);
+  uint8_t integer[4];
+  read_bytes (r, integer, sizeof integer);
+  return integer_get (r->integer_format, integer, sizeof integer);
  }
  
  /* Reads a 64-bit floating-point number from R and returns its
     value in host format. */
  static double
-read_flt64 (struct sfm_reader *r) 
+read_float (struct sfm_reader *r)
  {
-  uint8_t flt64[8];
-  read_bytes (r, flt64, sizeof flt64);
-  return flt64_to_double (r, flt64);
+  uint8_t number[8];
+  read_bytes (r, number, sizeof number);
+  return float_get_double (r->float_format, number);
  }
  
  /* Reads exactly SIZE - 1 bytes into BUFFER
     and stores a null byte into BUFFER[SIZE - 1]. */
  static void
-read_string (struct sfm_reader *r, char *buffer, size_t size) 
+read_string (struct sfm_reader *r, char *buffer, size_t size)
  {
    assert (size > 0);
    read_bytes (r, buffer, size - 1);
@@ -1683,7 +2405,7 @@ read_string (struct sfm_reader *r, char *buffer, size_t size)
  static void
  skip_bytes (struct sfm_reader *r, size_t bytes)
  {
-  while (bytes > 0) 
+  while (bytes > 0)
      {
        char buffer[1024];
        size_t chunk = MIN (sizeof buffer, bytes);
@@ -1692,34 +2414,7 @@ skip_bytes (struct sfm_reader *r, size_t bytes)
      }
  }
  \f
-/* Returns the value of the 32-bit signed integer at INT32,
-   converted from the format used by R to the host format. */
-static int32_t
-int32_to_native (const struct sfm_reader *r, const uint8_t int32[4]) 
-{
-  int32_t x;
-  if (r->integer_format == INTEGER_NATIVE)
-    memcpy (&x, int32, sizeof x);
-  else
-    x = integer_get (r->integer_format, int32, sizeof x);
-  return x;
-}
-
-/* Returns the value of the 64-bit floating point number at
-   FLT64, converted from the format used by R to the host
-   format. */
-static double
-flt64_to_double (const struct sfm_reader *r, const uint8_t flt64[8])
-{
-  double x;
-  if (r->float_format == FLOAT_NATIVE_DOUBLE)
-    memcpy (&x, flt64, sizeof x);
-  else
-    float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x);
-  return x;
-}
-\f
-static struct casereader_class sys_file_casereader_class = 
+static const struct casereader_class sys_file_casereader_class =
    {
      sys_file_casereader_read,
      sys_file_casereader_destroy,