treewide: Replace <name>_cnt by n_<name>s and <name>_cap by allocated_<name>.

[pspp] / src / language / data-io / matrix-data.c
diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c

index 99664fc6e66a2631cc6b8a60d1ad126ccf86726d..f510ec25162d194273de45aa9572077a8a718af3 100644 (file)
--- a/src/language/data-io/matrix-data.c
+++ b/src/language/data-io/matrix-data.c
@@ -1,1992 +1,1197 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
-   Written by Ben Pfaff <blp@gnu.org>.
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2017 Free Software Foundation, Inc.
  
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
  
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
  
     You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  
  #include <config.h>
  
-#include <stdlib.h>
-#include <ctype.h>
-#include <float.h>
-
-#include <data/case-source.h>
-#include <data/case.h>
-#include <data/data-in.h>
-#include <data/dictionary.h>
-#include <data/procedure.h>
-#include <data/variable.h>
-#include <language/command.h>
-#include <language/data-io/data-reader.h>
-#include <language/data-io/file-handle.h>
-#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <libpspp/alloc.h>
-#include <libpspp/array.h>
-#include <libpspp/assertion.h>
-#include <libpspp/compiler.h>
-#include <libpspp/message.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
-
-#include "size_max.h"
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_vector.h>
+
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/casewriter.h"
+#include "data/data-in.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/short-names.h"
+#include "data/transformations.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/data-io/data-parser.h"
+#include "language/data-io/data-reader.h"
+#include "language/data-io/file-handle.h"
+#include "language/data-io/inpt-pgm.h"
+#include "language/data-io/placement-parser.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
+#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
+#include "libpspp/intern.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
+
+#include "gl/c-ctype.h"
+#include "gl/minmax.h"
+#include "gl/xsize.h"
+#include "gl/xalloc.h"
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
-
-/* FIXME: /N subcommand not implemented.  It should be pretty simple,
-   too. */
-
-/* Different types of variables for MATRIX DATA procedure.  Order is
-   important: these are used for sort keys. */
-enum
-  {
-    MXD_SPLIT,                 /* SPLIT FILE variables. */
-    MXD_ROWTYPE,               /* ROWTYPE_. */
-    MXD_FACTOR,                        /* Factor variables. */
-    MXD_VARNAME,               /* VARNAME_. */
-    MXD_CONTINUOUS,            /* Continuous variables. */
-
-    MXD_COUNT
-  };
-
-/* Format type enums. */
-enum format_type
-  {
-    LIST,
-    FREE
-  };
-
-/* Matrix section enums. */
-enum matrix_section
-  {
-    LOWER,
-    UPPER,
-    FULL
-  };
-
-/* Diagonal inclusion enums. */
-enum include_diagonal
-  {
-    DIAGONAL,
-    NODIAGONAL
-  };
-
-/* CONTENTS types. */
-enum content_type
-  {
-    N_VECTOR,
-    N_SCALAR,
-    N_MATRIX,
-    MEAN,
-    STDDEV,
-    COUNT,
-    MSE,
-    DFE,
-    MAT,
-    COV,
-    CORR,
-    PROX,
-    
-    LPAREN,
-    RPAREN,
-    EOC
-  };
-
-/* 0=vector, 1=matrix, 2=scalar. */
-static const int content_type[PROX + 1] = 
+\f
+#define ROWTYPES                                \
+    /* Matrix row types. */                     \
+    RT(CORR,     2)                             \
+    RT(COV,      2)                             \
+    RT(MAT,      2)                             \
+    RT(N_MATRIX, 2)                             \
+    RT(PROX,     2)                             \
+                                                \
+    /* Vector row types. */                     \
+    RT(COUNT,    1)                             \
+    RT(DFE,      1)                             \
+    RT(MEAN,     1)                             \
+    RT(MSE,      1)                             \
+    RT(STDDEV,   1)                             \
+    RT(N, 1)                                    \
+                                                \
+    /* Scalar row types. */                     \
+    RT(N_SCALAR, 0)
+
+enum rowtype
    {
-    0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+#define RT(NAME, DIMS) C_##NAME,
+    ROWTYPES
+#undef RT
    };
  
-/* Name of each content type. */
-static const char *content_names[PROX + 1] =
+enum
    {
-    "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE",
-    "DFE", "MAT", "COV", "CORR", "PROX",
+#define RT(NAME, DIMS) +1
+    N_ROWTYPES = ROWTYPES
+#undef RT
    };
+verify (N_ROWTYPES < 32);
  
-/* A MATRIX DATA input program. */
-struct matrix_data_pgm 
-  {
-    struct pool *container;     /* Arena used for all allocations. */
-    struct dfm_reader *reader;  /* Data file to read. */
-
-    /* Format. */
-    enum format_type fmt;      /* LIST or FREE. */
-    enum matrix_section section;/* LOWER or UPPER or FULL. */
-    enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */
-
-    int explicit_rowtype;       /* ROWTYPE_ specified explicitly in data? */
-    struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */
-    
-    struct variable *single_split; /* Single SPLIT FILE variable. */
-
-    /* Factor variables.  */
-    size_t n_factors;           /* Number of factor variables. */
-    struct variable **factors;  /* Factor variables. */
-    int is_per_factor[PROX + 1]; /* Is there per-factor data? */
-
-    int cells;                  /* Number of cells, or -1 if none. */
-
-    int pop_n;                  /* Population N specified by user. */
-
-    /* CONTENTS subcommand. */
-    int contents[EOC * 3 + 1];  /* Contents. */
-    int n_contents;             /* Number of entries. */
-
-    /* Continuous variables. */
-    int n_continuous;           /* Number of continuous variables. */
-    int first_continuous;       /* Index into dataset_dict (current_dataset).var of
-                                   first continuous variable. */
+/* Returns the number of dimensions in the indexes for row type RT.  A matrix
+   has 2 dimensions, a vector has 1, a scalar has 0. */
+static int
+rowtype_dimensions (enum rowtype rt)
+{
+  static const int rowtype_dims[N_ROWTYPES] = {
+#define RT(NAME, DIMS) [C_##NAME] = DIMS,
+    ROWTYPES
+#undef RT
    };
+  return rowtype_dims[rt];
+}
  
-/* Auxiliary data attached to MATRIX DATA variables. */
-struct mxd_var 
-  {
-    int var_type;              /* Variable type. */
-    int sub_type;              /* Subtype. */
+static struct substring
+rowtype_name (enum rowtype rt)
+{
+  static const struct substring rowtype_names[N_ROWTYPES] = {
+#define RT(NAME, DIMS) [C_##NAME] = SS_LITERAL_INITIALIZER (#NAME),
+    ROWTYPES
+#undef RT
    };
  
-static const struct case_source_class matrix_data_with_rowtype_source_class;
-static const struct case_source_class matrix_data_without_rowtype_source_class;
-
-static int compare_variables_by_mxd_var_type (const void *pa,
-                                            const void *pb);
-static bool read_matrices_without_rowtype (struct matrix_data_pgm *);
-static bool read_matrices_with_rowtype (struct matrix_data_pgm *);
-static int string_to_content_type (char *, int *);
-static void attach_mxd_aux (struct variable *, int var_type, int sub_type);
+  return rowtype_names[rt];
+}
  
-int
-cmd_matrix_data (void)
+static bool
+rowtype_from_string (struct substring token, enum rowtype *rt)
  {
-  struct pool *pool;
-  struct matrix_data_pgm *mx;
-  struct file_handle *fh = fh_inline_file ();
-  bool ok;
-    
-  unsigned seen = 0;
-  
-  discard_variables (current_dataset);
-
-  pool = pool_create ();
-  mx = pool_alloc (pool, sizeof *mx);
-  mx->container = pool;
-  mx->reader = NULL;
-  mx->fmt = LIST;
-  mx->section = LOWER;
-  mx->diag = DIAGONAL;
-  mx->explicit_rowtype = 0;
-  mx->rowtype_ = NULL;
-  mx->varname_ = NULL;
-  mx->single_split = NULL;
-  mx->n_factors = 0;
-  mx->factors = NULL;
-  memset (mx->is_per_factor, 0, sizeof mx->is_per_factor);
-  mx->cells = -1;
-  mx->pop_n = -1;
-  mx->n_contents = 0;
-  mx->n_continuous = 0;
-  mx->first_continuous = 0;
-  while (token != '.')
-    {
-      lex_match ('/');
-
-      if (lex_match_id ("VARIABLES"))
-       {
-         char **v;
-         size_t nv;
-
-         if (seen & 1)
-           {
-             msg (SE, _("VARIABLES subcommand multiply specified."));
-             goto lossage;
-           }
-         seen |= 1;
-         
-         lex_match ('=');
-         if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE))
-           goto lossage;
-         
-         {
-           size_t i;
-
-           for (i = 0; i < nv; i++)
-             if (!strcasecmp (v[i], "VARNAME_"))
-               {
-                 msg (SE, _("VARNAME_ cannot be explicitly specified on "
-                            "VARIABLES."));
-                 for (i = 0; i < nv; i++)
-                   free (v[i]);
-                 free (v);
-                 goto lossage;
-               }
-         }
-         
-         {
-           size_t i;
-
-           for (i = 0; i < nv; i++)
-             {
-               struct variable *new_var;
-               
-               if (strcasecmp (v[i], "ROWTYPE_"))
-                 {
-                   new_var = dict_create_var_assert (dataset_dict (current_dataset), v[i], 0);
-                    attach_mxd_aux (new_var, MXD_CONTINUOUS, i);
-                  }
-               else
-                 mx->explicit_rowtype = 1;
-               free (v[i]);
-             }
-           free (v);
-         }
-         
-          mx->rowtype_ = dict_create_var_assert (dataset_dict (current_dataset),
-                                                 "ROWTYPE_", 8);
-          attach_mxd_aux (mx->rowtype_, MXD_ROWTYPE, 0);
-       }
-      else if (lex_match_id ("FILE"))
-       {
-         lex_match ('=');
-         fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
-         if (fh == NULL)
-           goto lossage;
-       }
-      else if (lex_match_id ("FORMAT"))
-       {
-         lex_match ('=');
-
-         while (token == T_ID)
-           {
-             if (lex_match_id ("LIST"))
-               mx->fmt = LIST;
-             else if (lex_match_id ("FREE"))
-               mx->fmt = FREE;
-             else if (lex_match_id ("LOWER"))
-               mx->section = LOWER;
-             else if (lex_match_id ("UPPER"))
-               mx->section = UPPER;
-             else if (lex_match_id ("FULL"))
-               mx->section = FULL;
-             else if (lex_match_id ("DIAGONAL"))
-               mx->diag = DIAGONAL;
-             else if (lex_match_id ("NODIAGONAL"))
-               mx->diag = NODIAGONAL;
-             else 
-               {
-                 lex_error (_("in FORMAT subcommand"));
-                 goto lossage;
-               }
-           }
-       }
-      else if (lex_match_id ("SPLIT"))
-       {
-         lex_match ('=');
-
-         if (seen & 2)
-           {
-             msg (SE, _("SPLIT subcommand multiply specified."));
-             goto lossage;
-           }
-         seen |= 2;
-         
-         if (token != T_ID)
-           {
-             lex_error (_("in SPLIT subcommand"));
-             goto lossage;
-           }
-         
-         if (dict_lookup_var (dataset_dict (current_dataset), tokid) == NULL
-             && (lex_look_ahead () == '.' || lex_look_ahead () == '/'))
-           {
-             if (!strcasecmp (tokid, "ROWTYPE_")
-                  || !strcasecmp (tokid, "VARNAME_"))
-               {
-                 msg (SE, _("Split variable may not be named ROWTYPE_ "
-                            "or VARNAME_."));
-                 goto lossage;
-               }
-
-             mx->single_split = dict_create_var_assert (dataset_dict (current_dataset),
-                                                         tokid, 0);
-              attach_mxd_aux (mx->single_split, MXD_CONTINUOUS, 0);
-             lex_get ();
-
-              dict_set_split_vars (dataset_dict (current_dataset), &mx->single_split, 1);
-           }
-         else
-           {
-             struct variable **split;
-             size_t n;
-
-             if (!parse_variables (dataset_dict (current_dataset), &split, &n, PV_NO_DUPLICATE))
-               goto lossage;
-
-              dict_set_split_vars (dataset_dict (current_dataset), split, n);
-           }
-         
-         {
-            struct variable *const *split = dict_get_split_vars (dataset_dict (current_dataset));
-            size_t split_cnt = dict_get_split_cnt (dataset_dict (current_dataset));
-            int i;
-
-            for (i = 0; i < split_cnt; i++)
-              {
-                struct mxd_var *mv = split[i]->aux;
-                assert (mv != NULL);
-               if (mv->var_type != MXD_CONTINUOUS)
-                 {
-                   msg (SE, _("Split variable %s is already another type."),
-                        tokid);
-                   goto lossage;
-                 }
-                var_clear_aux (split[i]);
-                attach_mxd_aux (split[i], MXD_SPLIT, i);
-              }
-         }
-       }
-      else if (lex_match_id ("FACTORS"))
-       {
-         lex_match ('=');
-         
-         if (seen & 4)
-           {
-             msg (SE, _("FACTORS subcommand multiply specified."));
-             goto lossage;
-           }
-         seen |= 4;
-
-         if (!parse_variables (dataset_dict (current_dataset), &mx->factors, &mx->n_factors,
-                                PV_NONE))
-           goto lossage;
-         
-         {
-           size_t i;
-           
-           for (i = 0; i < mx->n_factors; i++)
-             {
-                struct variable *v = mx->factors[i];
-                struct mxd_var *mv = v->aux;
-                assert (mv != NULL);
-               if (mv->var_type != MXD_CONTINUOUS)
-                 {
-                   msg (SE, _("Factor variable %s is already another type."),
-                        tokid);
-                   goto lossage;
-                 }
-                var_clear_aux (v);
-                attach_mxd_aux (v, MXD_FACTOR, i);
-             }
-         }
-       }
-      else if (lex_match_id ("CELLS"))
-       {
-         lex_match ('=');
-         
-         if (mx->cells != -1)
-           {
-             msg (SE, _("CELLS subcommand multiply specified."));
-             goto lossage;
-           }
-
-         if (!lex_is_integer () || lex_integer () < 1)
-           {
-             lex_error (_("expecting positive integer"));
-             goto lossage;
-           }
-
-         mx->cells = lex_integer ();
-         lex_get ();
-       }
-      else if (lex_match_id ("N"))
-       {
-         lex_match ('=');
-
-         if (mx->pop_n != -1)
-           {
-             msg (SE, _("N subcommand multiply specified."));
-             goto lossage;
-           }
-
-         if (!lex_is_integer () || lex_integer () < 1)
-           {
-             lex_error (_("expecting positive integer"));
-             goto lossage;
-           }
-
-         mx->pop_n = lex_integer ();
-         lex_get ();
-       }
-      else if (lex_match_id ("CONTENTS"))
-       {
-         int inside_parens = 0;
-         unsigned collide = 0;
-         int item;
-         
-         if (seen & 8)
-           {
-             msg (SE, _("CONTENTS subcommand multiply specified."));
-             goto lossage;
-           }
-         seen |= 8;
-
-         lex_match ('=');
-         
-         {
-           int i;
-           
-           for (i = 0; i <= PROX; i++)
-             mx->is_per_factor[i] = 0;
-         }
-
-         for (;;)
-           {
-             if (lex_match ('('))
-               {
-                 if (inside_parens)
-                   {
-                     msg (SE, _("Nested parentheses not allowed."));
-                     goto lossage;
-                   }
-                 inside_parens = 1;
-                 item = LPAREN;
-               }
-             else if (lex_match (')'))
-               {
-                 if (!inside_parens)
-                   {
-                     msg (SE, _("Mismatched right parenthesis (`(')."));
-                     goto lossage;
-                   }
-                 if (mx->contents[mx->n_contents - 1] == LPAREN)
-                   {
-                     msg (SE, _("Empty parentheses not allowed."));
-                     goto lossage;
-                   }
-                 inside_parens = 0;
-                 item = RPAREN;
-               }
-             else 
-               {
-                 int content_type;
-                 int collide_index;
-                 
-                 if (token != T_ID)
-                   {
-                     lex_error (_("in CONTENTS subcommand"));
-                     goto lossage;
-                   }
-
-                 content_type = string_to_content_type (tokid,
-                                                        &collide_index);
-                 if (content_type == -1)
-                   {
-                     lex_error (_("in CONTENTS subcommand"));
-                     goto lossage;
-                   }
-                 lex_get ();
-
-                 if (collide & (1 << collide_index))
-                   {
-                     msg (SE, _("Content multiply specified for %s."),
-                          content_names[content_type]);
-                     goto lossage;
-                   }
-                 collide |= (1 << collide_index);
-                 
-                 item = content_type;
-                 mx->is_per_factor[item] = inside_parens;
-               }
-             mx->contents[mx->n_contents++] = item;
-
-             if (token == '/' || token == '.')
-               break;
-           }
-
-         if (inside_parens)
-           {
-             msg (SE, _("Missing right parenthesis."));
-             goto lossage;
-           }
-         mx->contents[mx->n_contents] = EOC;
-       }
-      else 
-       {
-         lex_error (NULL);
-         goto lossage;
-       }
-    }
-  
-  if (token != '.')
-    {
-      lex_error (_("expecting end of command"));
-      goto lossage;
-    }
-  
-  if (!(seen & 1))
-    {
-      msg (SE, _("Missing VARIABLES subcommand."));
-      goto lossage;
-    }
-  
-  if (!mx->n_contents && !mx->explicit_rowtype)
-    {
-      msg (SW, _("CONTENTS subcommand not specified: assuming file "
-                "contains only CORR matrix."));
-
-      mx->contents[0] = CORR;
-      mx->contents[1] = EOC;
-      mx->n_contents = 0;
-    }
+  ss_trim (&token, ss_cstr (CC_SPACES));
+  for (size_t i = 0; i < N_ROWTYPES; i++)
+    if (lex_id_match (rowtype_name (i), token))
+      {
+        *rt = i;
+        return true;
+      }
  
-  if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1)
+  if (lex_id_match (ss_cstr ("N_VECTOR"), token))
      {
-      msg (SE, _("Missing CELLS subcommand.  CELLS is required "
-                "when ROWTYPE_ is not given in the data and "
-                "factors are present."));
-      goto lossage;
+      *rt = C_N;
+      return true;
      }
-
-  if (mx->explicit_rowtype && mx->single_split)
+  else if (lex_id_match (ss_cstr ("SD"), token))
      {
-      msg (SE, _("Split file values must be present in the data when "
-                "ROWTYPE_ is present."));
-      goto lossage;
+      *rt = C_STDDEV;
+      return true;
      }
-      
-  /* Create VARNAME_. */
-  mx->varname_ = dict_create_var_assert (dataset_dict (current_dataset), "VARNAME_", 8);
-  attach_mxd_aux (mx->varname_, MXD_VARNAME, 0);
-  
-  /* Sort the dictionary variables into the desired order for the
-     system file output. */
-  {
-    struct variable **v;
-    size_t nv;
  
-    dict_get_vars (dataset_dict (current_dataset), &v, &nv, 0);
-    qsort (v, nv, sizeof *v, compare_variables_by_mxd_var_type);
-    dict_reorder_vars (dataset_dict (current_dataset), v, nv);
-    free (v);
-  }
+  return false;
+}
  
-  /* Set formats. */
+static bool
+rowtype_parse (struct lexer *lexer, enum rowtype *rt)
+{
+  bool parsed = (lex_token (lexer) == T_ID
+                 && rowtype_from_string (lex_tokss (lexer), rt));
+  if (parsed)
+    lex_get (lexer);
+  return parsed;
+}
+\f
+struct matrix_format
    {
-    static const struct fmt_spec fmt_tab[MXD_COUNT] =
+    bool span;
+    enum triangle
        {
-       {FMT_F, 4, 0},
-        {FMT_A, 8, 0},
-        {FMT_F, 4, 0},
-       {FMT_A, 8, 0},
-       {FMT_F, 10, 4},
-      };
-    
-    int i;
-
-    mx->first_continuous = -1;
-    for (i = 0; i < dict_get_var_cnt (dataset_dict (current_dataset)); i++)
+        LOWER,
+        UPPER,
+        FULL
+      }
+    triangle;
+    enum diagonal
        {
-       struct variable *v = dict_get_var (dataset_dict (current_dataset), i);
-        struct mxd_var *mv = v->aux;
-       int type = mv->var_type;
-       
-       assert (type >= 0 && type < MXD_COUNT);
-       v->print = v->write = fmt_tab[type];
-
-       if (type == MXD_CONTINUOUS)
-         mx->n_continuous++;
-       if (mx->first_continuous == -1 && type == MXD_CONTINUOUS)
-         mx->first_continuous = i;
+        DIAGONAL,
+        NO_DIAGONAL
        }
-  }
-
-  if (mx->n_continuous == 0)
-    {
-      msg (SE, _("No continuous variables specified."));
-      goto lossage;
-    }
-
-  mx->reader = dfm_open_reader (fh);
-  if (mx->reader == NULL)
-    goto lossage;
-
-  if (mx->explicit_rowtype)
-    ok = read_matrices_with_rowtype (mx);
-  else
-    ok = read_matrices_without_rowtype (mx);
-
-  dfm_close_reader (mx->reader);
-
-  pool_destroy (mx->container);
-
-  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
+    diagonal;
  
-lossage:
-  discard_variables (current_dataset);
-  free (mx->factors);
-  pool_destroy (mx->container);
-  return CMD_CASCADING_FAILURE;
-}
+    bool input_rowtype;
+    struct variable **input_vars;
+    size_t n_input_vars;
  
-/* Look up string S as a content-type name and return the
-   corresponding enumerated value, or -1 if there is no match.  If
-   COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use
-   as a bit-index) which can be used for determining whether a related
-   statistic has already been used. */
-static int
-string_to_content_type (char *s, int *collide)
-{
-  static const struct
-    {
-      int value;
-      int collide;
-      const char *string;
-    }
-  *tp,
-  tab[] = 
-    {
-      {N_VECTOR, 0, "N_VECTOR"},
-      {N_VECTOR, 0, "N"},
-      {N_SCALAR, 0, "N_SCALAR"},
-      {N_MATRIX, 1, "N_MATRIX"},
-      {MEAN, 2, "MEAN"},
-      {STDDEV, 3, "STDDEV"},
-      {STDDEV, 3, "SD"},
-      {COUNT, 4, "COUNT"},
-      {MSE, 5, "MSE"},
-      {DFE, 6, "DFE"},
-      {MAT, 7, "MAT"},
-      {COV, 8, "COV"},
-      {CORR, 9, "CORR"},
-      {PROX, 10, "PROX"},
-      {-1, -1, NULL},
-    };
-
-  for (tp = tab; tp->value != -1; tp++)
-    if (!strcasecmp (s, tp->string))
+    /* How to read matrices with each possible number of dimensions (0=scalar,
+       1=vector, 2=matrix). */
+    struct matrix_sched
        {
-       if (collide)
-         *collide = tp->collide;
-       
-       return tp->value;
+        /* Number of rows and columns in the matrix: (1,1) for a scalar, (1,n) for
+           a vector, (n,n) for a matrix. */
+        int nr, nc;
+
+        /* Rows of data to read and the number of columns in each.  Because we
+           often read just a triangle and sometimes omit the diagonal, 'n_rp' can
+           be less than 'nr' and 'rp[i]->y' isn't always 'y'. */
+        struct row_sched
+          {
+            /* The y-value of the row inside the matrix. */
+            int y;
+
+            /* first and last (exclusive) columns to read in this row. */
+            int x0, x1;
+          }
+          *rp;
+        size_t n_rp;
        }
-  return -1;
-}
+    ms[3];
+
+    struct variable *rowtype;
+    struct variable *varname;
+    struct variable **cvars;
+    int n_cvars;
+    struct variable **svars;
+    size_t *svar_indexes;
+    size_t n_svars;
+    struct variable **fvars;
+    size_t *fvar_indexes;
+    size_t n_fvars;
+    int cells;
+    int n;
+
+    unsigned int pooled_rowtype_mask;
+    unsigned int factor_rowtype_mask;
+
+    struct content
+      {
+        bool open;
+        enum rowtype rowtype;
+        bool close;
+      }
+    *contents;
+    size_t n_contents;
+  };
  
-/* Compare two variables using p.mxd.var_type and p.mxd.sub_type
-   fields. */
-static int
-compare_variables_by_mxd_var_type (const void *a_, const void *b_)
+static void
+matrix_format_uninit (struct matrix_format *mf)
  {
-  struct variable *const *pa = a_;
-  struct variable *const *pb = b_;
-  const struct mxd_var *a = (*pa)->aux;
-  const struct mxd_var *b = (*pb)->aux;
-  
-  if (a->var_type != b->var_type)
-    return a->var_type > b->var_type ? 1 : -1;
-  else
-    return a->sub_type < b->sub_type ? -1 : a->sub_type > b->sub_type;
+  free (mf->input_vars);
+  for (int i = 0; i < 3; i++)
+    free (mf->ms[i].rp);
+  free (mf->cvars);
+  free (mf->svars);
+  free (mf->svar_indexes);
+  free (mf->fvars);
+  free (mf->fvar_indexes);
+  free (mf->contents);
  }
  
-/* Attaches a struct mxd_var with the specific member values to
-   V. */
  static void
-attach_mxd_aux (struct variable *v, int var_type, int sub_type) 
+set_string (struct ccase *outcase, const struct variable *var,
+            struct substring src)
  {
-  struct mxd_var *mv;
-  
-  assert (v->aux == NULL);
-  mv = xmalloc (sizeof *mv);
-  mv->var_type = var_type;
-  mv->sub_type = sub_type;
-  var_attach_aux (v, mv, var_dtor_free);
+  struct substring dst = case_ss (outcase, var);
+  for (size_t i = 0; i < dst.length; i++)
+    dst.string[i] = i < src.length ? src.string[i] : ' ';
  }
-\f
-/* Matrix tokenizer. */
-
-/* Matrix token types. */
-enum matrix_token_type
-  {
-    MNUM,              /* Number. */
-    MSTR               /* String. */
-  };
-
-/* A MATRIX DATA parsing token. */
-struct matrix_token
-  {
-    enum matrix_token_type type; 
-    double number;       /* MNUM: token value. */
-    char *string;        /* MSTR: token string; not null-terminated. */
-    int length;          /* MSTR: tokstr length. */
-  };
-
-static int mget_token (struct matrix_token *, struct dfm_reader *);
-
-#if DEBUGGING
-#define mget_token(TOKEN, READER) mget_token_dump(TOKEN, READER)
  
  static void
-mdump_token (const struct matrix_token *token)
+parse_msg (struct dfm_reader *reader, const struct substring *token,
+           char *text, enum msg_severity severity)
  {
-  switch (token->type)
+  int first_column = 0;
+  if (token)
      {
-    case MNUM:
-      printf (" #%g", token->number);
-      break;
-    case MSTR:
-      printf (" '%.*s'", token->length, token->string);
-      break;
-    default:
-      NOT_REACHED ();
+      struct substring line = dfm_get_record (reader);
+      if (token->string >= line.string && token->string < ss_end (line))
+        first_column = ss_pointer_to_position (line, token->string) + 1;
      }
-  fflush (stdout);
+
+  int line_number = dfm_get_line_number (reader);
+  struct msg_location *location = xmalloc (sizeof *location);
+  int last_column = (first_column && token->length
+                     ? first_column + token->length - 1
+                     : 0);
+  *location = (struct msg_location) {
+    .file_name = intern_new (dfm_get_file_name (reader)),
+    .start = { .line = line_number, .column = first_column },
+    .end = { .line = line_number, .column = last_column },
+  };
+  struct msg *m = xmalloc (sizeof *m);
+  *m = (struct msg) {
+    .category = MSG_C_DATA,
+    .severity = severity,
+    .location = location,
+    .text = text,
+  };
+  msg_emit (m);
  }
  
-static int
-mget_token_dump (struct matrix_token *token, struct dfm_reader *reader)
+static void PRINTF_FORMAT (3, 4)
+parse_warning (struct dfm_reader *reader, const struct substring *token,
+               const char *format, ...)
  {
-  int result = (mget_token) (token, reader);
-  mdump_token (token);
-  return result;
+  va_list args;
+  va_start (args, format);
+  parse_msg (reader, token, xvasprintf (format, args), MSG_S_WARNING);
+  va_end (args);
  }
-#endif
  
-/* Return the current position in READER. */
-static const char *
-context (struct dfm_reader *reader)
+static void PRINTF_FORMAT (3, 4)
+parse_error (struct dfm_reader *reader, const struct substring *token,
+             const char *format, ...)
  {
-  static struct string buf = DS_EMPTY_INITIALIZER;
-
-  ds_clear (&buf);
-  if (dfm_eof (reader))
-    ds_assign_cstr (&buf, "at end of file");
-  else 
-    {
-      struct substring p;
-      
-      p = dfm_get_record (reader);
-      ss_ltrim (&p, ss_cstr (CC_SPACES));
-      if (ss_is_empty (p))
-        ds_assign_cstr (&buf, "at end of line");
-      else
-        ds_put_format (&buf, "before `%.*s'",
-                       (int) ss_cspan (p, ss_cstr (CC_SPACES)), ss_data (p));
-    }
-  
-  return ds_cstr (&buf);
+  va_list args;
+  va_start (args, format);
+  parse_msg (reader, token, xvasprintf (format, args), MSG_S_ERROR);
+  va_end (args);
  }
  
-/* Is there at least one token left in the data file? */
+/* Advance to beginning of next token. */
  static bool
-another_token (struct dfm_reader *reader)
+more_tokens (struct substring *p, struct dfm_reader *r)
  {
    for (;;)
      {
-      struct substring p;
-      size_t space_cnt;
-      
-      if (dfm_eof (reader))
-        return false;
-
-      p = dfm_get_record (reader);
-      space_cnt = ss_span (p, ss_cstr (CC_SPACES));
-      if (space_cnt < ss_length (p)) 
-        {
-          dfm_forward_columns (reader, space_cnt);
-          return true;
-        }
+      ss_ltrim (p, ss_cstr (CC_SPACES ","));
+      if (p->length)
+        return true;
  
-      dfm_forward_record (reader);
+      dfm_forward_record (r);
+      if (dfm_eof (r))
+        return false;
+      *p = dfm_get_record (r);
      }
-  NOT_REACHED();
  }
  
-/* Parse a MATRIX DATA token from READER into TOKEN. */
-static int
-(mget_token) (struct matrix_token *token, struct dfm_reader *reader)
+static bool
+next_token (struct substring *p, struct dfm_reader *r, struct substring *token)
  {
-  struct substring line, p;
-  struct substring s;
-  int c;
-
-  if (!another_token (reader))
-    return 0;
+  if (!more_tokens (p, r))
+    return false;
  
-  line = p = dfm_get_record (reader);
-
-  /* Three types of fields: quoted with ', quoted with ", unquoted. */
-  c = ss_first (p);
+  /* Collect token. */
+  int c = ss_first (*p);
    if (c == '\'' || c == '"')
      {
-      ss_get_char (&p);
-      if (!ss_get_until (&p, c, &s))
-        msg (SW, _("Scope of string exceeds line."));
+      ss_advance (p, 1);
+      ss_get_until (p, c, token);
      }
    else
      {
-      bool is_num = isdigit (c) || c == '.';
-      const char *start = ss_data (p);
-      
-      for (;;) 
+      size_t n = 1;
+      for (;;)
          {
-          c = ss_first (p);
-          if (strchr (CC_SPACES ",-+", c) != NULL)
+          c = ss_at (*p, n);
+          if (c == EOF
+              || ss_find_byte (ss_cstr (CC_SPACES ","), c) != SIZE_MAX
+              || ((c == '+' || c == '-')
+                  && ss_find_byte (ss_cstr ("dDeE"),
+                                   ss_at (*p, n - 1)) == SIZE_MAX))
              break;
-
-          if (isdigit (c))
-            is_num = true;
-          if (strchr ("deDE", c) && strchr ("+-", ss_at (p, 1)))
-            {
-              is_num = true;
-              ss_advance (&p, 2);
-            }
-          else
-            ss_advance (&p, 1);
+          n++;
          }
-      s = ss_buffer (start, ss_data (p) - start);
-
-      if (is_num)
-       {
-         struct data_in di;
-
-         di.s = ss_data (s);
-         di.e = ss_end (s);
-         di.v = (union value *) &token->number;
-         di.f1 = dfm_get_column (reader, di.s);
-         di.format = make_output_format (FMT_F, token->length, 0);
-
-         data_in (&di);
-       }
-      else
-       token->type = MSTR;
+      ss_get_bytes (p, n, token);
      }
-  token->string = ss_data (s);
-  token->length = ss_length (s);
-  
-  dfm_reread_record (reader, dfm_get_column (reader, ss_end (s)));
-    
-  return 1;
+  return true;
  }
  
-/* Forcibly skip the end of a line for content type CONTENT in
-   READER. */
-static int
-force_eol (struct dfm_reader *reader, const char *content)
+static bool
+next_number (struct substring *p, struct dfm_reader *r, double *d)
  {
-  struct substring p;
-
-  if (dfm_eof (reader))
-    return 0;
-
-  p = dfm_get_record (reader);
-  if (ss_span (p, ss_cstr (CC_SPACES)) != ss_length (p))
+  struct substring token;
+  if (!next_token (p, r, &token))
+    return false;
+
+  union value v;
+  char *error = data_in (token, dfm_reader_get_encoding (r), FMT_F,
+                         settings_get_fmt_settings (), &v, 0, NULL);
+  if (error)
      {
-      msg (SE, _("End of line expected %s while reading %s."),
-          context (reader), content);
-      return 0;
+      parse_error (r, &token, "%s", error);
+      free (error);
      }
-  
-  dfm_forward_record (reader);
-  return 1;
+  *d = v.f;
+  return true;
  }
-\f
-/* Back end, omitting ROWTYPE_. */
  
-struct nr_aux_data 
-  {
-    struct matrix_data_pgm *mx; /* MATRIX DATA program. */
-    double ***data;             /* MATRIX DATA data. */
-    double *factor_values;      /* Factor values. */
-    int max_cell_idx;           /* Max-numbered cell that we have
-                                   read so far, plus one. */
-    double *split_values;       /* SPLIT FILE variable values. */
-  };
-
-static bool nr_read_splits (struct nr_aux_data *, int compare);
-static bool nr_read_factors (struct nr_aux_data *, int cell);
-static bool nr_output_data (struct nr_aux_data *, struct ccase *,
-                            write_case_func *, write_case_data);
-static bool matrix_data_read_without_rowtype (struct case_source *source,
-                                              struct ccase *,
-                                              write_case_func *,
-                                              write_case_data);
-
-/* Read from the data file and write it to the active file.
-   Returns true if successful, false if an I/O error occurred. */
  static bool
-read_matrices_without_rowtype (struct matrix_data_pgm *mx)
+next_rowtype (struct substring *p, struct dfm_reader *r, enum rowtype *rt)
  {
-  struct nr_aux_data nr;
-  bool ok;
-  
-  if (mx->cells == -1)
-    mx->cells = 1;
-
-  nr.mx = mx;
-  nr.data = NULL;
-  nr.factor_values = xnmalloc (mx->n_factors * mx->cells,
-                               sizeof *nr.factor_values);
-  nr.max_cell_idx = 0;
-  nr.split_values = xnmalloc (dict_get_split_cnt (dataset_dict (current_dataset)),
-                              sizeof *nr.split_values);
-
-  proc_set_source (current_dataset, create_case_source (
-                     &matrix_data_without_rowtype_source_class, &nr));
-  
-  ok = procedure (current_dataset,NULL, NULL);
-
-  free (nr.split_values);
-  free (nr.factor_values);
-
-  return ok;
+  struct substring token;
+  if (!next_token (p, r, &token))
+    return false;
+
+  if (rowtype_from_string (token, rt))
+    return true;
+
+  parse_error (r, &token, _("Unknown row type \"%.*s\"."),
+               (int) token.length, token.string);
+  return false;
  }
  
-/* Mirror data across the diagonal of matrix CP which contains
-   CONTENT type data. */
-static void
-fill_matrix (struct matrix_data_pgm *mx, int content, double *cp)
-{
-  int type = content_type[content];
+struct read_matrix_params
+  {
+    /* Adjustments to first and last row to read. */
+    int dy0, dy1;
  
-  if (type == 1 && mx->section != FULL)
-    {
-      if (mx->diag == NODIAGONAL)
-       {
-         const double fill = content == CORR ? 1.0 : SYSMIS;
-         int i;
+    /* Left and right columns to read in first row, inclusive.
+       For x1, INT_MAX is the rightmost column. */
+    int x0, x1;
  
-         for (i = 0; i < mx->n_continuous; i++)
-           cp[i * (1 + mx->n_continuous)] = fill;
-       }
-      
-      {
-       int c, r;
-       
-       if (mx->section == LOWER)
-         {
-           int n_lines = mx->n_continuous;
-           if (mx->section != FULL && mx->diag == NODIAGONAL)
-             n_lines--;
-           
-           for (r = 1; r < n_lines; r++)
-             for (c = 0; c < r; c++)
-               cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous];
-         }
-       else 
-         {
-           assert (mx->section == UPPER);
-           for (r = 1; r < mx->n_continuous; r++)
-             for (c = 0; c < r; c++)
-               cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous];
-         }
-      }
+    /* Adjustment to x0 and x1 for each subsequent row we read.  Each of these
+       is 0 to keep it the same or -1 or +1 to adjust it by that much. */
+    int dx0, dx1;
+  };
+
+static const struct read_matrix_params *
+get_read_matrix_params (const struct matrix_format *mf)
+{
+  if (mf->triangle == FULL)
+    {
+      /* 1 2 3 4
+         2 1 5 6
+         3 5 1 7
+         4 6 7 1 */
+      static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 0, 0 };
+      return &rmp;
      }
-  else if (type == 2)
+  else if (mf->triangle == LOWER)
      {
-      int c;
-
-      for (c = 1; c < mx->n_continuous; c++)
-       cp[c] = cp[0];
+      if (mf->diagonal == DIAGONAL)
+        {
+          /* 1 . . .
+             2 1 . .
+             3 5 1 .
+             4 6 7 1 */
+          static const struct read_matrix_params rmp = { 0, 0, 0, 0, 0, 1 };
+          return &rmp;
+        }
+      else
+        {
+          /* . . . .
+             2 . . .
+             3 5 . .
+             4 6 7 . */
+          static const struct read_matrix_params rmp = { 1, 0, 0, 0, 0, 1 };
+          return &rmp;
+        }
      }
-}
-
-/* Read data lines for content type CONTENT from the data file.
-   If PER_FACTOR is nonzero, then factor information is read from
-   the data file.  Data is for cell number CELL. */
-static int
-nr_read_data_lines (struct nr_aux_data *nr,
-                    int per_factor, int cell, int content, int compare)
-{
-  struct matrix_data_pgm *mx = nr->mx;
-  const int type = content_type[content];               /* Content type. */
-  int n_lines; /* Number of lines to parse from data file for this type. */
-  double *cp;                   /* Current position in vector or matrix. */
-  int i;
-
-  if (type != 1)
-    n_lines = 1;
-  else
+  else if (mf->triangle == UPPER)
      {
-      n_lines = mx->n_continuous;
-      if (mx->section != FULL && mx->diag == NODIAGONAL)
-       n_lines--;
+      if (mf->diagonal == DIAGONAL)
+        {
+          /* 1 2 3 4
+             . 1 5 6
+             . . 1 7
+             . . . 1 */
+          static const struct read_matrix_params rmp = { 0, 0, 0, INT_MAX, 1, 0 };
+          return &rmp;
+        }
+      else
+        {
+          /* . 2 3 4
+             . . 5 6
+             . . . 7
+             . . . . */
+          static const struct read_matrix_params rmp = { 0, -1, 1, INT_MAX, 1, 0 };
+          return &rmp;
+        }
      }
+  else
+    NOT_REACHED ();
+}
  
-  cp = nr->data[content][cell];
-  if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL)
-    cp += mx->n_continuous;
-
-  for (i = 0; i < n_lines; i++)
+static void
+schedule_matrices (struct matrix_format *mf)
+{
+  struct matrix_sched *ms0 = &mf->ms[0];
+  ms0->nr = 1;
+  ms0->nc = 1;
+  ms0->rp = xmalloc (sizeof *ms0->rp);
+  ms0->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = 1 };
+  ms0->n_rp = 1;
+
+  struct matrix_sched *ms1 = &mf->ms[1];
+  ms1->nr = 1;
+  ms1->nc = mf->n_cvars;
+  ms1->rp = xmalloc (sizeof *ms1->rp);
+  ms1->rp[0] = (struct row_sched) { .y = 0, .x0 = 0, .x1 = mf->n_cvars };
+  ms1->n_rp = 1;
+
+  struct matrix_sched *ms2 = &mf->ms[2];
+  ms2->nr = mf->n_cvars;
+  ms2->nc = mf->n_cvars;
+  ms2->rp = xmalloc (mf->n_cvars * sizeof *ms2->rp);
+  ms2->n_rp = 0;
+
+  const struct read_matrix_params *rmp = get_read_matrix_params (mf);
+  int x0 = rmp->x0;
+  int x1 = rmp->x1 < mf->n_cvars ? rmp->x1 : mf->n_cvars - 1;
+  int y0 = rmp->dy0;
+  int y1 = (int) mf->n_cvars + rmp->dy1;
+  for (int y = y0; y < y1; y++)
      {
-      int n_cols;
-      
-      if (!nr_read_splits (nr, 1))
-       return 0;
-      if (per_factor && !nr_read_factors (nr, cell))
-       return 0;
-      compare = 1;
-
-      switch (type)
-       {
-       case 0:
-         n_cols = mx->n_continuous;
-         break;
-       case 1:
-         switch (mx->section)
-           {
-           case LOWER:
-             n_cols = i + 1;
-             break;
-           case UPPER:
-             cp += i;
-             n_cols = mx->n_continuous - i;
-             if (mx->diag == NODIAGONAL)
-               {
-                 n_cols--;
-                 cp++;
-               }
-             break;
-           case FULL:
-             n_cols = mx->n_continuous;
-             break;
-           default:
-              NOT_REACHED ();
-           }
-         break;
-       case 2:
-         n_cols = 1;
-         break;
-       default:
-          NOT_REACHED ();
-       }
+      assert (x0 >= 0 && x0 < mf->n_cvars);
+      assert (x1 >= 0 && x1 < mf->n_cvars);
+      assert (x1 >= x0);
  
-      {
-       int j;
-       
-       for (j = 0; j < n_cols; j++)
-         {
-            struct matrix_token token;
-           if (!mget_token (&token, mx->reader))
-             return 0;
-           if (token.type != MNUM)
-             {
-               msg (SE, _("expecting value for %s %s"),
-                    dict_get_var (dataset_dict (current_dataset), j)->name,
-                     context (mx->reader));
-               return 0;
-             }
-
-           *cp++ = token.number;
-         }
-       if (mx->fmt != FREE
-            && !force_eol (mx->reader, content_names[content]))
-         return 0;
-      }
+      ms2->rp[ms2->n_rp++] = (struct row_sched) {
+        .y = y, .x0 = x0, .x1 = x1 + 1
+      };
  
-      if (mx->section == LOWER)
-       cp += mx->n_continuous - n_cols;
+      x0 += rmp->dx0;
+      x1 += rmp->dx1;
      }
-
-  fill_matrix (mx, content, nr->data[content][cell]);
-
-  return 1;
  }
  
-/* When ROWTYPE_ does not appear in the data, reads the matrices and
-   writes them to the output file.
-   Returns true if successful, false if an I/O error occurred. */
  static bool
-matrix_data_read_without_rowtype (struct case_source *source,
-                                  struct ccase *c,
-                                  write_case_func *write_case,
-                                  write_case_data wc_data)
+read_id_columns (const struct matrix_format *mf,
+                 struct substring *p, struct dfm_reader *r,
+                 double *d, enum rowtype *rt)
  {
-  struct nr_aux_data *nr = source->aux;
-  struct matrix_data_pgm *mx = nr->mx;
-
-  {
-    int *cp;
+  for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+    if (!(mf->input_vars[i] == mf->rowtype
+          ? next_rowtype (p, r, rt)
+          : next_number (p, r, &d[i])))
+      return false;
+  return true;
+}
  
-    nr->data = pool_nalloc (mx->container, PROX + 1, sizeof *nr->data);
-    
-    {
-      int i;
+static bool
+equal_id_columns (const struct matrix_format *mf,
+                  const double *a, const double *b)
+{
+  for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+    if (mf->input_vars[i] != mf->rowtype && a[i] != b[i])
+      return false;
+  return true;
+}
  
-      for (i = 0; i <= PROX; i++)
-       nr->data[i] = NULL;
-    }
-    
-    for (cp = mx->contents; *cp != EOC; cp++)
-      if (*cp != LPAREN && *cp != RPAREN)
-       {
-         int per_factor = mx->is_per_factor[*cp];
-         int n_entries;
-         
-         n_entries = mx->n_continuous;
-         if (content_type[*cp] == 1)
-           n_entries *= mx->n_continuous;
-         
-         {
-           int n_vectors = per_factor ? mx->cells : 1;
-           int i;
-           
-           nr->data[*cp] = pool_nalloc (mx->container,
-                                         n_vectors, sizeof **nr->data);
-           
-           for (i = 0; i < n_vectors; i++)
-             nr->data[*cp][i] = pool_nalloc (mx->container,
-                                              n_entries, sizeof ***nr->data);
-         }
-       }
-  }
-  
-  for (;;)
+static bool
+equal_split_columns (const struct matrix_format *mf,
+                     const double *a, const double *b)
+{
+  for (size_t i = 0; i < mf->n_svars; i++)
      {
-      int *bp, *ep, *np;
-      
-      if (!nr_read_splits (nr, 0))
-       return true;
-      
-      for (bp = mx->contents; *bp != EOC; bp = np)
-       {
-         int per_factor;
-
-         /* Trap the CONTENTS that we should parse in this pass
-            between bp and ep.  Set np to the starting bp for next
-            iteration. */
-         if (*bp == LPAREN)
-           {
-             ep = ++bp;
-             while (*ep != RPAREN)
-               ep++;
-             np = &ep[1];
-             per_factor = 1;
-           }
-         else
-           {
-             ep = &bp[1];
-             while (*ep != EOC && *ep != LPAREN)
-               ep++;
-             np = ep;
-             per_factor = 0;
-           }
-         
-         {
-           int i;
-             
-           for (i = 0; i < (per_factor ? mx->cells : 1); i++)
-             {
-               int *cp;
-
-               for (cp = bp; cp < ep; cp++) 
-                 if (!nr_read_data_lines (nr, per_factor, i, *cp, cp != bp))
-                   return true;
-             }
-         }
-       }
-
-      if (!nr_output_data (nr, c, write_case, wc_data))
+      size_t idx = mf->svar_indexes[i];
+      if (a[idx] != b[idx])
          return false;
-
-      if (dict_get_split_cnt (dataset_dict (current_dataset)) == 0
-          || !another_token (mx->reader))
-       return true;
      }
+  return true;
  }
  
-/* Read the split file variables.  If COMPARE is 1, compares the
-   values read to the last values read and returns true if they're equal,
-   false otherwise. */
  static bool
-nr_read_splits (struct nr_aux_data *nr, int compare)
+is_pooled (const struct matrix_format *mf, const double *d)
+{
+  for (size_t i = 0; i < mf->n_fvars; i++)
+    if (d[mf->fvar_indexes[i]] != SYSMIS)
+      return false;
+  return true;
+}
+
+static void
+matrix_sched_init (const struct matrix_format *mf, enum rowtype rt,
+                   gsl_matrix *m)
+{
+  int n_dims = rowtype_dimensions (rt);
+  const struct matrix_sched *ms = &mf->ms[n_dims];
+  double diagonal = n_dims < 2 || rt != C_CORR ? SYSMIS : 1.0;
+  for (size_t y = 0; y < ms->nr; y++)
+    for (size_t x = 0; x < ms->nc; x++)
+      gsl_matrix_set (m, y, x, y == x ? diagonal : SYSMIS);
+}
+
+static void
+matrix_sched_output (const struct matrix_format *mf, enum rowtype rt,
+                     gsl_matrix *m, const double *d, int split_num,
+                     struct casewriter *w)
  {
-  struct matrix_data_pgm *mx = nr->mx;
-  static int just_read = 0; /* FIXME: WTF? */
-  size_t split_cnt;
-  size_t i;
+  int n_dims = rowtype_dimensions (rt);
+  const struct matrix_sched *ms = &mf->ms[n_dims];
  
-  if (compare && just_read)
+  if (rt == C_N_SCALAR)
      {
-      just_read = 0;
-      return true;
+      for (size_t x = 1; x < mf->n_cvars; x++)
+        gsl_matrix_set (m, 0, x, gsl_matrix_get (m, 0, 0));
+      rt = C_N;
      }
-  
-  if (dict_get_split_vars (dataset_dict (current_dataset)) == NULL)
-    return true;
  
-  if (mx->single_split)
+  for (int y = 0; y < ms->nr; y++)
      {
-      if (!compare) 
-        {
-          struct mxd_var *mv = dict_get_split_vars (dataset_dict (current_dataset))[0]->aux;
-          nr->split_values[0] = ++mv->sub_type; 
-        }
-      return true;
+      struct ccase *c = case_create (casewriter_get_proto (w));
+      for (size_t i = 0; mf->input_vars[i] != mf->cvars[0]; i++)
+        if (mf->input_vars[i] != mf->rowtype)
+          *case_num_rw (c, mf->input_vars[i]) = d[i];
+      if (mf->n_svars && !mf->svar_indexes)
+        *case_num_rw (c, mf->svars[0]) = split_num;
+      set_string (c, mf->rowtype, rowtype_name (rt));
+      const char *varname = n_dims == 2 ? var_get_name (mf->cvars[y]) : "";
+      set_string (c, mf->varname, ss_cstr (varname));
+      for (int x = 0; x < mf->n_cvars; x++)
+        *case_num_rw (c, mf->cvars[x]) = gsl_matrix_get (m, y, x);
+      casewriter_write (w, c);
      }
+}
  
-  if (!compare)
-    just_read = 1;
+static void
+matrix_sched_output_n (const struct matrix_format *mf, double n,
+                       gsl_matrix *m, const double *d, int split_num,
+                       struct casewriter *w)
+{
+  gsl_matrix_set (m, 0, 0, n);
+  matrix_sched_output (mf, C_N_SCALAR, m, d, split_num, w);
+}
  
-  split_cnt = dict_get_split_cnt (dataset_dict (current_dataset));
-  for (i = 0; i < split_cnt; i++) 
+static void
+check_eol (const struct matrix_format *mf, struct substring *p,
+           struct dfm_reader *r)
+{
+  if (!mf->span)
      {
-      struct matrix_token token;
-      if (!mget_token (&token, mx->reader))
-        return false;
-      if (token.type != MNUM)
+      ss_ltrim (p, ss_cstr (CC_SPACES ","));
+      if (p->length)
          {
-          msg (SE, _("Syntax error expecting SPLIT FILE value %s."),
-               context (mx->reader));
-          return false;
-        }
-
-      if (!compare)
-        nr->split_values[i] = token.number;
-      else if (nr->split_values[i] != token.number)
-        {
-          msg (SE, _("Expecting value %g for %s."),
-               nr->split_values[i],
-               dict_get_split_vars (dataset_dict (current_dataset))[i]->name);
-          return false;
+          parse_error (r, p, _("Extraneous data expecting end of line."));
+          p->length = 0;
          }
      }
-
-  return true;
  }
  
-/* Read the factors for cell CELL.  If COMPARE is 1, compares the
-   values read to the last values read and returns true if they're equal,
-   false otherwise. */
-static bool
-nr_read_factors (struct nr_aux_data *nr, int cell)
+static void
+parse_data_with_rowtype (const struct matrix_format *mf,
+                         struct dfm_reader *r, struct casewriter *w)
  {
-  struct matrix_data_pgm *mx = nr->mx;
-  bool compare;
-  
-  if (mx->n_factors == 0)
-    return true;
+  if (dfm_eof (r))
+    return;
+  struct substring p = dfm_get_record (r);
  
-  assert (nr->max_cell_idx >= cell);
-  if (cell != nr->max_cell_idx)
-    compare = true;
-  else
-    {
-      compare = false;
-      nr->max_cell_idx++;
-    }
-      
-  {
-    size_t i;
-    
-    for (i = 0; i < mx->n_factors; i++)
-      {
-        struct matrix_token token;
-       if (!mget_token (&token, mx->reader))
-         return false;
-       if (token.type != MNUM)
-         {
-           msg (SE, _("Syntax error expecting factor value %s."),
-                context (mx->reader));
-           return false;
-         }
-       
-       if (!compare)
-         nr->factor_values[i + mx->n_factors * cell] = token.number;
-       else if (nr->factor_values[i + mx->n_factors * cell] != token.number)
-         {
-           msg (SE, _("Syntax error expecting value %g for %s %s."),
-                nr->factor_values[i + mx->n_factors * cell],
-                mx->factors[i]->name, context (mx->reader));
-           return false;
-         }
-      }
-  }
+  double *prev = NULL;
+  gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
  
-  return true;
-}
+  double *d = xnmalloc (mf->n_input_vars, sizeof *d);
+  enum rowtype rt;
  
-/* Write the contents of a cell having content type CONTENT and data
-   CP to the active file.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-dump_cell_content (struct matrix_data_pgm *mx, int content, double *cp,
-                   struct ccase *c,
-                   write_case_func *write_case, write_case_data wc_data)
-{
-  int type = content_type[content];
+  double *d_next = xnmalloc (mf->n_input_vars, sizeof *d_next);
  
-  {
-    buf_copy_str_rpad (case_data_rw (c, mx->rowtype_->fv)->s, 8,
-                       content_names[content]);
-    
-    if (type != 1)
-      memset (case_data_rw (c, mx->varname_->fv)->s, ' ', 8);
-  }
+  if (!read_id_columns (mf, &p, r, d, &rt))
+    goto exit;
+  for (;;)
+    {
+      /* If this has rowtype N but there was an N subcommand, then the
+         subcommand takes precedence, so we will suppress outputting this
+         record.  We still need to parse it, though, so we can't skip other
+         work. */
+      bool suppress_output = mf->n >= 0 && (rt == C_N || rt == C_N_SCALAR);
+      if (suppress_output)
+        parse_error (r, NULL, _("N record is not allowed with N subcommand.  "
+                                "Ignoring N record."));
+
+      /* If there's an N subcommand, and this is a new split, then output an N
+         record. */
+      if (mf->n >= 0 && (!prev || !equal_split_columns (mf, prev, d)))
+        {
+          matrix_sched_output_n (mf, mf->n, m, d, 0, w);
  
-  {
-    int n_lines = (type == 1) ? mx->n_continuous : 1;
-    int i;
-               
-    for (i = 0; i < n_lines; i++)
-      {
-       int j;
-
-       for (j = 0; j < mx->n_continuous; j++)
-         {
-            int fv = dict_get_var (dataset_dict (current_dataset), mx->first_continuous + j)->fv;
-            case_data_rw (c, fv)->f = *cp;
-           cp++;
-         }
-       if (type == 1)
-         buf_copy_str_rpad (case_data_rw (c, mx->varname_->fv)->s, 8,
-                             dict_get_var (dataset_dict (current_dataset),
-                                           mx->first_continuous + i)->name);
-       if (!write_case (wc_data))
-          return false;
-      }
-  }
-  return true;
-}
+          if (!prev)
+            prev = xnmalloc (mf->n_input_vars, sizeof *prev);
+          memcpy (prev, d, mf->n_input_vars * sizeof *prev);
+        }
  
-/* Finally dump out everything from nr_data[] to the output file. */
-static bool
-nr_output_data (struct nr_aux_data *nr, struct ccase *c,
-                write_case_func *write_case, write_case_data wc_data)
-{
-  struct matrix_data_pgm *mx = nr->mx;
-  
-  {
-    struct variable *const *split;
-    size_t split_cnt;
-    size_t i;
+      /* Usually users don't provide the CONTENTS subcommand with ROWTYPE_, but
+         if they did then warn if ROWTYPE_ is an unexpected type. */
+      if (mf->factor_rowtype_mask || mf->pooled_rowtype_mask)
+        {
+          const char *name = rowtype_name (rt).string;
+          if (is_pooled (mf, d))
+            {
+              if (!((1u << rt) & mf->pooled_rowtype_mask))
+                parse_warning (r, NULL, _("Data contains pooled row type %s not "
+                                          "included in CONTENTS."), name);
+            }
+          else
+            {
+              if (!((1u << rt) & mf->factor_rowtype_mask))
+                parse_warning (r, NULL, _("Data contains with-factors row type "
+                                          "%s not included in CONTENTS."), name);
+            }
+        }
  
-    split_cnt = dict_get_split_cnt (dataset_dict (current_dataset));
-    split = dict_get_split_vars (dataset_dict (current_dataset));
-    for (i = 0; i < split_cnt; i++)
-      case_data_rw (c, split[i]->fv)->f = nr->split_values[i];
-  }
+      /* Initialize the matrix to be filled-in. */
+      int n_dims = rowtype_dimensions (rt);
+      const struct matrix_sched *ms = &mf->ms[n_dims];
+      matrix_sched_init (mf, rt, m);
  
-  if (mx->n_factors)
-    {
-      int cell;
+      enum rowtype rt_next;
+      bool eof;
  
-      for (cell = 0; cell < mx->cells; cell++)
-       {
-         {
-           size_t factor;
-
-           for (factor = 0; factor < mx->n_factors; factor++)
-              case_data_rw (c, mx->factors[factor]->fv)->f
-                = nr->factor_values[factor + cell * mx->n_factors];
-         }
-         
-         {
-           int content;
-           
-           for (content = 0; content <= PROX; content++)
-             if (mx->is_per_factor[content])
-               {
-                 assert (nr->data[content] != NULL
-                         && nr->data[content][cell] != NULL);
+      size_t n_rows;
+      for (n_rows = 1; ; n_rows++)
+        {
+          if (n_rows <= ms->n_rp)
+            {
+              const struct row_sched *rs = &ms->rp[n_rows - 1];
+              size_t y = rs->y;
+              for (size_t x = rs->x0; x < rs->x1; x++)
+                {
+                  double e;
+                  if (!next_number (&p, r, &e))
+                    goto exit;
+                  gsl_matrix_set (m, y, x, e);
+                  if (n_dims == 2 && mf->triangle != FULL)
+                    gsl_matrix_set (m, x, y, e);
+                }
+              check_eol (mf, &p, r);
+            }
+          else
+            {
+              /* Suppress bad input data.  We'll issue an error later. */
+              p.length = 0;
+            }
  
-                 if (!dump_cell_content (mx, content, nr->data[content][cell],
-                                          c, write_case, wc_data))
-                    return false;
-               }
-         }
-       }
-    }
+          eof = (!more_tokens (&p, r)
+                 || !read_id_columns (mf, &p, r, d_next, &rt_next));
+          if (eof)
+            break;
  
-  {
-    int content;
-    
-    {
-      size_t factor;
+          if (!equal_id_columns (mf, d, d_next) || rt_next != rt)
+            break;
+        }
+      if (!suppress_output)
+        matrix_sched_output (mf, rt, m, d, 0, w);
+
+      if (n_rows != ms->n_rp)
+        parse_error (r, NULL,
+                     _("Matrix %s had %zu rows but %zu rows were expected."),
+                     rowtype_name (rt).string, n_rows, ms->n_rp);
+      if (eof)
+        break;
  
-      for (factor = 0; factor < mx->n_factors; factor++)
-       case_data_rw (c, mx->factors[factor]->fv)->f = SYSMIS;
+      double *d_tmp = d;
+      d = d_next;
+      d_next = d_tmp;
+
+      rt = rt_next;
      }
-    
-    for (content = 0; content <= PROX; content++)
-      if (!mx->is_per_factor[content] && nr->data[content] != NULL) 
-        {
-          if (!dump_cell_content (mx, content, nr->data[content][0],
-                                  c, write_case, wc_data))
-            return false; 
-        }
-  }
  
-  return true;
+exit:
+  free (prev);
+  gsl_matrix_free (m);
+  free (d);
+  free (d_next);
  }
-\f
-/* Back end, with ROWTYPE_. */
  
-/* All the data for one set of factor values. */
-struct factor_data
-  {
-    double *factors;
-    int n_rows[PROX + 1];
-    double *data[PROX + 1];
-    struct factor_data *next;
-  };
+static void
+parse_matrix_without_rowtype (const struct matrix_format *mf,
+                              struct substring *p, struct dfm_reader *r,
+                              gsl_matrix *m, enum rowtype rowtype, bool pooled,
+                              int split_num, struct casewriter *w)
+{
+  int n_dims = rowtype_dimensions (rowtype);
+  const struct matrix_sched *ms = &mf->ms[n_dims];
  
-/* With ROWTYPE_ auxiliary data. */
-struct wr_aux_data 
-  {
-    struct matrix_data_pgm *mx;         /* MATRIX DATA program. */
-    int content;                        /* Type of current row. */
-    double *split_values;               /* SPLIT FILE variable values. */
-    struct factor_data *data;           /* All the data. */
-    struct factor_data *current;        /* Current factor. */
-  };
+  double *d = xnmalloc (mf->n_input_vars, sizeof *d);
+  matrix_sched_init (mf, rowtype, m);
+  for (size_t i = 0; i < ms->n_rp; i++)
+    {
+      int y = ms->rp[i].y;
+      int k = 0;
+      int h = 0;
+      for (size_t j = 0; j < mf->n_input_vars; j++)
+        {
+          const struct variable *iv = mf->input_vars[j];
+          if (k < mf->n_cvars && iv == mf->cvars[k])
+            {
+              if (k < ms->rp[i].x1 - ms->rp[i].x0)
+                {
+                  double e;
+                  if (!next_number (p, r, &e))
+                    goto exit;
+
+                  int x = k + ms->rp[i].x0;
+                  gsl_matrix_set (m, y, x, e);
+                  if (n_dims == 2 && mf->triangle != FULL)
+                    gsl_matrix_set (m, x, y, e);
+                }
+              k++;
+              continue;
+            }
+          if (h < mf->n_fvars && iv == mf->fvars[h])
+            {
+              h++;
+              if (pooled)
+                {
+                  d[j] = SYSMIS;
+                  continue;
+                }
+            }
  
-static bool wr_read_splits (struct wr_aux_data *, struct ccase *,
-                           write_case_func *, write_case_data);
-static bool wr_output_data (struct wr_aux_data *, struct ccase *,
-                           write_case_func *, write_case_data);
-static bool wr_read_rowtype (struct wr_aux_data *, 
-                            const struct matrix_token *, struct dfm_reader *);
-static bool wr_read_factors (struct wr_aux_data *);
-static bool wr_read_indeps (struct wr_aux_data *);
-static bool matrix_data_read_with_rowtype (struct case_source *,
-                                           struct ccase *,
-                                           write_case_func *,
-                                           write_case_data);
-
-/* When ROWTYPE_ appears in the data, reads the matrices and writes
-   them to the output file.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-read_matrices_with_rowtype (struct matrix_data_pgm *mx)
-{
-  struct wr_aux_data wr;
-  bool ok;
-
-  wr.mx = mx;
-  wr.content = -1;
-  wr.split_values = NULL;
-  wr.data = NULL;
-  wr.current = NULL;
-  mx->cells = 0;
-
-  proc_set_source (current_dataset, 
-                  create_case_source (&matrix_data_with_rowtype_source_class,
-                                       &wr));
-  ok = procedure (current_dataset,NULL, NULL);
-
-  free (wr.split_values);
-  return ok;
+          double e;
+          if (!next_number (p, r, &e))
+            goto exit;
+          d[j] = e;
+        }
+      check_eol (mf, p, r);
+    }
+
+  matrix_sched_output (mf, rowtype, m, d, split_num, w);
+exit:
+  free (d);
  }
  
-/* Read from the data file and write it to the active file.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-matrix_data_read_with_rowtype (struct case_source *source,
-                               struct ccase *c,
-                               write_case_func *write_case,
-                               write_case_data wc_data)
+static void
+parse_data_without_rowtype (const struct matrix_format *mf,
+                            struct dfm_reader *r, struct casewriter *w)
  {
-  struct wr_aux_data *wr = source->aux;
-  struct matrix_data_pgm *mx = wr->mx;
+  if (dfm_eof (r))
+    return;
+  struct substring p = dfm_get_record (r);
  
+  gsl_matrix *m = gsl_matrix_alloc (mf->n_cvars, mf->n_cvars);
+
+  int split_num = 1;
    do
      {
-      if (!wr_read_splits (wr, c, write_case, wc_data))
-       return true;
+      for (size_t i = 0; i < mf->n_contents; )
+        {
+          size_t j = i;
+          if (mf->contents[i].open)
+            while (!mf->contents[j].close)
+              j++;
  
-      if (!wr_read_factors (wr))
-       return true;
+          if (mf->contents[i].open)
+            {
+              for (size_t k = 0; k < mf->cells; k++)
+                for (size_t h = i; h <= j; h++)
+                  parse_matrix_without_rowtype (mf, &p, r, m,
+                                                mf->contents[h].rowtype, false,
+                                                split_num, w);
+            }
+          else
+            parse_matrix_without_rowtype (mf, &p, r, m, mf->contents[i].rowtype,
+                                          true, split_num, w);
+          i = j + 1;
+        }
  
-      if (!wr_read_indeps (wr))
-       return true;
+      split_num++;
      }
-  while (another_token (mx->reader));
+  while (more_tokens (&p, r));
  
-  return wr_output_data (wr, c, write_case, wc_data);
+  gsl_matrix_free (m);
  }
  
-/* Read the split file variables.  If they differ from the previous
-   set of split variables then output the data.  Returns success. */
-static bool 
-wr_read_splits (struct wr_aux_data *wr,
-                struct ccase *c,
-                write_case_func *write_case, write_case_data wc_data)
+/* Parses VARIABLES=varnames for MATRIX DATA and returns a dictionary with the
+   named variables in it. */
+static struct dictionary *
+parse_matrix_data_variables (struct lexer *lexer)
  {
-  struct matrix_data_pgm *mx = wr->mx;
-  bool compare;
-  size_t split_cnt;
+  if (!lex_force_match_id (lexer, "VARIABLES"))
+    return NULL;
+  lex_match (lexer, T_EQUALS);
  
-  split_cnt = dict_get_split_cnt (dataset_dict (current_dataset));
-  if (split_cnt == 0)
-    return true;
+  struct dictionary *dict = dict_create (get_default_encoding ());
  
-  if (wr->split_values)
-    compare = true;
-  else
+  size_t n_names = 0;
+  char **names = NULL;
+  if (!parse_DATA_LIST_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE))
      {
-      compare = false;
-      wr->split_values = xnmalloc (split_cnt, sizeof *wr->split_values);
+      dict_unref (dict);
+      return NULL;
      }
-  
-  {
-    bool different = false;
-    int i;
  
-    for (i = 0; i < split_cnt; i++)
-      {
-        struct matrix_token token;
-       if (!mget_token (&token, mx->reader))
-         return false;
-       if (token.type != MNUM)
-         {
-           msg (SE, _("Syntax error %s expecting SPLIT FILE value."),
-                context (mx->reader));
-           return false;
-         }
-
-       if (compare && wr->split_values[i] != token.number && !different)
-         {
-           if (!wr_output_data (wr, c, write_case, wc_data))
-             return 0;
-           different = true;
-           mx->cells = 0;
-         }
-       wr->split_values[i] = token.number;
-      }
-  }
+  for (size_t i = 0; i < n_names; i++)
+    if (!strcasecmp (names[i], "ROWTYPE_"))
+      dict_create_var_assert (dict, "ROWTYPE_", 8);
+    else
+      dict_create_var_assert (dict, names[i], 0);
  
-  return true;
-}
+  for (size_t i = 0; i < n_names; ++i)
+    free (names[i]);
+  free (names);
  
-/* Compares doubles A and B, treating SYSMIS as greatest. */
-static int
-compare_doubles (const void *a_, const void *b_, void *aux UNUSED)
-{
-  const double *a = a_;
-  const double *b = b_;
-
-  if (*a == *b)
-    return 0;
-  else if (*a == SYSMIS)
-    return 1;
-  else if (*b == SYSMIS)
-    return -1;
-  else if (*a > *b)
-    return 1;
-  else
-    return -1;
+  if (dict_lookup_var (dict, "VARNAME_"))
+    {
+      msg (SE, _("VARIABLES may not include VARNAME_."));
+      dict_unref (dict);
+      return NULL;
+    }
+  return dict;
  }
  
-/* Return strcmp()-type comparison of the MX->n_factors factors at _A and
-   _B.  Sort missing values toward the end. */
-static int
-compare_factors (const void *a_, const void *b_, void *mx_)
+static bool
+parse_matrix_data_subvars (struct lexer *lexer, struct dictionary *dict,
+                           bool *taken_vars,
+                           struct variable ***vars, size_t **indexes,
+                           size_t *n_vars)
  {
-  struct matrix_data_pgm *mx = mx_;
-  struct factor_data *const *pa = a_;
-  struct factor_data *const *pb = b_;
-  const double *a = (*pa)->factors;
-  const double *b = (*pb)->factors;
-
-  return lexicographical_compare_3way (a, mx->n_factors,
-                                       b, mx->n_factors,
-                                       sizeof *a,
-                                       compare_doubles, NULL);
+  if (!parse_variables (lexer, dict, vars, n_vars, 0))
+    return false;
+
+  *indexes = xnmalloc (*n_vars, sizeof **indexes);
+  for (size_t i = 0; i < *n_vars; i++)
+    {
+      struct variable *v = (*vars)[i];
+      if (!strcasecmp (var_get_name (v), "ROWTYPE_"))
+        {
+          msg (SE, _("ROWTYPE_ is not allowed on SPLIT or FACTORS."));
+          goto error;
+        }
+      (*indexes)[i] = var_get_dict_index (v);
+
+      bool *tv = &taken_vars[var_get_dict_index (v)];
+      if (*tv)
+        {
+          msg (SE, _("%s may not appear on both SPLIT and FACTORS."),
+               var_get_name (v));
+          goto error;
+        }
+      *tv = true;
+
+      var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 4 });
+    }
+  return true;
+
+error:
+  free (*vars);
+  *vars = NULL;
+  *n_vars = 0;
+  free (*indexes);
+  *indexes = NULL;
+  return false;
  }
  
-/* Write out the data for the current split file to the active
-   file.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-wr_output_data (struct wr_aux_data *wr,
-                struct ccase *c,
-                write_case_func *write_case, write_case_data wc_data)
+int
+cmd_matrix_data (struct lexer *lexer, struct dataset *ds)
  {
-  struct matrix_data_pgm *mx = wr->mx;
-  bool ok = true;
+  struct dictionary *dict = parse_matrix_data_variables (lexer);
+  if (!dict)
+    return CMD_FAILURE;
  
-  {
-    struct variable *const *split;
-    size_t split_cnt;
-    size_t i;
+  size_t n_input_vars = dict_get_n_vars (dict);
+  struct variable **input_vars = xnmalloc (n_input_vars, sizeof *input_vars);
+  for (size_t i = 0; i < n_input_vars; i++)
+    input_vars[i] = dict_get_var (dict, i);
  
-    split_cnt = dict_get_split_cnt (dataset_dict (current_dataset));
-    split = dict_get_split_vars (dataset_dict (current_dataset));
-    for (i = 0; i < split_cnt; i++)
-      case_data_rw (c, split[i]->fv)->f = wr->split_values[i];
-  }
+  int varname_width = 8;
+  for (size_t i = 0; i < n_input_vars; i++)
+    {
+      int w = strlen (var_get_name (input_vars[i]));
+      varname_width = MAX (w, varname_width);
+    }
  
-  /* Sort the wr->data list. */
-  {
-    struct factor_data **factors;
-    struct factor_data *iter;
-    int i;
+  struct variable *rowtype = dict_lookup_var (dict, "ROWTYPE_");
+  bool input_rowtype = rowtype != NULL;
+  if (!rowtype)
+    rowtype = dict_create_var_assert (dict, "ROWTYPE_", 8);
  
-    factors = xnmalloc (mx->cells, sizeof *factors);
+  struct matrix_format mf = {
+    .input_rowtype = input_rowtype,
+    .input_vars = input_vars,
+    .n_input_vars = n_input_vars,
  
-    for (i = 0, iter = wr->data; iter; iter = iter->next, i++)
-      factors[i] = iter;
+    .rowtype = rowtype,
+    .varname = dict_create_var_assert (dict, "VARNAME_", varname_width),
  
-    sort (factors, mx->cells, sizeof *factors, compare_factors, mx);
+    .triangle = LOWER,
+    .diagonal = DIAGONAL,
+    .n = -1,
+    .cells = -1,
+  };
  
-    wr->data = factors[0];
-    for (i = 0; i < mx->cells - 1; i++)
-      factors[i]->next = factors[i + 1];
-    factors[mx->cells - 1]->next = NULL;
+  bool *taken_vars = XCALLOC (n_input_vars, bool);
+  if (input_rowtype)
+    taken_vars[var_get_dict_index (rowtype)] = true;
  
-    free (factors);
-  }
+  struct file_handle *fh = NULL;
+  while (lex_token (lexer) != T_ENDCMD)
+    {
+      if (!lex_force_match (lexer, T_SLASH))
+       goto error;
  
-  /* Write out records for every set of factor values. */
-  {
-    struct factor_data *iter;
-    
-    for (iter = wr->data; iter; iter = iter->next)
-      {
+      if (lex_match_id (lexer, "N"))
         {
-         size_t factor;
+         lex_match (lexer, T_EQUALS);
  
-         for (factor = 0; factor < mx->n_factors; factor++)
-            case_data_rw (c, mx->factors[factor]->fv)->f
-              = iter->factors[factor];
+         if (!lex_force_int_range (lexer, "N", 0, INT_MAX))
+           goto error;
+
+         mf.n = lex_integer (lexer);
+         lex_get (lexer);
         }
-       
+      else if (lex_match_id (lexer, "FORMAT"))
         {
-         int content;
+         lex_match (lexer, T_EQUALS);
  
-         for (content = 0; content <= PROX; content++)
+         while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
             {
-             if (!iter->n_rows[content])
-               continue;
-             
-             {
-               int type = content_type[content];
-               int n_lines = (type == 1
-                              ? (mx->n_continuous
-                                 - (mx->section != FULL && mx->diag == NODIAGONAL))
-                              : 1);
-               
-               if (n_lines != iter->n_rows[content])
-                 {
-                   msg (SE, _("Expected %d lines of data for %s content; "
-                              "actually saw %d lines.  No data will be "
-                              "output for this content."),
-                        n_lines, content_names[content],
-                        iter->n_rows[content]);
-                   continue;
-                 }
-             }
-
-             fill_matrix (mx, content, iter->data[content]);
-
-             ok = dump_cell_content (mx, content, iter->data[content],
-                                      c, write_case, wc_data);
-              if (!ok)
-                break;
+             if (lex_match_id (lexer, "LIST"))
+                mf.span = false;
+             else if (lex_match_id (lexer, "FREE"))
+                mf.span = true;
+             else if (lex_match_id (lexer, "UPPER"))
+                mf.triangle = UPPER;
+             else if (lex_match_id (lexer, "LOWER"))
+                mf.triangle = LOWER;
+             else if (lex_match_id (lexer, "FULL"))
+                mf.triangle = FULL;
+             else if (lex_match_id (lexer, "DIAGONAL"))
+                mf.diagonal = DIAGONAL;
+             else if (lex_match_id (lexer, "NODIAGONAL"))
+                mf.diagonal = NO_DIAGONAL;
+             else
+               {
+                 lex_error (lexer, NULL);
+                 goto error;
+               }
             }
         }
-      }
-  }
-  
-  pool_destroy (mx->container);
-  mx->container = pool_create ();
-  
-  wr->data = wr->current = NULL;
-  
-  return ok;
-}
+      else if (lex_match_id (lexer, "FILE"))
+       {
+         lex_match (lexer, T_EQUALS);
+          fh_unref (fh);
+         fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
+         if (!fh)
+           goto error;
+       }
+      else if (!mf.n_svars && lex_match_id (lexer, "SPLIT"))
+        {
+          lex_match (lexer, T_EQUALS);
+          if (!mf.input_rowtype
+              && lex_token (lexer) == T_ID
+              && !dict_lookup_var (dict, lex_tokcstr (lexer)))
+            {
+              mf.svars = xmalloc (sizeof *mf.svars);
+              mf.svars[0] = dict_create_var_assert (dict, lex_tokcstr (lexer),
+                                                    0);
+              var_set_both_formats (
+                mf.svars[0], &(struct fmt_spec) { .type = FMT_F, .w = 4 });
+              mf.n_svars = 1;
+              lex_get (lexer);
+            }
+          else if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
+                                               &mf.svars, &mf.svar_indexes,
+                                               &mf.n_svars))
+            goto error;
+        }
+      else if (!mf.n_fvars && lex_match_id (lexer, "FACTORS"))
+        {
+          lex_match (lexer, T_EQUALS);
+          if (!parse_matrix_data_subvars (lexer, dict, taken_vars,
+                                          &mf.fvars, &mf.fvar_indexes,
+                                          &mf.n_fvars))
+            goto error;
+        }
+      else if (lex_match_id (lexer, "CELLS"))
+       {
+          if (mf.input_rowtype)
+            msg (SW, _("CELLS is ignored when VARIABLES includes ROWTYPE_"));
  
-/* Sets ROWTYPE_ based on the given TOKEN read from READER.
-   Return success. */
-static bool 
-wr_read_rowtype (struct wr_aux_data *wr,
-                 const struct matrix_token *token,
-                 struct dfm_reader *reader)
-{
-  if (wr->content != -1)
-    {
-      msg (SE, _("Multiply specified ROWTYPE_ %s."), context (reader));
-      return false;
-    }
-  if (token->type != MSTR)
-    {
-      msg (SE, _("Syntax error %s expecting ROWTYPE_ string."),
-           context (reader));
-      return false;
-    }
-  
-  {
-    char s[16];
-    char *cp;
-    
-    memcpy (s, token->string, min (15, token->length));
-    s[min (15, token->length)] = 0;
+         lex_match (lexer, T_EQUALS);
  
-    for (cp = s; *cp; cp++)
-      *cp = toupper ((unsigned char) *cp);
+         if (!lex_force_int_range (lexer, "CELLS", 0, INT_MAX))
+           goto error;
  
-    wr->content = string_to_content_type (s, NULL);
-  }
+         mf.cells = lex_integer (lexer);
+         lex_get (lexer);
+       }
+      else if (lex_match_id (lexer, "CONTENTS"))
+        {
+          lex_match (lexer, T_EQUALS);
  
-  if (wr->content == -1)
-    {
-      msg (SE, _("Syntax error %s."), context (reader));
-      return 0;
+          size_t allocated_contents = mf.n_contents;
+          bool in_parens = false;
+          for (;;)
+            {
+              bool open = !in_parens && lex_match (lexer, T_LPAREN);
+              enum rowtype rt;
+              if (!rowtype_parse (lexer, &rt))
+                {
+                  if (open || in_parens || (lex_token (lexer) != T_ENDCMD
+                                            && lex_token (lexer) != T_SLASH))
+                    {
+                      lex_error (lexer, _("Row type keyword expected."));
+                      goto error;
+                    }
+                  break;
+                }
+
+              if (open)
+                in_parens = true;
+
+              if (in_parens)
+                mf.factor_rowtype_mask |= 1u << rt;
+              else
+                mf.pooled_rowtype_mask |= 1u << rt;
+
+              bool close = in_parens && lex_match (lexer, T_RPAREN);
+              if (close)
+                in_parens = false;
+
+              if (mf.n_contents >= allocated_contents)
+                mf.contents = x2nrealloc (mf.contents, &allocated_contents,
+                                          sizeof *mf.contents);
+              mf.contents[mf.n_contents++] = (struct content) {
+                .open = open, .rowtype = rt, .close = close
+              };
+            }
+        }
+      else
+       {
+         lex_error (lexer, NULL);
+         goto error;
+       }
      }
-
-  return true;
-}
-
-/* Read the factors for the current row.  Select a set of factors and
-   point wr_current to it. */
-static bool 
-wr_read_factors (struct wr_aux_data *wr)
-{
-  struct matrix_data_pgm *mx = wr->mx;
-  double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors);
-
-  wr->content = -1;
-  {
-    size_t i;
-  
-    for (i = 0; i < mx->n_factors; i++)
-      {
-        struct matrix_token token;
-       if (!mget_token (&token, mx->reader))
-         goto lossage;
-       if (token.type == MSTR)
-         {
-           if (!wr_read_rowtype (wr, &token, mx->reader))
-             goto lossage;
-           if (!mget_token (&token, mx->reader))
-             goto lossage;
-         }
-       if (token.type != MNUM)
-         {
-           msg (SE, _("Syntax error expecting factor value %s."),
-                context (mx->reader));
-           goto lossage;
-         }
-       
-       factor_values[i] = token.number;
-      }
-  }
-  if (wr->content == -1)
+  if (mf.diagonal == NO_DIAGONAL && mf.triangle == FULL)
      {
-      struct matrix_token token;
-      if (!mget_token (&token, mx->reader))
-       goto lossage;
-      if (!wr_read_rowtype (wr, &token, mx->reader))
-       goto lossage;
+      msg (SE, _("FORMAT=FULL and FORMAT=NODIAGONAL are mutually exclusive."));
+      goto error;
      }
-  
-  /* Try the most recent factor first as a simple caching
-     mechanism. */
-  if (wr->current)
+  if (!mf.input_rowtype)
      {
-      size_t i;
-      
-      for (i = 0; i < mx->n_factors; i++)
-       if (factor_values[i] != wr->current->factors[i])
-         goto cache_miss;
-      goto winnage;
-    }
+      if (mf.cells < 0)
+        {
+          if (mf.n_fvars)
+            {
+              msg (SE, _("CELLS is required when factor variables are specified "
+                         "and VARIABLES does not include ROWTYPE_."));
+              goto error;
+            }
+          mf.cells = 1;
+        }
  
-  /* Linear search through the list. */
-cache_miss:
-  {
-    struct factor_data *iter;
+      if (!mf.n_contents)
+        {
+          msg (SW, _("CONTENTS was not specified and VARIABLES does not "
+                     "include ROWTYPE_.  Assuming CONTENTS=CORR."));
  
-    for (iter = wr->data; iter; iter = iter->next)
+          mf.n_contents = 1;
+          mf.contents = xmalloc (sizeof *mf.contents);
+          *mf.contents = (struct content) { .rowtype = C_CORR };
+        }
+    }
+  mf.cvars = xmalloc (mf.n_input_vars * sizeof *mf.cvars);
+  for (size_t i = 0; i < mf.n_input_vars; i++)
+    if (!taken_vars[i])
        {
-       size_t i;
-
-       for (i = 0; i < mx->n_factors; i++)
-         if (factor_values[i] != iter->factors[i])
-           goto next_item;
-       
-       wr->current = iter;
-       goto winnage;
-       
-      next_item: ;
+        struct variable *v = input_vars[i];
+        mf.cvars[mf.n_cvars++] = v;
+        var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 10,
+                                                      .d = 4 });
        }
-  }
-
-  /* Not found.  Make a new item. */
-  {
-    struct factor_data *new = pool_alloc (mx->container, sizeof *new);
-
-    new->factors = pool_nalloc (mx->container,
-                                mx->n_factors, sizeof *new->factors);
-    
-    {
-      size_t i;
-
-      for (i = 0; i < mx->n_factors; i++)
-       new->factors[i] = factor_values[i];
-    }
-    
+  if (!mf.n_cvars)
      {
-      int i;
-
-      for (i = 0; i <= PROX; i++)
-       {
-         new->n_rows[i] = 0;
-         new->data[i] = NULL;
-       }
+      msg (SE, _("At least one continuous variable is required."));
+      goto error;
      }
-
-    new->next = wr->data;
-    wr->data = wr->current = new;
-    mx->cells++;
-  }
-
-winnage:
-  local_free (factor_values);
-  return true;
-
-lossage:
-  local_free (factor_values);
-  return false;
-}
-
-/* Read the independent variables into wr->current. */
-static bool 
-wr_read_indeps (struct wr_aux_data *wr)
-{
-  struct matrix_data_pgm *mx = wr->mx;
-  struct factor_data *c = wr->current;
-  const int type = content_type[wr->content];
-  const int n_rows = c->n_rows[wr->content];
-  double *cp;
-  int n_cols;
-
-  /* Allocate room for data if necessary. */
-  if (c->data[wr->content] == NULL)
+  if (mf.input_rowtype)
      {
-      int n_items = mx->n_continuous;
-      if (type == 1)
-       n_items *= mx->n_continuous;
-      
-      c->data[wr->content] = pool_nalloc (mx->container,
-                                          n_items, sizeof **c->data);
+      for (size_t i = 0; i < mf.n_cvars; i++)
+        if (mf.cvars[i] != input_vars[n_input_vars - mf.n_cvars + i])
+          {
+            msg (SE, _("VARIABLES includes ROWTYPE_ but the continuous "
+                       "variables are not the last ones on VARIABLES."));
+            goto error;
+          }
      }
-
-  cp = &c->data[wr->content][n_rows * mx->n_continuous];
-
-  /* Figure out how much to read from this line. */
-  switch (type)
+  unsigned int rowtype_mask = mf.pooled_rowtype_mask | mf.factor_rowtype_mask;
+  if (rowtype_mask & (1u << C_N) && mf.n >= 0)
      {
-    case 0:
-    case 2:
-      if (n_rows > 0)
-       {
-         msg (SE, _("Duplicate specification for %s."),
-              content_names[wr->content]);
-         return false;
-       }
-      if (type == 0)
-       n_cols = mx->n_continuous;
-      else
-       n_cols = 1;
-      break;
-    case 1:
-      if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL))
-       {
-         msg (SE, _("Too many rows of matrix data for %s."),
-              content_names[wr->content]);
-         return false;
-       }
-      
-      switch (mx->section)
-       {
-       case LOWER:
-         n_cols = n_rows + 1;
-         if (mx->diag == NODIAGONAL)
-           cp += mx->n_continuous;
-         break;
-       case UPPER:
-         cp += n_rows;
-         n_cols = mx->n_continuous - n_rows;
-         if (mx->diag == NODIAGONAL)
-           {
-             n_cols--;
-             cp++;
-           }
-         break;
-       case FULL:
-         n_cols = mx->n_continuous;
-         break;
-       default:
-          NOT_REACHED ();
-       }
-      break;
-    default:
-      NOT_REACHED ();
+      msg (SE, _("Cannot specify N on CONTENTS along with the N subcommand."));
+      goto error;
      }
-  c->n_rows[wr->content]++;
  
-  /* Read N_COLS items at CP. */
-  {
-    int j;
-       
-    for (j = 0; j < n_cols; j++)
-      {
-        struct matrix_token token;
-       if (!mget_token (&token, mx->reader))
-         return false;
-       if (token.type != MNUM)
-         {
-           msg (SE, _("Syntax error expecting value for %s %s."),
-                 dict_get_var (dataset_dict (current_dataset), mx->first_continuous + j)->name,
-                 context (mx->reader));
-           return false;
-         }
-
-       *cp++ = token.number;
-      }
-    if (mx->fmt != FREE
-        && !force_eol (mx->reader, content_names[wr->content]))
-      return false;
-  }
+  struct variable **order = xnmalloc (dict_get_n_vars (dict), sizeof *order);
+  size_t n_order = 0;
+  for (size_t i = 0; i < mf.n_svars; i++)
+    order[n_order++] = mf.svars[i];
+  order[n_order++] = mf.rowtype;
+  for (size_t i = 0; i < mf.n_fvars; i++)
+    order[n_order++] = mf.fvars[i];
+  order[n_order++] = mf.varname;
+  for (size_t i = 0; i < mf.n_cvars; i++)
+    order[n_order++] = mf.cvars[i];
+  assert (n_order == dict_get_n_vars (dict));
+  dict_reorder_vars (dict, order, n_order);
+  free (order);
+
+  dict_set_split_vars (dict, mf.svars, mf.n_svars);
+
+  schedule_matrices (&mf);
+
+  if (fh == NULL)
+    fh = fh_inline_file ();
+
+  if (lex_end_of_command (lexer) != CMD_SUCCESS)
+    goto error;
+
+  struct dfm_reader *reader = dfm_open_reader (fh, lexer, NULL);
+  if (reader == NULL)
+    goto error;
+
+  struct casewriter *writer = autopaging_writer_create (dict_get_proto (dict));
+  if (mf.input_rowtype)
+    parse_data_with_rowtype (&mf, reader, writer);
+  else
+    parse_data_without_rowtype (&mf, reader, writer);
+  dfm_close_reader (reader);
  
-  return true;
-}
-\f
-/* Matrix source. */
+  dataset_set_dict (ds, dict);
+  dataset_set_source (ds, casewriter_make_reader (writer));
  
-static const struct case_source_class matrix_data_with_rowtype_source_class = 
-  {
-    "MATRIX DATA",
-    NULL,
-    matrix_data_read_with_rowtype,
-    NULL,
-  };
+  matrix_format_uninit (&mf);
+  free (taken_vars);
+  fh_unref (fh);
  
-static const struct case_source_class 
-matrix_data_without_rowtype_source_class =
-  {
-    "MATRIX DATA",
-    NULL,
-    matrix_data_read_without_rowtype,
-    NULL,
-  };
+  return CMD_SUCCESS;
  
+ error:
+  matrix_format_uninit (&mf);
+  free (taken_vars);
+  dict_unref (dict);
+  fh_unref (fh);
+  return CMD_FAILURE;
+}