datasheet: Add a few assertions.

[pspp-builds.git] / src / data / datasheet.c
diff --git a/src/data/datasheet.c b/src/data/datasheet.c

index 1fc1e98b08aba877620733f90a12a225a0be4f36..de59e73fe842f4fa94009d24b626cc9157464306 100644 (file)
--- a/src/data/datasheet.c
+++ b/src/data/datasheet.c
@@ -1,20 +1,18 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2007 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
  
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
  
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
  
     You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  
  #include <config.h>
  
@@ -26,12 +24,14 @@
  #include <data/casereader-provider.h>
  #include <data/casereader.h>
  #include <data/casewriter.h>
-#include <data/sparse-cases.h>
+#include <data/lazy-casereader.h>
+#include <data/settings.h>
  #include <libpspp/array.h>
  #include <libpspp/assertion.h>
-#include <libpspp/model-checker.h>
+#include <libpspp/misc.h>
  #include <libpspp/range-map.h>
  #include <libpspp/range-set.h>
+#include <libpspp/sparse-xarray.h>
  #include <libpspp/taint.h>
  #include <libpspp/tower.h>
  
@@ -39,6 +39,8 @@
  #include "md4.h"
  #include "xalloc.h"
  
+struct column;
+
  static struct axis *axis_create (void);
  static struct axis *axis_clone (const struct axis *);
  static void axis_destroy (struct axis *);
@@ -49,8 +51,8 @@ static bool axis_allocate (struct axis *, unsigned long int request,
                             unsigned long int *start,
                             unsigned long int *width);
  static void axis_make_available (struct axis *,
-                                unsigned long int start,
-                                unsigned long int width);
+                                unsigned long int start,
+                                unsigned long int width);
  static unsigned long int axis_extend (struct axis *, unsigned long int width);
  
  static unsigned long int axis_map (const struct axis *, unsigned long log_pos);
@@ -67,27 +69,25 @@ static void axis_move (struct axis *,
                         unsigned long int new_start,
                         unsigned long int cnt);
  
-static struct source *source_create_empty (size_t column_cnt);
+static struct source *source_create_empty (size_t n_bytes);
  static struct source *source_create_casereader (struct casereader *);
  static struct source *source_clone (const struct source *);
  static void source_destroy (struct source *);
  
-static casenumber source_get_backing_row_cnt (const struct source *);
-static size_t source_get_column_cnt (const struct source *);
-
-static bool source_read (const struct source *,
-                         casenumber row, size_t column,
-                         union value[], size_t value_cnt);
-static bool source_write (struct source *,
-                          casenumber row, size_t column,
-                          const union value[], size_t value_cnt);
-static bool source_write_columns (struct source *, size_t start_column,
-                                  const union value[], size_t value_cnt);
-static bool source_has_backing (const struct source *);
-static void source_increase_use (struct source *, size_t delta);
-static void source_decrease_use (struct source *, size_t delta);
+static casenumber source_get_backing_n_rows (const struct source *);
+
+static int source_allocate_column (struct source *, int width);
+static void source_release_column (struct source *, int ofs, int width);
  static bool source_in_use (const struct source *);
  
+static bool source_read (const struct column *, casenumber row, union value *);
+static bool source_write (const struct column *, casenumber row,
+                          const union value *);
+static bool source_write_column (struct column *, const union value *);
+static bool source_has_backing (const struct source *);
+
+static int get_source_index (const struct datasheet *ds, const struct source *source);
+
  /* A datasheet is internally composed from a set of data files,
     called "sources".  The sources that make up a datasheet must
     have the same number of rows (cases), but their numbers of
@@ -101,36 +101,46 @@ static bool source_in_use (const struct source *);
     to the column mapping.
  
     Each source in a datasheet can be a casereader or a
-   sparse_cases.  Casereaders are read-only, so when sources made
-   from casereaders need to be modified, it is done "virtually"
-   through being overlaid by a sparse_cases. */
+   sparse_xarray.  Casereaders are read-only, so when sources
+   made from casereaders need to be modified, it is done
+   "virtually" through being overlaid by a sparse_xarray. */
+struct source
+  {
+    struct range_set *avail;    /* Free bytes are set to 1s. */
+    struct sparse_xarray *data; /* Data at top level, atop the backing. */
+    struct casereader *backing; /* Backing casereader (or null). */
+    casenumber backing_rows;    /* Number of rows in backing (if backed). */
+    size_t n_used;              /* Number of column in use (if backed). */
+  };
  
-/* A datasheet. */
-struct datasheet
+/* A logical column. */
+struct column
    {
-    /* Mappings from logical to physical columns/rows. */
-    struct axis *columns;
-    struct axis *rows;
-
-    /* Mapping from physical columns to "source_info"s. */
-    struct range_map sources;
-
-    /* Minimum number of columns to put in a new source when we
-       need new columns and none are free.  We double it whenever
-       we add a new source to keep the number of file descriptors
-       needed by the datasheet to a minimum, reducing the
-       likelihood of running out. */
-    unsigned column_min_alloc;
-
-    /* Indicates corrupted data in the datasheet. */
-    struct taint *taint;
+    struct source *source;      /* Source of the underlying physical column. */
+    int value_ofs;              /* If 'source' has a backing casereader,
+                                   column's value offset in its cases. */
+    int byte_ofs;               /* Byte offset in source's sparse_xarray. */
+    int width;                  /* 0=numeric, otherwise string width. */
    };
  
-/* Maps from a range of physical columns to a source. */
-struct source_info
+/* A datasheet. */
+struct datasheet
    {
-    struct range_map_node column_range;
-    struct source *source;
+    /* Data sources. */
+    struct source **sources;    /* Sources, in no particular order. */
+    size_t n_sources;           /* Number of sources. */
+
+    /* Columns. */
+    struct caseproto *proto;    /* Prototype for rows (initialized lazily). */
+    struct column *columns;     /* Logical to physical column mapping. */
+    size_t n_columns;           /* Number of logical columns. */
+    unsigned column_min_alloc;  /* Min. # of columns to put in a new source. */
+
+    /* Rows. */
+    struct axis *rows;          /* Logical to physical row mapping. */
+
+    /* Tainting. */
+    struct taint *taint;        /* Indicates corrupted data. */
    };
  
  /* Is this operation a read or a write? */
@@ -140,13 +150,51 @@ enum rw_op
      OP_WRITE
    };
  
-static void free_source_info (struct datasheet *, struct source_info *);
-static struct source_info *source_info_from_range_map (
-  struct range_map_node *);
+static void allocate_column (struct datasheet *, int width, struct column *);
+static void release_source (struct datasheet *, struct source *);
  static bool rw_case (struct datasheet *ds, enum rw_op op,
-                     casenumber lrow, size_t start_column, size_t column_cnt,
+                     casenumber lrow, size_t start_column, size_t n_columns,
                       union value data[]);
  
+/* Returns the number of bytes needed to store a value with the
+   given WIDTH on disk. */
+static size_t
+width_to_n_bytes (int width)
+{
+  return width == 0 ? sizeof (double) : width;
+}
+
+/* Returns the address of the data in VALUE (for reading or
+   writing to/from disk).  VALUE must have the given WIDTH. */
+static void *
+value_to_data (const union value *value_, int width)
+{
+  union value *value = (union value *) value_;
+  assert (sizeof value->f == sizeof (double));
+  if (width == 0)
+    return &value->f;
+  else
+    return value_str_rw (value, width);
+}
+
+/* Returns the number of bytes needed to store all the values in
+   PROTO on disk. */
+static size_t
+caseproto_to_n_bytes (const struct caseproto *proto)
+{
+  size_t n_bytes;
+  size_t i;
+
+  n_bytes = 0;
+  for (i = 0; i < caseproto_get_n_widths (proto); i++)
+    {
+      int width = caseproto_get_width (proto, i);
+      if (width >= 0)
+        n_bytes += width_to_n_bytes (width);
+    }
+  return n_bytes;
+}
+
  /* Creates and returns a new datasheet.
  
     If READER is nonnull, then the datasheet initially contains
@@ -154,42 +202,50 @@ static bool rw_case (struct datasheet *ds, enum rw_op op,
  struct datasheet *
  datasheet_create (struct casereader *reader)
  {
-  /* Create datasheet. */
    struct datasheet *ds = xmalloc (sizeof *ds);
-  ds->columns = axis_create ();
+  ds->sources = NULL;
+  ds->n_sources = 0;
+  ds->proto = NULL;
+  ds->columns = NULL;
+  ds->n_columns = 0;
+  ds->column_min_alloc = 8;
    ds->rows = axis_create ();
-  range_map_init (&ds->sources);
-  ds->column_min_alloc = 1;
    ds->taint = taint_create ();
  
-  /* Add backing. */
    if (reader != NULL)
      {
-      size_t column_cnt;
-      casenumber row_cnt;
-      struct source_info *si;
-
-      si = xmalloc (sizeof *si);
-      si->source = source_create_casereader (reader);
-      column_cnt = source_get_column_cnt (si->source);
-      row_cnt = source_get_backing_row_cnt (si->source);
-      source_increase_use (si->source, column_cnt);
-
-      if ( column_cnt > 0 )
-       {
-         unsigned long int column_start;
-      column_start = axis_extend (ds->columns, column_cnt);
-      axis_insert (ds->columns, 0, column_start, column_cnt);
-      range_map_insert (&ds->sources, column_start, column_cnt,
-                        &si->column_range);
-       }
-
-      if ( row_cnt > 0 )
-       {
-         unsigned long int row_start;
-      row_start = axis_extend (ds->rows, row_cnt);
-      axis_insert (ds->rows, 0, row_start, row_cnt);
-       }
+      casenumber n_rows;
+      size_t byte_ofs;
+      size_t i;
+
+      taint_propagate (casereader_get_taint (reader), ds->taint);
+
+      ds->proto = caseproto_ref (casereader_get_proto (reader));
+
+      ds->sources = xmalloc (sizeof *ds->sources);
+      ds->sources[0] = source_create_casereader (reader);
+      ds->n_sources = 1;
+
+      ds->n_columns = caseproto_get_n_widths (ds->proto);
+      ds->columns = xnmalloc (ds->n_columns, sizeof *ds->columns);
+      byte_ofs = 0;
+      for (i = 0; i < ds->n_columns; i++)
+        {
+          struct column *column = &ds->columns[i];
+          int width = caseproto_get_width (ds->proto, i);
+          column->source = ds->sources[0];
+          column->width = width;
+          if (width >= 0)
+            {
+              column->value_ofs = i;
+              column->byte_ofs = byte_ofs;
+              byte_ofs += width_to_n_bytes (column->width);
+            }
+        }
+
+      n_rows = source_get_backing_n_rows (ds->sources[0]);
+      if (n_rows > 0)
+        axis_insert (ds->rows, 0, axis_extend (ds->rows, n_rows), n_rows);
      }
  
    return ds;
@@ -199,21 +255,47 @@ datasheet_create (struct casereader *reader)
  void
  datasheet_destroy (struct datasheet *ds)
  {
+  size_t i;
+
    if (ds == NULL)
      return;
  
-  axis_destroy (ds->columns);
+  for (i = 0; i < ds->n_sources; i++)
+    source_destroy (ds->sources[i]);
+  free (ds->sources);
+  caseproto_unref (ds->proto);
+  free (ds->columns);
    axis_destroy (ds->rows);
-  while (!range_map_is_empty (&ds->sources))
-    {
-      struct range_map_node *r = range_map_first (&ds->sources);
-      struct source_info *si = source_info_from_range_map (r);
-      free_source_info (ds, si);
-    }
    taint_destroy (ds->taint);
    free (ds);
  }
  
+/* Returns the prototype for the cases in DS.  The caller must
+   not unref the returned prototype. */
+const struct caseproto *
+datasheet_get_proto (const struct datasheet *ds_)
+{
+  struct datasheet *ds = CONST_CAST (struct datasheet *, ds_);
+  if (ds->proto == NULL)
+    {
+      size_t i;
+
+      ds->proto = caseproto_create ();
+      for (i = 0; i < ds->n_columns; i++)
+        ds->proto = caseproto_add_width (ds->proto, ds->columns[i].width);
+    }
+  return ds->proto;
+}
+
+/* Returns the width of the given COLUMN within DS.
+   COLUMN must be less than the number of columns in DS. */
+int
+datasheet_get_column_width (const struct datasheet *ds, size_t column)
+{
+  assert (column < datasheet_get_n_columns (ds));
+  return ds->columns[column].width;
+}
+
  /* Moves datasheet DS to a new location in memory, and returns
     the new location.  Afterward, the datasheet must not be
     accessed at its former location.
@@ -254,156 +336,232 @@ datasheet_get_taint (const struct datasheet *ds)
  
  /* Returns the number of rows in DS. */
  casenumber
-datasheet_get_row_cnt (const struct datasheet *ds)
+datasheet_get_n_rows (const struct datasheet *ds)
  {
    return axis_get_size (ds->rows);
  }
  
  /* Returns the number of columns in DS. */
  size_t
-datasheet_get_column_cnt (const struct datasheet *ds)
+datasheet_get_n_columns (const struct datasheet *ds)
  {
-  return axis_get_size (ds->columns);
+  return ds->n_columns;
  }
  
-/* Inserts CNT columns into datasheet DS just before column
-   BEFORE.  Initializes the contents of each row in the inserted
-   columns to the CNT values in INIT_VALUES.
+/* Inserts a column of the given WIDTH into datasheet DS just
+   before column BEFORE.  Initializes the contents of each row in
+   the inserted column to VALUE (which must have width WIDTH).
  
     Returns true if successful, false on failure.  In case of
     failure, the datasheet is unchanged. */
  bool
-datasheet_insert_columns (struct datasheet *ds,
-                          const union value init_values[], size_t cnt,
-                          size_t before)
+datasheet_insert_column (struct datasheet *ds,
+                         const union value *value, int width, size_t before)
  {
-  size_t added = 0;
-  while (cnt > 0)
-    {
-      unsigned long first_phy; /* First allocated physical column. */
-      unsigned long phy_cnt;   /* Number of allocated physical columns. */
+  struct column *col;
  
-      /* Allocate physical columns from the pool of available
-         columns. */
-      if (!axis_allocate (ds->columns, cnt, &first_phy, &phy_cnt))
-        {
-          /* No columns were available.  Create a new source and
-             extend the axis to make some new ones available. */
-          struct source_info *si;
-
-          phy_cnt = MAX (cnt, ds->column_min_alloc);
-          first_phy = axis_extend (ds->columns, phy_cnt);
-          ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2);
-
-          si = xmalloc (sizeof *si);
-          si->source = source_create_empty (phy_cnt);
-          range_map_insert (&ds->sources, first_phy, phy_cnt,
-                            &si->column_range);
-          if (phy_cnt > cnt)
-            {
-              axis_make_available (ds->columns, first_phy + cnt,
-                                   phy_cnt - cnt);
-              phy_cnt = cnt;
-            }
-        }
+  assert (before <= ds->n_columns);
  
-      /* Initialize the columns and insert them into the columns
-         axis. */
-      while (phy_cnt > 0)
-        {
-          struct range_map_node *r; /* Range map holding FIRST_PHY column. */
-          struct source_info *s;    /* Source containing FIRST_PHY column. */
-          size_t source_avail;      /* Number of phys columns available. */
-          size_t source_cnt;        /* Number of phys columns to use. */
-
-          /* Figure out how many columns we can and want to take
-             starting at FIRST_PHY, and then insert them into the
-             columns axis. */
-          r = range_map_lookup (&ds->sources, first_phy);
-          s = source_info_from_range_map (r);
-          source_avail = range_map_node_get_end (r) - first_phy;
-          source_cnt = MIN (phy_cnt, source_avail);
-          axis_insert (ds->columns, before, first_phy, source_cnt);
-
-          /* Initialize the data for those columns in the
-             source. */
-          if (!source_write_columns (s->source,
-                                     first_phy - range_map_node_get_start (r),
-                                     init_values, source_cnt))
-            {
-              datasheet_delete_columns (ds, before - added,
-                                        source_cnt + added);
-              taint_set_taint (ds->taint);
-              return false;
-            }
-          source_increase_use (s->source, source_cnt);
-
-          /* Advance. */
-          phy_cnt -= source_cnt;
-          first_phy += source_cnt;
-          init_values += source_cnt;
-          cnt -= source_cnt;
-          before += source_cnt;
-          added += source_cnt;
-        }
+  ds->columns = xnrealloc (ds->columns,
+                           ds->n_columns + 1, sizeof *ds->columns);
+  insert_element (ds->columns, ds->n_columns, sizeof *ds->columns, before);
+  col = &ds->columns[before];
+  ds->n_columns++;
+
+  allocate_column (ds, width, col);
+
+  if (width >= 0 && !source_write_column (col, value))
+    {
+      datasheet_delete_columns (ds, before, 1);
+      taint_set_taint (ds->taint);
+      return false;
      }
+
    return true;
  }
  
-/* Deletes the CNT columns in DS starting from column START. */
+/* Deletes the N columns in DS starting from column START. */
  void
-datasheet_delete_columns (struct datasheet *ds, size_t start, size_t cnt)
+datasheet_delete_columns (struct datasheet *ds, size_t start, size_t n)
  {
-  size_t lcol;
+  assert (start + n <= ds->n_columns);
  
-  /* Free up columns for reuse. */
-  for (lcol = start; lcol < start + cnt; lcol++)
+  if (n > 0)
      {
-      size_t pcol = axis_map (ds->columns, lcol);
-      struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
-      struct source_info *si = source_info_from_range_map (r);
+      size_t i;
  
-      source_decrease_use (si->source, 1);
-      if (source_has_backing (si->source))
+      for (i = start; i < start + n; i++)
          {
-          if (!source_in_use (si->source))
-            free_source_info (ds, si);
+          struct column *column = &ds->columns[i];
+          struct source *source = column->source;
+          source_release_column (source, column->byte_ofs, column->width);
+          release_source (ds, source);
          }
-      else
-        axis_make_available (ds->columns, pcol, 1);
-    }
  
-  /* Remove columns from logical-to-physical mapping. */
-  axis_remove (ds->columns, start, cnt);
+      remove_range (ds->columns, ds->n_columns, sizeof *ds->columns, start, n);
+      ds->n_columns -= n;
+
+      caseproto_unref (ds->proto);
+      ds->proto = NULL;
+    }
  }
  
-/* Moves the CNT columns in DS starting at position OLD_START so
+/* Moves the N columns in DS starting at position OLD_START so
     that they then start at position NEW_START.  Equivalent to
     deleting the column rows, then inserting them at what becomes
-   position NEW_START after the deletion.*/
+   position NEW_START after the deletion. */
  void
  datasheet_move_columns (struct datasheet *ds,
                          size_t old_start, size_t new_start,
-                        size_t cnt)
+                        size_t n)
+{
+  assert (old_start + n <= ds->n_columns);
+  assert (new_start + n <= ds->n_columns);
+
+  move_range (ds->columns, ds->n_columns, sizeof *ds->columns,
+              old_start, new_start, n);
+
+  caseproto_unref (ds->proto);
+  ds->proto = NULL;
+}
+
+struct resize_datasheet_value_aux
+  {
+    union value src_value;
+    size_t src_ofs;
+    int src_width;
+
+    void (*resize_cb) (const union value *, union value *, void *aux);
+    void *resize_cb_aux;
+
+    union value dst_value;
+    size_t dst_ofs;
+    int dst_width;
+  };
+
+static bool
+resize_datasheet_value (const void *src, void *dst, void *aux_)
  {
-  axis_move (ds->columns, old_start, new_start, cnt);
+  struct resize_datasheet_value_aux *aux = aux_;
+
+  memcpy (value_to_data (&aux->src_value, aux->src_width),
+          (uint8_t *) src + aux->src_ofs,
+          width_to_n_bytes (aux->src_width));
+
+  aux->resize_cb (&aux->src_value, &aux->dst_value, aux->resize_cb_aux);
+
+  memcpy ((uint8_t *) dst + aux->dst_ofs,
+          value_to_data (&aux->dst_value, aux->dst_width),
+          width_to_n_bytes (aux->dst_width));
+
+  return true;
  }
  
-/* Retrieves the contents of the given ROW in datasheet DS into
-   newly created case C.  Returns true if successful, false on
-   I/O error. */
  bool
-datasheet_get_row (const struct datasheet *ds, casenumber row, struct ccase *c)
+datasheet_resize_column (struct datasheet *ds, size_t column, int new_width,
+                         void (*resize_cb) (const union value *,
+                                            union value *, void *aux),
+                         void *resize_cb_aux)
  {
-  size_t column_cnt = datasheet_get_column_cnt (ds);
-  case_create (c, column_cnt);
-  if (rw_case ((struct datasheet *) ds, OP_READ,
-               row, 0, column_cnt, case_data_all_rw (c)))
-    return true;
+  struct column old_col;
+  struct column *col;
+  int old_width;
+
+  assert (column < datasheet_get_n_columns (ds));
+
+  col = &ds->columns[column];
+  old_col = *col;
+  old_width = old_col.width;
+
+  if (new_width == -1)
+    {
+      if (old_width != -1)
+        {
+          datasheet_delete_columns (ds, column, 1);
+          datasheet_insert_column (ds, NULL, -1, column);
+        }
+    }
+  else if (old_width == -1)
+    {
+      union value value;
+      value_init (&value, new_width);
+      value_set_missing (&value, new_width);
+      if (resize_cb != NULL)
+        resize_cb (NULL, &value, resize_cb_aux);
+      datasheet_delete_columns (ds, column, 1);
+      datasheet_insert_column (ds, &value, new_width, column);
+      value_destroy (&value, new_width);
+    }
+  else if (source_has_backing (col->source))
+    {
+      unsigned long int n_rows = axis_get_size (ds->rows);
+      unsigned long int lrow;
+      union value src, dst;
+
+      source_release_column (col->source, col->byte_ofs, col->width);
+      allocate_column (ds, new_width, col);
+
+      value_init (&src, old_width);
+      value_init (&dst, new_width);
+      for (lrow = 0; lrow < n_rows; lrow++)
+        {
+          unsigned long int prow = axis_map (ds->rows, lrow);
+          if (!source_read (&old_col, prow, &src))
+            {
+              /* FIXME: back out col changes. */
+              return false;
+            }
+          resize_cb (&src, &dst, resize_cb_aux);
+          if (!source_write (col, prow, &dst))
+            {
+              /* FIXME: back out col changes. */
+              return false;
+            }
+        }
+
+      release_source (ds, old_col.source);
+    }
    else
      {
-      case_destroy (c);
-      return false;
+      struct resize_datasheet_value_aux aux;
+
+      source_release_column (col->source, col->byte_ofs, col->width);
+      allocate_column (ds, new_width, col);
+
+      value_init (&aux.src_value, old_col.width);
+      aux.src_ofs = old_col.byte_ofs;
+      aux.src_width = old_col.width;
+      aux.resize_cb = resize_cb;
+      aux.resize_cb_aux = resize_cb_aux;
+      value_init (&aux.dst_value, new_width);
+      aux.dst_ofs = col->byte_ofs;
+      aux.dst_width = new_width;
+      sparse_xarray_copy (old_col.source->data, col->source->data,
+                          resize_datasheet_value, &aux);
+      value_destroy (&aux.src_value, old_width);
+      value_destroy (&aux.dst_value, new_width);
+
+      release_source (ds, old_col.source);
+    }
+  return true;
+}
+
+/* Retrieves and returns the contents of the given ROW in
+   datasheet DS.  The caller owns the returned case and must
+   unref it when it is no longer needed.  Returns a null pointer
+   on I/O error. */
+struct ccase *
+datasheet_get_row (const struct datasheet *ds, casenumber row)
+{
+  size_t n_columns = datasheet_get_n_columns (ds);
+  struct ccase *c = case_create (datasheet_get_proto (ds));
+  if (rw_case (CONST_CAST (struct datasheet *, ds), OP_READ,
+               row, 0, n_columns, case_data_all_rw (c)))
+    return c;
+  else
+    {
+      case_unref (c);
+      return NULL;
      }
  }
  
@@ -414,43 +572,48 @@ datasheet_get_row (const struct datasheet *ds, casenumber row, struct ccase *c)
  bool
  datasheet_put_row (struct datasheet *ds, casenumber row, struct ccase *c)
  {
-  size_t column_cnt = datasheet_get_column_cnt (ds);
-  bool ok = rw_case (ds, OP_WRITE, row, 0, column_cnt,
+  size_t n_columns = datasheet_get_n_columns (ds);
+  bool ok = rw_case (ds, OP_WRITE, row, 0, n_columns,
                       (union value *) case_data_all (c));
-  case_destroy (c);
+  case_unref (c);
    return ok;
  }
  
-/* Stores the values of the WIDTH columns in DS in the given ROW
-   starting at COLUMN in DS into VALUES.  Returns true if
+/* Stores the values of COLUMN in DS in the given ROW in DS into
+   VALUE.  The caller must have already initialized VALUE as a
+   value of the appropriate width (as returned by
+   datasheet_get_column_width (DS, COLUMN)).  Returns true if
     successful, false on I/O error. */
  bool
-datasheet_get_value (const struct datasheet *ds, casenumber row, size_t column,
-                     union value *value, int width)
+datasheet_get_value (const struct datasheet *ds, casenumber row,
+                     size_t column, union value *value)
  {
-  return rw_case ((struct datasheet *) ds,
-                  OP_READ, row, column, value_cnt_from_width (width), value);
+  assert (row >= 0);
+  return rw_case (CONST_CAST (struct datasheet *, ds), OP_READ,
+                  row, column, 1, value);
  }
  
-/* Stores the WIDTH given VALUES into the given ROW in DS
-   starting at COLUMN.  Returns true if successful, false on I/O
-   error.  On failure, the given ROW might be partially modified
-   or corrupted. */
+/* Stores VALUE into DS in the given ROW and COLUMN.  VALUE must
+   have the correct width for COLUMN (as returned by
+   datasheet_get_column_width (DS, COLUMN)).  Returns true if
+   successful, false on I/O error.  On failure, ROW might be
+   partially modified or corrupted. */
  bool
-datasheet_put_value (struct datasheet *ds, casenumber row, size_t column,
-                     const union value *value, int width)
+datasheet_put_value (struct datasheet *ds UNUSED, casenumber row UNUSED,
+                     size_t column UNUSED, const union value *value UNUSED)
  {
-  return rw_case (ds, OP_WRITE, row, column, value_cnt_from_width (width),
-                  (union value *) value);
+  return rw_case (ds, OP_WRITE, row, column, 1, (union value *) value);
  }
  
-/* Inserts the CNT cases at C, which are destroyed, into
-   datasheet DS just before row BEFORE.  Returns true if
-   successful, false on I/O error.  On failure, datasheet DS is
-   not modified. */
+/* Inserts the CNT cases at C into datasheet DS just before row
+   BEFORE.  Returns true if successful, false on I/O error.  On
+   failure, datasheet DS is not modified.
+
+   Regardless of success, this function unrefs all of the cases
+   in C. */
  bool
  datasheet_insert_rows (struct datasheet *ds,
-                       casenumber before, struct ccase c[],
+                       casenumber before, struct ccase *c[],
                         casenumber cnt)
  {
    casenumber added = 0;
@@ -475,10 +638,10 @@ datasheet_insert_rows (struct datasheet *ds,
  
        /* Initialize the new rows. */
        for (i = 0; i < phy_cnt; i++)
-        if (!datasheet_put_row (ds, before + i, &c[i]))
+        if (!datasheet_put_row (ds, before + i, c[i]))
            {
              while (++i < cnt)
-              case_destroy (&c[i]);
+              case_unref (c[i]);
              datasheet_delete_rows (ds, before - added, phy_cnt + added);
              return false;
            }
@@ -531,28 +694,28 @@ datasheet_make_reader (struct datasheet *ds)
  {
    struct casereader *reader;
    ds = datasheet_rename (ds);
-  reader = casereader_create_random (datasheet_get_column_cnt (ds),
-                                     datasheet_get_row_cnt (ds),
+  reader = casereader_create_random (datasheet_get_proto (ds),
+                                     datasheet_get_n_rows (ds),
                                       &datasheet_reader_class, ds);
    taint_propagate (datasheet_get_taint (ds), casereader_get_taint (reader));
    return reader;
  }
  
  /* "read" function for the datasheet random casereader. */
-static bool
+static struct ccase *
  datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
-                       casenumber case_idx, struct ccase *c)
+                       casenumber case_idx)
  {
    struct datasheet *ds = ds_;
-  if (case_idx >= datasheet_get_row_cnt (ds))
-    return false;
-  else if (datasheet_get_row (ds, case_idx, c))
-    return true;
-  else
+  if (case_idx < datasheet_get_n_rows (ds))
      {
-      taint_set_taint (ds->taint);
-      return false;
+      struct ccase *c = datasheet_get_row (ds, case_idx);
+      if (c == NULL)
+        taint_set_taint (ds->taint);
+      return c;
      }
+  else
+    return NULL;
  }
  
  /* "destroy" function for the datasheet random casereader. */
@@ -580,50 +743,93 @@ static const struct casereader_random_class datasheet_reader_class =
      datasheet_reader_advance,
    };
  \f
-/* Removes SI from DS's set of sources and destroys its
-   source. */
  static void
-free_source_info (struct datasheet *ds, struct source_info *si)
+allocate_column (struct datasheet *ds, int width, struct column *column)
  {
-  range_map_delete (&ds->sources, &si->column_range);
-  source_destroy (si->source);
-  free (si);
+  caseproto_unref (ds->proto);
+  ds->proto = NULL;
+
+  column->value_ofs = -1;
+  column->width = width;
+  if (width >= 0)
+    {
+      int n_bytes;
+      size_t i;
+
+      n_bytes = width_to_n_bytes (width);
+      for (i = 0; i < ds->n_sources; i++)
+        {
+          column->source = ds->sources[i];
+          column->byte_ofs = source_allocate_column (column->source, n_bytes);
+          if (column->byte_ofs >= 0)
+            return;
+        }
+
+      column->source = source_create_empty (MAX (n_bytes,
+                                                 ds->column_min_alloc));
+      ds->sources = xnrealloc (ds->sources,
+                               ds->n_sources + 1, sizeof *ds->sources);
+      ds->sources[ds->n_sources++] = column->source;
+
+      ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2);
+
+      column->byte_ofs = source_allocate_column (column->source, n_bytes);
+      assert (column->byte_ofs >= 0);
+    }
+  else
+    {
+      column->source = NULL;
+      column->byte_ofs = -1;
+    }
  }
  
-static struct source_info *
-source_info_from_range_map (struct range_map_node *node)
+static void
+release_source (struct datasheet *ds, struct source *source)
  {
-  return range_map_data (node, struct source_info, column_range);
+  if (source_has_backing (source) && !source_in_use (source))
+    {
+      /* Since only the first source to be added ever
+         has a backing, this source must have index
+         0.  */
+      assert (source == ds->sources[0]);
+      ds->sources[0] = ds->sources[--ds->n_sources];
+      source_destroy (source);
+    }
  }
  
  /* Reads (if OP is OP_READ) or writes (if op is OP_WRITE) the
-   COLUMN_CNT columns starting from column START_COLUMN in row
-   LROW to/from the COLUMN_CNT values in DATA. */
+   N_COLUMNS columns starting from column START_COLUMN in row
+   LROW to/from the N_COLUMNS values in DATA. */
  static bool
  rw_case (struct datasheet *ds, enum rw_op op,
-         casenumber lrow, size_t start_column, size_t column_cnt,
+         casenumber lrow, size_t start_column, size_t n_columns,
           union value data[])
  {
    casenumber prow;
-  size_t lcol;
+  size_t i;
  
-  assert (lrow < datasheet_get_row_cnt (ds));
-  assert (column_cnt <= datasheet_get_column_cnt (ds));
-  assert (start_column + column_cnt <= datasheet_get_column_cnt (ds));
+  assert (lrow < datasheet_get_n_rows (ds));
+  assert (n_columns <= datasheet_get_n_columns (ds));
+  assert (start_column + n_columns <= datasheet_get_n_columns (ds));
  
    prow = axis_map (ds->rows, lrow);
-  for (lcol = start_column; lcol < start_column + column_cnt; lcol++, data++)
+  for (i = 0; i < n_columns; i++)
      {
-      size_t pcol = axis_map (ds->columns, lcol);
-      struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
-      struct source_info *s = source_info_from_range_map (r);
-      size_t pcol_ofs = pcol - range_map_node_get_start (r);
-      if (!(op == OP_READ
-            ? source_read (s->source, prow, pcol_ofs, data, 1)
-            : source_write (s->source, prow, pcol_ofs, data, 1)))
+      struct column *c = &ds->columns[start_column + i];
+      if (c->width >= 0)
          {
-          taint_set_taint (ds->taint);
-          return false;
+          bool ok;
+
+          if (op == OP_READ)
+            ok = source_read (c, prow, &data[i]);
+          else
+            ok = source_write (c, prow, &data[i]);
+
+          if (!ok)
+            {
+              taint_set_taint (ds->taint);
+              return false;
+            }
          }
      }
    return true;
@@ -636,25 +842,23 @@ rw_case (struct datasheet *ds, enum rw_op op,
     axis_map and axis_get_size functions inspect this mapping, and
     the axis_insert, axis_remove, and axis_move functions modify
     it.  Second, it tracks the set of ordinates that are unused
-   and available for reuse.  (Not all unused ordinates are
-   available for reuse: in particular, unused columns that are
-   backed by a casereader are never reused.)  The axis_allocate,
+   and available for reuse.  The axis_allocate,
     axis_make_available, and axis_extend functions affect the set
     of available ordinates. */
  struct axis
-  {
-    struct tower log_to_phy;     /* Map from logical to physical ordinates;
-                                    contains "struct axis_group"s. */
-    struct range_set *available; /* Set of unused, available ordinates. */
-    unsigned long int phy_size;  /* Current physical length of axis. */
-  };
+{
+  struct tower log_to_phy;     /* Map from logical to physical ordinates;
+                                 contains "struct axis_group"s. */
+  struct range_set *available; /* Set of unused, available ordinates. */
+  unsigned long int phy_size;  /* Current physical length of axis. */
+};
  
  /* A mapping from logical to physical ordinates. */
  struct axis_group
-  {
-    struct tower_node logical;  /* Range of logical ordinates. */
-    unsigned long phy_start;    /* First corresponding physical ordinate. */
-  };
+{
+  struct tower_node logical;  /* Range of logical ordinates. */
+  unsigned long phy_start;    /* First corresponding physical ordinate. */
+};
  
  static struct axis_group *axis_group_from_tower_node (struct tower_node *);
  static struct tower_node *make_axis_group (unsigned long int phy_start);
@@ -692,7 +896,7 @@ axis_clone (const struct axis *old)
    for (node = tower_first (&old->log_to_phy); node != NULL;
         node = tower_next (&old->log_to_phy, node))
      {
-      unsigned long int size = tower_node_get_height (node);
+      unsigned long int size = tower_node_get_size (node);
        struct axis_group *group = tower_data (node, struct axis_group, logical);
        tower_insert (&new->log_to_phy, size, make_axis_group (group->phy_start),
                      NULL);
@@ -716,7 +920,7 @@ axis_hash (const struct axis *axis, struct md4_ctx *ctx)
      {
        struct axis_group *group = tower_data (tn, struct axis_group, logical);
        unsigned long int phy_start = group->phy_start;
-      unsigned long int size = tower_node_get_height (tn);
+      unsigned long int size = tower_node_get_size (tn);
  
        md4_process_bytes (&phy_start, sizeof phy_start, ctx);
        md4_process_bytes (&size, sizeof size, ctx);
@@ -920,7 +1124,7 @@ split_axis (struct axis *axis, unsigned long int where)
    if (where > group_start)
      {
        unsigned long int size_1 = where - group_start;
-      unsigned long int size_2 = tower_node_get_height (group_node) - size_1;
+      unsigned long int size_2 = tower_node_get_size (group_node) - size_1;
        struct tower_node *next = tower_next (&axis->log_to_phy, group_node);
        struct tower_node *new = make_axis_group (group->phy_start + size_1);
        tower_resize (&axis->log_to_phy, group_node, size_1);
@@ -960,11 +1164,11 @@ merge_axis_nodes (struct axis *axis, struct tower_node *node,
    if (next != NULL)
      {
        struct axis_group *next_group = axis_group_from_tower_node (next);
-      unsigned long this_height = tower_node_get_height (node);
+      unsigned long this_height = tower_node_get_size (node);
  
        if (group->phy_start + this_height == next_group->phy_start)
          {
-          unsigned long next_height = tower_node_get_height (next);
+          unsigned long next_height = tower_node_get_size (next);
            tower_resize (t, node, this_height + next_height);
            if (other_node != NULL && *other_node == next)
              *other_node = tower_next (t, *other_node);
@@ -978,11 +1182,11 @@ merge_axis_nodes (struct axis *axis, struct tower_node *node,
    if (prev != NULL)
      {
        struct axis_group *prev_group = axis_group_from_tower_node (prev);
-      unsigned long prev_height = tower_node_get_height (prev);
+      unsigned long prev_height = tower_node_get_size (prev);
  
        if (prev_group->phy_start + prev_height == group->phy_start)
          {
-          unsigned long this_height = tower_node_get_height (node);
+          unsigned long this_height = tower_node_get_size (node);
            group->phy_start = prev_group->phy_start;
            tower_resize (t, node, this_height + prev_height);
            if (other_node != NULL && *other_node == prev)
@@ -1006,7 +1210,7 @@ check_axis_merged (const struct axis *axis UNUSED)
      if (prev != NULL)
        {
          struct axis_group *prev_group = axis_group_from_tower_node (prev);
-        unsigned long prev_height = tower_node_get_height (prev);
+        unsigned long prev_height = tower_node_get_size (prev);
          struct axis_group *node_group = axis_group_from_tower_node (node);
          assert (prev_group->phy_start + prev_height != node_group->phy_start);
        }
@@ -1014,24 +1218,21 @@ check_axis_merged (const struct axis *axis UNUSED)
  }
  \f
  /* A source. */
-struct source
-  {
-    size_t columns_used;        /* Number of columns in use by client. */
-    struct sparse_cases *data;  /* Data at top level, atop the backing. */
-    struct casereader *backing; /* Backing casereader (or null). */
-    casenumber backing_rows;    /* Number of rows in backing (if nonnull). */
-  };
  
-/* Creates and returns an empty, unbacked source with COLUMN_CNT
-   columns and an initial "columns_used" of 0. */
+/* Creates and returns an empty, unbacked source with N_BYTES
+   bytes per case, none of which are initially in use. */
  static struct source *
-source_create_empty (size_t column_cnt)
+source_create_empty (size_t n_bytes)
  {
    struct source *source = xmalloc (sizeof *source);
-  source->columns_used = 0;
-  source->data = sparse_cases_create (column_cnt);
+  size_t row_size = n_bytes + 4 * sizeof (void *);
+  size_t max_memory_rows = settings_get_workspace () / row_size;
+  source->avail = range_set_create ();
+  range_set_insert (source->avail, 0, n_bytes);
+  source->data = sparse_xarray_create (n_bytes, MAX (max_memory_rows, 4));
    source->backing = NULL;
    source->backing_rows = 0;
+  source->n_used = 0;
    return source;
  }
  
@@ -1040,10 +1241,22 @@ source_create_empty (size_t column_cnt)
  static struct source *
  source_create_casereader (struct casereader *reader)
  {
-  struct source *source
-    = source_create_empty (casereader_get_value_cnt (reader));
+  const struct caseproto *proto = casereader_get_proto (reader);
+  size_t n_bytes = caseproto_to_n_bytes (proto);
+  struct source *source = source_create_empty (n_bytes);
+  size_t n_columns;
+  size_t i;
+
+  range_set_delete (source->avail, 0, n_bytes);
    source->backing = reader;
    source->backing_rows = casereader_count_cases (reader);
+
+  source->n_used = 0;
+  n_columns = caseproto_get_n_widths (proto);
+  for (i = 0; i < n_columns; i++)
+    if (caseproto_get_width (proto, i) >= 0)
+      source->n_used++;
+
    return source;
  }
  
@@ -1056,10 +1269,11 @@ static struct source *
  source_clone (const struct source *old)
  {
    struct source *new = xmalloc (sizeof *new);
-  new->columns_used = old->columns_used;
-  new->data = sparse_cases_clone (old->data);
+  new->avail = range_set_clone (old->avail, NULL);
+  new->data = sparse_xarray_clone (old->data);
    new->backing = old->backing != NULL ? casereader_clone (old->backing) : NULL;
    new->backing_rows = old->backing_rows;
+  new->n_used = old->n_used;
    if (new->data == NULL)
      {
        source_destroy (new);
@@ -1068,23 +1282,28 @@ source_clone (const struct source *old)
    return new;
  }
  
-/* Increases the columns_used count of SOURCE by DELTA.
-   The new value must not exceed SOURCE's number of columns. */
-static void
-source_increase_use (struct source *source, size_t delta)
+static int
+source_allocate_column (struct source *source, int width)
  {
-  source->columns_used += delta;
-  assert (source->columns_used <= sparse_cases_get_value_cnt (source->data));
+  unsigned long int start;
+  int n_bytes;
+
+  assert (width >= 0);
+  n_bytes = width_to_n_bytes (width);
+  if (source->backing == NULL
+      && range_set_allocate_fully (source->avail, n_bytes, &start))
+    return start;
+  else
+    return -1;
  }
  
-/* Decreases the columns_used count of SOURCE by DELTA.
-   This must not attempt to decrease the columns_used count below
-   zero. */
  static void
-source_decrease_use (struct source *source, size_t delta)
+source_release_column (struct source *source, int ofs, int width)
  {
-  assert (delta <= source->columns_used);
-  source->columns_used -= delta;
+  assert (width >= 0);
+  range_set_insert (source->avail, ofs, width_to_n_bytes (width));
+  if (source->backing != NULL)
+    source->n_used--;
  }
  
  /* Returns true if SOURCE has any columns in use,
@@ -1092,7 +1311,7 @@ source_decrease_use (struct source *source, size_t delta)
  static bool
  source_in_use (const struct source *source)
  {
-  return source->columns_used > 0;
+  return source->n_used > 0;
  }
  
  /* Destroys SOURCE and its data and backing, if any. */
@@ -1101,7 +1320,8 @@ source_destroy (struct source *source)
  {
    if (source != NULL)
      {
-      sparse_cases_destroy (source->data);
+      range_set_destroy (source->avail);
+      sparse_xarray_destroy (source->data);
        casereader_destroy (source->backing);
        free (source);
      }
@@ -1110,89 +1330,99 @@ source_destroy (struct source *source)
  /* Returns the number of rows in SOURCE's backing casereader
     (SOURCE must have a backing casereader). */
  static casenumber
-source_get_backing_row_cnt (const struct source *source)
+source_get_backing_n_rows (const struct source *source)
  {
    assert (source_has_backing (source));
    return source->backing_rows;
  }
  
-/* Returns the number of columns in SOURCE. */
-static size_t
-source_get_column_cnt (const struct source *source)
-{
-  return sparse_cases_get_value_cnt (source->data);
-}
+/* Reads the given COLUMN from SOURCE in the given ROW, into
+   VALUE.  Returns true if successful, false on I/O error.
  
-/* Reads VALUE_CNT columns from SOURCE in the given ROW, starting
-   from COLUMN, into VALUES.  Returns true if successful, false
-   on I/O error. */
+   The caller must have initialized VALUE with the proper
+   width. */
  static bool
-source_read (const struct source *source,
-             casenumber row, size_t column,
-             union value values[], size_t value_cnt)
+source_read (const struct column *column, casenumber row, union value *value)
  {
-  if (source->backing == NULL || sparse_cases_contains_row (source->data, row))
-    return sparse_cases_read (source->data, row, column, values, value_cnt);
+  struct source *source = column->source;
+
+  assert (column->width >= 0);
+  if (source->backing == NULL
+      || sparse_xarray_contains_row (source->data, row))
+    return sparse_xarray_read (source->data, row, column->byte_ofs,
+                               width_to_n_bytes (column->width),
+                               value_to_data (value, column->width));
    else
      {
-      struct ccase c;
-      bool ok;
-
-      assert (source->backing != NULL);
-      ok = casereader_peek (source->backing, row, &c);
+      struct ccase *c = casereader_peek (source->backing, row);
+      bool ok = c != NULL;
        if (ok)
          {
-          case_copy_out (&c, column, values, value_cnt);
-          case_destroy (&c);
+          value_copy (value, case_data_idx (c, column->value_ofs),
+                      column->width);
+          case_unref (c);
          }
        return ok;
      }
  }
  
-/* Writes the VALUE_CNT values in VALUES to SOURCE in the given
-   ROW, starting at ROW.  Returns true if successful, false on
-   I/O error.  On error, the row's data may be completely or
-   partially corrupted, both inside and outside the region to be
-   written.  */
  static bool
-source_write (struct source *source,
-              casenumber row, size_t column,
-              const union value values[], size_t value_cnt)
+copy_case_into_source (struct source *source, struct ccase *c, casenumber row)
  {
-  size_t column_cnt = sparse_cases_get_value_cnt (source->data);
-  bool ok;
+  const struct caseproto *proto = casereader_get_proto (source->backing);
+  size_t n_widths = caseproto_get_n_widths (proto);
+  size_t ofs;
+  size_t i;
  
-  if (source->backing == NULL
-      || (column == 0 && value_cnt == column_cnt)
-      || sparse_cases_contains_row (source->data, row))
-    ok = sparse_cases_write (source->data, row, column, values, value_cnt);
-  else
+  ofs = 0;
+  for (i = 0; i < n_widths; i++)
      {
-      struct ccase c;
-      if (row < source->backing_rows)
-        ok = casereader_peek (source->backing, row, &c);
-      else
-        {
-          /* It's not one of the backed rows.  Ideally, this
-             should never happen: we'd always be writing the full
-             contents of new, unbacked rows in a single call to
-             this function, so that the first case above would
-             trigger.  But that's a little difficult at higher
-             levels, so that we in fact usually write the full
-             contents of new, unbacked rows in multiple calls to
-             this function.  Make this work. */
-          case_create (&c, column_cnt);
-          ok = true;
-        }
-      if (ok)
+      int width = caseproto_get_width (proto, i);
+      if (width >= 0)
          {
-          case_copy_in (&c, column, values, value_cnt);
-          ok = sparse_cases_write (source->data, row, 0,
-                                   case_data_all (&c), column_cnt);
-          case_destroy (&c);
+          int n_bytes = width_to_n_bytes (width);
+          if (!sparse_xarray_write (source->data, row, ofs, n_bytes,
+                                    value_to_data (case_data_idx (c, i),
+                                                   width)))
+            return false;
+          ofs += n_bytes;
          }
      }
-  return ok;
+  return true;
+}
+
+/* Writes VALUE to SOURCE in the given ROW and COLUMN.  Returns
+   true if successful, false on I/O error.  On error, the row's
+   data may be completely or partially corrupted, both inside and
+   outside the region to be written.  */
+static bool
+source_write (const struct column *column, casenumber row,
+              const union value *value)
+{
+  struct source *source = column->source;
+  struct casereader *backing = source->backing;
+
+  assert (column->width >= 0);
+  if (backing != NULL
+      && !sparse_xarray_contains_row (source->data, row)
+      && row < source->backing_rows)
+    {
+      struct ccase *c;
+      bool ok;
+
+      c = casereader_peek (backing, row);
+      if (c == NULL)
+        return false;
+
+      ok = copy_case_into_source (source, c, row);
+      case_unref (c);
+      if (!ok)
+        return false;
+    }
+
+  return sparse_xarray_write (source->data, row, column->byte_ofs,
+                              width_to_n_bytes (column->width),
+                              value_to_data (value, column->width));
  }
  
  /* Within SOURCE, which must not have a backing casereader,
@@ -1202,17 +1432,20 @@ source_write (struct source *source,
     false if an I/O error occurs.
  
     We don't support backing != NULL because (1) it's harder and
-   (2) source_write_columns is only called by
-   datasheet_insert_columns, which doesn't reuse columns from
+   (2) this function is only called by
+   datasheet_insert_column, which doesn't reuse columns from
     sources that are backed by casereaders. */
  static bool
-source_write_columns (struct source *source, size_t start_column,
-                      const union value values[], size_t value_cnt)
+source_write_column (struct column *column, const union value *value)
  {
-  assert (source->backing == NULL);
+  int width = column->width;
+
+  assert (column->source->backing == NULL);
+  assert (width >= 0);
  
-  return sparse_cases_write_columns (source->data, start_column,
-                                     values, value_cnt);
+  return sparse_xarray_write_columns (column->source->data, column->byte_ofs,
+                                      width_to_n_bytes (width),
+                                      value_to_data (value, width));
  }
  
  /* Returns true if SOURCE has a backing casereader, false
@@ -1225,374 +1458,69 @@ source_has_backing (const struct source *source)
  \f
  /* Datasheet model checker test driver. */
  
-/* Maximum size of datasheet supported for model checking
-   purposes. */
-#define MAX_ROWS 5
-#define MAX_COLS 5
-
-/* Hashes the structure of datasheet DS and returns the hash.
-   We use MD4 because it is much faster than MD5 or SHA-1 but its
-   collision resistance is just as good. */
-static unsigned int
-hash_datasheet (const struct datasheet *ds)
+static int
+get_source_index (const struct datasheet *ds, const struct source *source)
  {
-  unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
-  struct md4_ctx ctx;
-  struct range_map_node *r;
+  size_t i;
  
-  md4_init_ctx (&ctx);
-  axis_hash (ds->columns, &ctx);
-  axis_hash (ds->rows, &ctx);
-  for (r = range_map_first (&ds->sources); r != NULL;
-       r = range_map_next (&ds->sources, r))
-    {
-      unsigned long int start = range_map_node_get_start (r);
-      unsigned long int end = range_map_node_get_end (r);
-      md4_process_bytes (&start, sizeof start, &ctx);
-      md4_process_bytes (&end, sizeof end, &ctx);
-    }
-  md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc,
-                      &ctx);
-  md4_finish_ctx (&ctx, hash);
-  return hash[0];
+  for (i = 0; i < ds->n_sources; i++)
+    if (ds->sources[i] == source)
+      return i;
+  NOT_REACHED ();
  }
  
-/* Checks that datasheet DS contains has ROW_CNT rows, COLUMN_CNT
-   columns, and the same contents as ARRAY, reporting any
-   mismatches via mc_error.  Then, adds DS to MC as a new state. */
-static void
-check_datasheet (struct mc *mc, struct datasheet *ds,
-                 double array[MAX_ROWS][MAX_COLS],
-                 size_t row_cnt, size_t column_cnt)
+/* Clones the structure and contents of ODS into a new datasheet,
+   and returns the new datasheet. */
+struct datasheet *
+clone_datasheet (const struct datasheet *ods)
  {
-  assert (row_cnt < MAX_ROWS);
-  assert (column_cnt < MAX_COLS);
-
-  /* If it is a duplicate hash, discard the state before checking
-     its consistency, to save time. */
-  if (mc_discard_dup_state (mc, hash_datasheet (ds)))
-    {
-      datasheet_destroy (ds);
-      return;
-    }
-
-  if (row_cnt != datasheet_get_row_cnt (ds))
-    mc_error (mc, "row count (%lu) does not match expected (%zu)",
-              (unsigned long int) datasheet_get_row_cnt (ds), row_cnt);
-  else if (column_cnt != datasheet_get_column_cnt (ds))
-    mc_error (mc, "column count (%lu) does not match expected (%zu)",
-              (unsigned long int) datasheet_get_column_cnt (ds), column_cnt);
-  else
-    {
-      size_t row, col;
-
-      for (row = 0; row < row_cnt; row++)
-        for (col = 0; col < column_cnt; col++)
-          {
-            union value v;
-            if (!datasheet_get_value (ds, row, col, &v, 1))
-              NOT_REACHED ();
-            if (v.f != array[row][col])
-              mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: %g != %g",
-                        row, col, row_cnt, column_cnt, v.f, array[row][col]);
-          }
-    }
-
-  mc_add_state (mc, ds);
-}
+  struct datasheet *ds;
+  size_t i;
  
-/* Extracts the contents of DS into DATA. */
-static void
-extract_data (const struct datasheet *ds, double data[MAX_ROWS][MAX_COLS])
-{
-  size_t column_cnt = datasheet_get_column_cnt (ds);
-  size_t row_cnt = datasheet_get_row_cnt (ds);
-  size_t row, col;
+  ds = xmalloc (sizeof *ds);
  
-  assert (row_cnt < MAX_ROWS);
-  assert (column_cnt < MAX_COLS);
-  for (row = 0; row < row_cnt; row++)
-    for (col = 0; col < column_cnt; col++)
-      {
-        union value v;
-        if (!datasheet_get_value (ds, row, col, &v, 1))
-          NOT_REACHED ();
-        data[row][col] = v.f;
-      }
-}
+  ds->sources = xmalloc (ods->n_sources * sizeof *ds->sources);
+  for (i = 0; i < ods->n_sources; i++)
+    ds->sources[i] = source_clone (ods->sources[i]);
+  ds->n_sources = ods->n_sources;
  
-/* Clones the structure and contents of ODS into *DS,
-   and the contents of ODATA into DATA. */
-static void
-clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS],
-             struct datasheet **ds_, double data[MAX_ROWS][MAX_COLS])
-{
-  struct datasheet *ds;
-  struct range_map_node *r;
+  ds->proto = ods->proto != NULL ? caseproto_ref (ods->proto) : NULL;
+  ds->columns = xmemdup (ods->columns, ods->n_columns * sizeof *ods->columns);
+  for (i = 0; i < ods->n_columns; i++)
+    ds->columns[i].source
+      = ds->sources[get_source_index (ods, ods->columns[i].source)];
+  ds->n_columns = ods->n_columns;
+  ds->column_min_alloc = ods->column_min_alloc;
  
-  /* Clone ODS into DS. */
-  ds = *ds_ = xmalloc (sizeof *ds);
-  ds->columns = axis_clone (ods->columns);
    ds->rows = axis_clone (ods->rows);
-  range_map_init (&ds->sources);
-  for (r = range_map_first (&ods->sources); r != NULL;
-       r = range_map_next (&ods->sources, r))
-    {
-      const struct source_info *osi = source_info_from_range_map (r);
-      struct source_info *si = xmalloc (sizeof *si);
-      si->source = source_clone (osi->source);
-      range_map_insert (&ds->sources, range_map_node_get_start (r),
-                        range_map_node_get_width (r), &si->column_range);
-    }
-  ds->column_min_alloc = ods->column_min_alloc;
+
    ds->taint = taint_create ();
  
-  /* Clone ODATA into DATA. */
-  memcpy (data, odata, MAX_ROWS * MAX_COLS * sizeof **data);
+  return ds;
  }
  
-/* "init" function for struct mc_class. */
-static void
-datasheet_mc_init (struct mc *mc)
+/* Hashes the structure of datasheet DS and returns the hash.
+   We use MD4 because it is much faster than MD5 or SHA-1 but its
+   collision resistance is just as good. */
+unsigned int
+hash_datasheet (const struct datasheet *ds)
  {
-  struct datasheet_test_params *params = mc_get_aux (mc);
-  struct datasheet *ds;
+  unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
+  struct md4_ctx ctx;
+  size_t i;
  
-  if (params->backing_rows == 0 && params->backing_cols == 0)
+  md4_init_ctx (&ctx);
+  for (i = 0; i < ds->n_columns; i++)
      {
-      /* Create unbacked datasheet. */
-      ds = datasheet_create (NULL);
-      mc_name_operation (mc, "empty datasheet");
-      check_datasheet (mc, ds, NULL, 0, 0);
+      const struct column *column = &ds->columns[i];
+      int source_n_bytes = sparse_xarray_get_n_columns (column->source->data);
+      md4_process_bytes (&source_n_bytes, sizeof source_n_bytes, &ctx);
+      /*md4_process_bytes (&column->byte_ofs, sizeof column->byte_ofs, &ctx);*/
+      md4_process_bytes (&column->value_ofs, sizeof column->value_ofs, &ctx);
+      md4_process_bytes (&column->width, sizeof column->width, &ctx);
      }
-  else
-    {
-      /* Create datasheet with backing. */
-      struct casewriter *writer;
-      struct casereader *reader;
-      double data[MAX_ROWS][MAX_COLS];
-      int row;
-
-      assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
-      assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
-
-      writer = mem_writer_create (params->backing_cols);
-      for (row = 0; row < params->backing_rows; row++)
-        {
-          struct ccase c;
-          int col;
-
-          case_create (&c, params->backing_cols);
-          for (col = 0; col < params->backing_cols; col++)
-            {
-              double value = params->next_value++;
-              data[row][col] = value;
-              case_data_rw_idx (&c, col)->f = value;
-            }
-          casewriter_write (writer, &c);
-        }
-      reader = casewriter_make_reader (writer);
-      assert (reader != NULL);
-
-      ds = datasheet_create (reader);
-      mc_name_operation (mc, "datasheet with (%d,%d) backing",
-                         params->backing_rows, params->backing_cols);
-      check_datasheet (mc, ds, data,
-                       params->backing_rows, params->backing_cols);
-    }
-}
-
-/* "mutate" function for struct mc_class. */
-static void
-datasheet_mc_mutate (struct mc *mc, const void *ods_)
-{
-  struct datasheet_test_params *params = mc_get_aux (mc);
-
-  const struct datasheet *ods = ods_;
-  double odata[MAX_ROWS][MAX_COLS];
-  double data[MAX_ROWS][MAX_COLS];
-  size_t column_cnt = datasheet_get_column_cnt (ods);
-  size_t row_cnt = datasheet_get_row_cnt (ods);
-  size_t pos, new_pos, cnt;
-
-  extract_data (ods, odata);
-
-  /* Insert all possible numbers of columns in all possible
-     positions. */
-  for (pos = 0; pos <= column_cnt; pos++)
-    for (cnt = 0; cnt <= params->max_cols - column_cnt; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-          union value new[MAX_COLS];
-          size_t i, j;
-
-          mc_name_operation (mc, "insert %zu columns at %zu", cnt, pos);
-          clone_model (ods, odata, &ds, data);
-
-          for (i = 0; i < cnt; i++)
-            new[i].f = params->next_value++;
-
-          if (!datasheet_insert_columns (ds, new, cnt, pos))
-            mc_error (mc, "datasheet_insert_columns failed");
-
-          for (i = 0; i < row_cnt; i++)
-            {
-              insert_range (&data[i][0], column_cnt, sizeof data[i][0],
-                            pos, cnt);
-              for (j = 0; j < cnt; j++)
-                data[i][pos + j] = new[j].f;
-            }
-
-          check_datasheet (mc, ds, data, row_cnt, column_cnt + cnt);
-        }
-
-  /* Delete all possible numbers of columns from all possible
-     positions. */
-  for (pos = 0; pos < column_cnt; pos++)
-    for (cnt = 0; cnt < column_cnt - pos; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-          size_t i;
-
-          mc_name_operation (mc, "delete %zu columns at %zu", cnt, pos);
-          clone_model (ods, odata, &ds, data);
-
-          datasheet_delete_columns (ds, pos, cnt);
-
-          for (i = 0; i < row_cnt; i++)
-            remove_range (&data[i], column_cnt, sizeof *data[i], pos, cnt);
-
-          check_datasheet (mc, ds, data, row_cnt, column_cnt - cnt);
-        }
-
-  /* Move all possible numbers of columns from all possible
-     existing positions to all possible new positions. */
-  for (pos = 0; pos < column_cnt; pos++)
-    for (cnt = 0; cnt < column_cnt - pos; cnt++)
-      for (new_pos = 0; new_pos < column_cnt - cnt; new_pos++)
-        if (mc_include_state (mc))
-          {
-            struct datasheet *ds;
-            size_t i;
-
-            clone_model (ods, odata, &ds, data);
-            mc_name_operation (mc, "move %zu columns from %zu to %zu",
-                               cnt, pos, new_pos);
-
-            datasheet_move_columns (ds, pos, new_pos, cnt);
-
-            for (i = 0; i < row_cnt; i++)
-              move_range (&data[i], column_cnt, sizeof data[i][0],
-                          pos, new_pos, cnt);
-
-            check_datasheet (mc, ds, data, row_cnt, column_cnt);
-          }
-
-  /* Insert all possible numbers of rows in all possible
-     positions. */
-  for (pos = 0; pos <= row_cnt; pos++)
-    for (cnt = 0; cnt <= params->max_rows - row_cnt; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-          struct ccase c[MAX_ROWS];
-          size_t i, j;
-
-          clone_model (ods, odata, &ds, data);
-          mc_name_operation (mc, "insert %zu rows at %zu", cnt, pos);
-
-          for (i = 0; i < cnt; i++)
-            {
-              case_create (&c[i], column_cnt);
-              for (j = 0; j < column_cnt; j++)
-                case_data_rw_idx (&c[i], j)->f = params->next_value++;
-            }
-
-          insert_range (data, row_cnt, sizeof data[pos], pos, cnt);
-          for (i = 0; i < cnt; i++)
-            for (j = 0; j < column_cnt; j++)
-              data[i + pos][j] = case_num_idx (&c[i], j);
-
-          if (!datasheet_insert_rows (ds, pos, c, cnt))
-            mc_error (mc, "datasheet_insert_rows failed");
-
-          check_datasheet (mc, ds, data, row_cnt + cnt, column_cnt);
-        }
-
-  /* Delete all possible numbers of rows from all possible
-     positions. */
-  for (pos = 0; pos < row_cnt; pos++)
-    for (cnt = 0; cnt < row_cnt - pos; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-
-          clone_model (ods, odata, &ds, data);
-          mc_name_operation (mc, "delete %zu rows at %zu", cnt, pos);
-
-          datasheet_delete_rows (ds, pos, cnt);
-
-          remove_range (&data[0], row_cnt, sizeof data[0], pos, cnt);
-
-          check_datasheet (mc, ds, data, row_cnt - cnt, column_cnt);
-        }
-
-  /* Move all possible numbers of rows from all possible existing
-     positions to all possible new positions. */
-  for (pos = 0; pos < row_cnt; pos++)
-    for (cnt = 0; cnt < row_cnt - pos; cnt++)
-      for (new_pos = 0; new_pos < row_cnt - cnt; new_pos++)
-        if (mc_include_state (mc))
-          {
-            struct datasheet *ds;
-
-            clone_model (ods, odata, &ds, data);
-            mc_name_operation (mc, "move %zu rows from %zu to %zu",
-                               cnt, pos, new_pos);
-
-            datasheet_move_rows (ds, pos, new_pos, cnt);
-
-            move_range (&data[0], row_cnt, sizeof data[0],
-                        pos, new_pos, cnt);
-
-            check_datasheet (mc, ds, data, row_cnt, column_cnt);
-          }
-}
-
-/* "destroy" function for struct mc_class. */
-static void
-datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
-{
-  struct datasheet *ds = ds_;
-  datasheet_destroy (ds);
-}
-
-/* Executes the model checker on the datasheet test driver with
-   the given OPTIONS and passing in the given PARAMS, which must
-   point to a modifiable "struct datasheet_test_params".  If any
-   value in PARAMS is out of range, it will be adjusted into the
-   valid range before running the test.
-
-   Returns the results of the model checking run. */
-struct mc_results *
-datasheet_test (struct mc_options *options, void *params_)
-{
-  struct datasheet_test_params *params = params_;
-  static const struct mc_class datasheet_mc_class =
-    {
-      datasheet_mc_init,
-      datasheet_mc_mutate,
-      datasheet_mc_destroy,
-    };
-
-  params->next_value = 1;
-  params->max_rows = MIN (params->max_rows, MAX_ROWS);
-  params->max_cols = MIN (params->max_cols, MAX_COLS);
-  params->backing_rows = MIN (params->backing_rows, params->max_rows);
-  params->backing_cols = MIN (params->backing_cols, params->max_cols);
-
-  mc_options_set_aux (options, params);
-  return mc_run (&datasheet_mc_class, options);
+  axis_hash (ds->rows, &ctx);
+  md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc, &ctx);
+  md4_finish_ctx (&ctx, hash);
+  return hash[0];
  }