Moved the datasheet testing code out of src/{libspp,data}

[pspp-builds.git] / src / data / datasheet.c
diff --git a/src/data/datasheet.c b/src/data/datasheet.c

index 4a3eda32cc7700a030adc96b624f1bd444935aa3..a11a1e596c4446106e7f9adab05a385dc307479b 100644 (file)
--- a/src/data/datasheet.c
+++ b/src/data/datasheet.c
@@ -1,20 +1,18 @@
-/* PSPP - computes sample statistics.
+/* PSPP - a program for statistical analysis.
     Copyright (C) 2007 Free Software Foundation, Inc.
  
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
  
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
  
     You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  
  #include <config.h>
  
@@ -26,10 +24,10 @@
  #include <data/casereader-provider.h>
  #include <data/casereader.h>
  #include <data/casewriter.h>
+#include <data/lazy-casereader.h>
  #include <data/sparse-cases.h>
  #include <libpspp/array.h>
  #include <libpspp/assertion.h>
-#include <libpspp/model-checker.h>
  #include <libpspp/range-map.h>
  #include <libpspp/range-set.h>
  #include <libpspp/taint.h>
@@ -229,20 +227,23 @@ datasheet_rename (struct datasheet *ds)
    return new;
  }
  
-/* Returns true if a I/O error has occurred while processing a
-   datasheet operation. */
+/* Returns true if datasheet DS is tainted.
+   A datasheet is tainted by an I/O error or by taint
+   propagation to the datasheet. */
  bool
  datasheet_error (const struct datasheet *ds)
  {
    return taint_is_tainted (ds->taint);
  }
  
+/* Marks datasheet DS tainted. */
  void
  datasheet_force_error (struct datasheet *ds)
  {
    taint_set_taint (ds->taint);
  }
  
+/* Returns datasheet DS's taint object. */
  const struct taint *
  datasheet_get_taint (const struct datasheet *ds)
  {
@@ -353,6 +354,8 @@ datasheet_delete_columns (struct datasheet *ds, size_t start, size_t cnt)
  {
    size_t lcol;
  
+  assert ( start + cnt <= axis_get_size (ds->columns) );
+
    /* Free up columns for reuse. */
    for (lcol = start; lcol < start + cnt; lcol++)
      {
@@ -425,6 +428,7 @@ bool
  datasheet_get_value (const struct datasheet *ds, casenumber row, size_t column,
                       union value *value, int width)
  {
+  assert ( row >= 0 );
    return rw_case ((struct datasheet *) ds,
                    OP_READ, row, column, value_cnt_from_width (width), value);
  }
@@ -535,6 +539,7 @@ datasheet_make_reader (struct datasheet *ds)
    return reader;
  }
  
+/* "read" function for the datasheet random casereader. */
  static bool
  datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
                         casenumber case_idx, struct ccase *c)
@@ -551,6 +556,7 @@ datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
      }
  }
  
+/* "destroy" function for the datasheet random casereader. */
  static void
  datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
  {
@@ -558,6 +564,7 @@ datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
    datasheet_destroy (ds);
  }
  
+/* "advance" function for the datasheet random casereader. */
  static void
  datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
                            casenumber case_cnt)
@@ -566,6 +573,7 @@ datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
    datasheet_delete_rows (ds, 0, case_cnt);
  }
  
+/* Random casereader class for a datasheet. */
  static const struct casereader_random_class datasheet_reader_class =
    {
      datasheet_reader_read,
@@ -1223,108 +1231,16 @@ source_has_backing (const struct source *source)
  #define MAX_ROWS 5
  #define MAX_COLS 5
  
-/* Hashes the structure of datasheet DS and returns the hash.
-   We use MD4 because it is much faster than MD5 or SHA-1 but its
-   collision resistance is just as good. */
-static unsigned int
-hash_datasheet (const struct datasheet *ds)
-{
-  unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
-  struct md4_ctx ctx;
-  struct range_map_node *r;
-
-  md4_init_ctx (&ctx);
-  axis_hash (ds->columns, &ctx);
-  axis_hash (ds->rows, &ctx);
-  for (r = range_map_first (&ds->sources); r != NULL;
-       r = range_map_next (&ds->sources, r))
-    {
-      unsigned long int start = range_map_node_get_start (r);
-      unsigned long int end = range_map_node_get_end (r);
-      md4_process_bytes (&start, sizeof start, &ctx);
-      md4_process_bytes (&end, sizeof end, &ctx);
-    }
-  md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc,
-                      &ctx);
-  md4_finish_ctx (&ctx, hash);
-  return hash[0];
-}
-
-/* Checks that datasheet DS contains has ROW_CNT rows, COLUMN_CNT
-   columns, and the same contents as ARRAY, reporting any
-   mismatches via mc_error.  Then, adds DS to MC as a new state. */
-static void
-check_datasheet (struct mc *mc, struct datasheet *ds,
-                 double array[MAX_ROWS][MAX_COLS],
-                 size_t row_cnt, size_t column_cnt)
-{
-  assert (row_cnt < MAX_ROWS);
-  assert (column_cnt < MAX_COLS);
-
-  /* If it is a duplicate hash, discard the state before checking
-     its consistency, to save time. */
-  if (mc_discard_dup_state (mc, hash_datasheet (ds)))
-    {
-      datasheet_destroy (ds);
-      return;
-    }
-
-  if (row_cnt != datasheet_get_row_cnt (ds))
-    mc_error (mc, "row count (%lu) does not match expected (%zu)",
-              (unsigned long int) datasheet_get_row_cnt (ds), row_cnt);
-  else if (column_cnt != datasheet_get_column_cnt (ds))
-    mc_error (mc, "column count (%lu) does not match expected (%zu)",
-              (unsigned long int) datasheet_get_column_cnt (ds), column_cnt);
-  else
-    {
-      size_t row, col;
-
-      for (row = 0; row < row_cnt; row++)
-        for (col = 0; col < column_cnt; col++)
-          {
-            union value v;
-            if (!datasheet_get_value (ds, row, col, &v, 1))
-              NOT_REACHED ();
-            if (v.f != array[row][col])
-              mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: %g != %g",
-                        row, col, row_cnt, column_cnt, v.f, array[row][col]);
-          }
-    }
-
-  mc_add_state (mc, ds);
-}
-
-/* Extracts the contents of DS into DATA. */
-static void
-extract_data (const struct datasheet *ds, double data[MAX_ROWS][MAX_COLS])
-{
-  size_t column_cnt = datasheet_get_column_cnt (ds);
-  size_t row_cnt = datasheet_get_row_cnt (ds);
-  size_t row, col;
  
-  assert (row_cnt < MAX_ROWS);
-  assert (column_cnt < MAX_COLS);
-  for (row = 0; row < row_cnt; row++)
-    for (col = 0; col < column_cnt; col++)
-      {
-        union value v;
-        if (!datasheet_get_value (ds, row, col, &v, 1))
-          NOT_REACHED ();
-        data[row][col] = v.f;
-      }
-}
-
-/* Clones the structure and contents of ODS into *DS,
-   and the contents of ODATA into DATA. */
-static void
-clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS],
-             struct datasheet **ds_, double data[MAX_ROWS][MAX_COLS])
+/* Clones the structure and contents of ODS into a new datasheet,
+   and returns the new datasheet. */
+struct datasheet *
+clone_datasheet (const struct datasheet *ods)
  {
    struct datasheet *ds;
    struct range_map_node *r;
  
-  /* Clone ODS into DS. */
-  ds = *ds_ = xmalloc (sizeof *ds);
+  ds = xmalloc (sizeof *ds);
    ds->columns = axis_clone (ods->columns);
    ds->rows = axis_clone (ods->rows);
    range_map_init (&ds->sources);
@@ -1340,252 +1256,36 @@ clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS],
    ds->column_min_alloc = ods->column_min_alloc;
    ds->taint = taint_create ();
  
-  /* Clone ODATA into DATA. */
-  memcpy (data, odata, MAX_ROWS * MAX_COLS * sizeof **data);
+  return ds;
  }
  
-/* "init" function for struct mc_class. */
-static void
-datasheet_mc_init (struct mc *mc)
+
+/* Hashes the structure of datasheet DS and returns the hash.
+   We use MD4 because it is much faster than MD5 or SHA-1 but its
+   collision resistance is just as good. */
+unsigned int
+hash_datasheet (const struct datasheet *ds)
  {
-  struct datasheet_test_params *params = mc_get_aux (mc);
-  struct datasheet *ds;
+  unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
+  struct md4_ctx ctx;
+  struct range_map_node *r;
  
-  if (params->backing_rows == 0 && params->backing_cols == 0)
-    {
-      /* Create unbacked datasheet. */
-      ds = datasheet_create (NULL);
-      mc_name_operation (mc, "empty datasheet");
-      check_datasheet (mc, ds, NULL, 0, 0);
-    }
-  else
+  md4_init_ctx (&ctx);
+  axis_hash (ds->columns, &ctx);
+  axis_hash (ds->rows, &ctx);
+  for (r = range_map_first (&ds->sources); r != NULL;
+       r = range_map_next (&ds->sources, r))
      {
-      /* Create datasheet with backing. */
-      struct casewriter *writer;
-      struct casereader *reader;
-      double data[MAX_ROWS][MAX_COLS];
-      int row;
-
-      assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
-      assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
-
-      writer = mem_writer_create (params->backing_cols);
-      for (row = 0; row < params->backing_rows; row++)
-        {
-          struct ccase c;
-          int col;
-
-          case_create (&c, params->backing_cols);
-          for (col = 0; col < params->backing_cols; col++)
-            {
-              double value = params->next_value++;
-              data[row][col] = value;
-              case_data_rw_idx (&c, col)->f = value;
-            }
-          casewriter_write (writer, &c);
-        }
-      reader = casewriter_make_reader (writer);
-      assert (reader != NULL);
-
-      ds = datasheet_create (reader);
-      mc_name_operation (mc, "datasheet with (%d,%d) backing",
-                         params->backing_rows, params->backing_cols);
-      check_datasheet (mc, ds, data,
-                       params->backing_rows, params->backing_cols);
+      unsigned long int start = range_map_node_get_start (r);
+      unsigned long int end = range_map_node_get_end (r);
+      md4_process_bytes (&start, sizeof start, &ctx);
+      md4_process_bytes (&end, sizeof end, &ctx);
      }
+  md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc,
+                      &ctx);
+  md4_finish_ctx (&ctx, hash);
+  return hash[0];
  }
  
-/* "mutate" function for struct mc_class. */
-static void
-datasheet_mc_mutate (struct mc *mc, const void *ods_)
-{
-  struct datasheet_test_params *params = mc_get_aux (mc);
-
-  const struct datasheet *ods = ods_;
-  double odata[MAX_ROWS][MAX_COLS];
-  double data[MAX_ROWS][MAX_COLS];
-  size_t column_cnt = datasheet_get_column_cnt (ods);
-  size_t row_cnt = datasheet_get_row_cnt (ods);
-  size_t pos, new_pos, cnt;
-
-  extract_data (ods, odata);
-
-  /* Insert all possible numbers of columns in all possible
-     positions. */
-  for (pos = 0; pos <= column_cnt; pos++)
-    for (cnt = 0; cnt <= params->max_cols - column_cnt; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-          union value new[MAX_COLS];
-          size_t i, j;
-
-          mc_name_operation (mc, "insert %zu columns at %zu", cnt, pos);
-          clone_model (ods, odata, &ds, data);
-
-          for (i = 0; i < cnt; i++)
-            new[i].f = params->next_value++;
-
-          if (!datasheet_insert_columns (ds, new, cnt, pos))
-            mc_error (mc, "datasheet_insert_columns failed");
-
-          for (i = 0; i < row_cnt; i++)
-            {
-              insert_range (&data[i][0], column_cnt, sizeof data[i][0],
-                            pos, cnt);
-              for (j = 0; j < cnt; j++)
-                data[i][pos + j] = new[j].f;
-            }
-
-          check_datasheet (mc, ds, data, row_cnt, column_cnt + cnt);
-        }
-
-  /* Delete all possible numbers of columns from all possible
-     positions. */
-  for (pos = 0; pos < column_cnt; pos++)
-    for (cnt = 0; cnt < column_cnt - pos; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-          size_t i;
-
-          mc_name_operation (mc, "delete %zu columns at %zu", cnt, pos);
-          clone_model (ods, odata, &ds, data);
-
-          datasheet_delete_columns (ds, pos, cnt);
-
-          for (i = 0; i < row_cnt; i++)
-            remove_range (&data[i], column_cnt, sizeof *data[i], pos, cnt);
-
-          check_datasheet (mc, ds, data, row_cnt, column_cnt - cnt);
-        }
-
-  /* Move all possible numbers of columns from all possible
-     existing positions to all possible new positions. */
-  for (pos = 0; pos < column_cnt; pos++)
-    for (cnt = 0; cnt < column_cnt - pos; cnt++)
-      for (new_pos = 0; new_pos < column_cnt - cnt; new_pos++)
-        if (mc_include_state (mc))
-          {
-            struct datasheet *ds;
-            size_t i;
-
-            clone_model (ods, odata, &ds, data);
-            mc_name_operation (mc, "move %zu columns from %zu to %zu",
-                               cnt, pos, new_pos);
-
-            datasheet_move_columns (ds, pos, new_pos, cnt);
-
-            for (i = 0; i < row_cnt; i++)
-              move_range (&data[i], column_cnt, sizeof data[i][0],
-                          pos, new_pos, cnt);
-
-            check_datasheet (mc, ds, data, row_cnt, column_cnt);
-          }
-
-  /* Insert all possible numbers of rows in all possible
-     positions. */
-  for (pos = 0; pos <= row_cnt; pos++)
-    for (cnt = 0; cnt <= params->max_rows - row_cnt; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-          struct ccase c[MAX_ROWS];
-          size_t i, j;
-
-          clone_model (ods, odata, &ds, data);
-          mc_name_operation (mc, "insert %zu rows at %zu", cnt, pos);
-
-          for (i = 0; i < cnt; i++)
-            {
-              case_create (&c[i], column_cnt);
-              for (j = 0; j < column_cnt; j++)
-                case_data_rw_idx (&c[i], j)->f = params->next_value++;
-            }
-
-          insert_range (data, row_cnt, sizeof data[pos], pos, cnt);
-          for (i = 0; i < cnt; i++)
-            for (j = 0; j < column_cnt; j++)
-              data[i + pos][j] = case_num_idx (&c[i], j);
-
-          if (!datasheet_insert_rows (ds, pos, c, cnt))
-            mc_error (mc, "datasheet_insert_rows failed");
-
-          check_datasheet (mc, ds, data, row_cnt + cnt, column_cnt);
-        }
-
-  /* Delete all possible numbers of rows from all possible
-     positions. */
-  for (pos = 0; pos < row_cnt; pos++)
-    for (cnt = 0; cnt < row_cnt - pos; cnt++)
-      if (mc_include_state (mc))
-        {
-          struct datasheet *ds;
-
-          clone_model (ods, odata, &ds, data);
-          mc_name_operation (mc, "delete %zu rows at %zu", cnt, pos);
-
-          datasheet_delete_rows (ds, pos, cnt);
-
-          remove_range (&data[0], row_cnt, sizeof data[0], pos, cnt);
-
-          check_datasheet (mc, ds, data, row_cnt - cnt, column_cnt);
-        }
-
-  /* Move all possible numbers of rows from all possible existing
-     positions to all possible new positions. */
-  for (pos = 0; pos < row_cnt; pos++)
-    for (cnt = 0; cnt < row_cnt - pos; cnt++)
-      for (new_pos = 0; new_pos < row_cnt - cnt; new_pos++)
-        if (mc_include_state (mc))
-          {
-            struct datasheet *ds;
-
-            clone_model (ods, odata, &ds, data);
-            mc_name_operation (mc, "move %zu rows from %zu to %zu",
-                               cnt, pos, new_pos);
-
-            datasheet_move_rows (ds, pos, new_pos, cnt);
-
-            move_range (&data[0], row_cnt, sizeof data[0],
-                        pos, new_pos, cnt);
-
-            check_datasheet (mc, ds, data, row_cnt, column_cnt);
-          }
-}
-
-/* "destroy" function for struct mc_class. */
-static void
-datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
-{
-  struct datasheet *ds = ds_;
-  datasheet_destroy (ds);
-}
  
-/* Executes the model checker on the datasheet test driver with
-   the given OPTIONS and passing in the given PARAMS, which must
-   point to a modifiable "struct datasheet_test_params".  If any
-   value in PARAMS is out of range, it will be adjusted into the
-   valid range before running the test.
  
-   Returns the results of the model checking run. */
-struct mc_results *
-datasheet_test (struct mc_options *options, void *params_)
-{
-  struct datasheet_test_params *params = params_;
-  static const struct mc_class datasheet_mc_class =
-    {
-      datasheet_mc_init,
-      datasheet_mc_mutate,
-      datasheet_mc_destroy,
-    };
-
-  params->next_value = 1;
-  params->max_rows = MIN (params->max_rows, MAX_ROWS);
-  params->max_cols = MIN (params->max_cols, MAX_COLS);
-  params->backing_rows = MIN (params->backing_rows, params->max_rows);
-  params->backing_cols = MIN (params->backing_cols, params->max_cols);
-
-  mc_options_set_aux (options, params);
-  return mc_run (&datasheet_mc_class, options);
-}