src/data/datasheet.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2007 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include <data/datasheet.h>
  20
  21 #include <stdlib.h>
  22 #include <string.h>
  23
  24 #include <data/casereader-provider.h>
  25 #include <data/casereader.h>
  26 #include <data/casewriter.h>
  27 #include <data/lazy-casereader.h>
  28 #include <data/sparse-cases.h>
  29 #include <libpspp/array.h>
  30 #include <libpspp/assertion.h>
  31 #include <libpspp/model-checker.h>
  32 #include <libpspp/range-map.h>
  33 #include <libpspp/range-set.h>
  34 #include <libpspp/taint.h>
  35 #include <libpspp/tower.h>
  36
  37 #include "minmax.h"
  38 #include "md4.h"
  39 #include "xalloc.h"
  40
  41 static struct axis *axis_create (void);
  42 static struct axis *axis_clone (const struct axis *);
  43 static void axis_destroy (struct axis *);
  44
  45 static void axis_hash (const struct axis *, struct md4_ctx *);
  46
  47 static bool axis_allocate (struct axis *, unsigned long int request,
  48                            unsigned long int *start,
  49                            unsigned long int *width);
  50 static void axis_make_available (struct axis *,
  51                                 unsigned long int start,
  52                                 unsigned long int width);
  53 static unsigned long int axis_extend (struct axis *, unsigned long int width);
  54
  55 static unsigned long int axis_map (const struct axis *, unsigned long log_pos);
  56
  57 static unsigned long axis_get_size (const struct axis *);
  58 static void axis_insert (struct axis *,
  59                          unsigned long int log_start,
  60                          unsigned long int phy_start,
  61                          unsigned long int cnt);
  62 static void axis_remove (struct axis *,
  63                          unsigned long int start, unsigned long int cnt);
  64 static void axis_move (struct axis *,
  65                        unsigned long int old_start,
  66                        unsigned long int new_start,
  67                        unsigned long int cnt);
  68
  69 static struct source *source_create_empty (size_t column_cnt);
  70 static struct source *source_create_casereader (struct casereader *);
  71 static struct source *source_clone (const struct source *);
  72 static void source_destroy (struct source *);
  73
  74 static casenumber source_get_backing_row_cnt (const struct source *);
  75 static size_t source_get_column_cnt (const struct source *);
  76
  77 static bool source_read (const struct source *,
  78                          casenumber row, size_t column,
  79                          union value[], size_t value_cnt);
  80 static bool source_write (struct source *,
  81                           casenumber row, size_t column,
  82                           const union value[], size_t value_cnt);
  83 static bool source_write_columns (struct source *, size_t start_column,
  84                                   const union value[], size_t value_cnt);
  85 static bool source_has_backing (const struct source *);
  86 static void source_increase_use (struct source *, size_t delta);
  87 static void source_decrease_use (struct source *, size_t delta);
  88 static bool source_in_use (const struct source *);
  89
  90 /* A datasheet is internally composed from a set of data files,
  91    called "sources".  The sources that make up a datasheet must
  92    have the same number of rows (cases), but their numbers of
  93    columns (variables) may vary.
  94
  95    A datasheet's external view is produced by mapping (permuting
  96    and selecting) its internal data.  Thus, we can rearrange or
  97    delete rows or columns simply by modifying the mapping.  We
  98    add rows by adding rows to each source and to the row mapping.
  99    We add columns by adding a new source, then adding that source
 100    to the column mapping.
 101
 102    Each source in a datasheet can be a casereader or a
 103    sparse_cases.  Casereaders are read-only, so when sources made
 104    from casereaders need to be modified, it is done "virtually"
 105    through being overlaid by a sparse_cases. */
 106
 107 /* A datasheet. */
 108 struct datasheet
 109   {
 110     /* Mappings from logical to physical columns/rows. */
 111     struct axis *columns;
 112     struct axis *rows;
 113
 114     /* Mapping from physical columns to "source_info"s. */
 115     struct range_map sources;
 116
 117     /* Minimum number of columns to put in a new source when we
 118        need new columns and none are free.  We double it whenever
 119        we add a new source to keep the number of file descriptors
 120        needed by the datasheet to a minimum, reducing the
 121        likelihood of running out. */
 122     unsigned column_min_alloc;
 123
 124     /* Indicates corrupted data in the datasheet. */
 125     struct taint *taint;
 126   };
 127
 128 /* Maps from a range of physical columns to a source. */
 129 struct source_info
 130   {
 131     struct range_map_node column_range;
 132     struct source *source;
 133   };
 134
 135 /* Is this operation a read or a write? */
 136 enum rw_op
 137   {
 138     OP_READ,
 139     OP_WRITE
 140   };
 141
 142 static void free_source_info (struct datasheet *, struct source_info *);
 143 static struct source_info *source_info_from_range_map (
 144   struct range_map_node *);
 145 static bool rw_case (struct datasheet *ds, enum rw_op op,
 146                      casenumber lrow, size_t start_column, size_t column_cnt,
 147                      union value data[]);
 148
 149 /* Creates and returns a new datasheet.
 150
 151    If READER is nonnull, then the datasheet initially contains
 152    the contents of READER. */
 153 struct datasheet *
 154 datasheet_create (struct casereader *reader)
 155 {
 156   /* Create datasheet. */
 157   struct datasheet *ds = xmalloc (sizeof *ds);
 158   ds->columns = axis_create ();
 159   ds->rows = axis_create ();
 160   range_map_init (&ds->sources);
 161   ds->column_min_alloc = 1;
 162   ds->taint = taint_create ();
 163
 164   /* Add backing. */
 165   if (reader != NULL)
 166     {
 167       size_t column_cnt;
 168       casenumber row_cnt;
 169       struct source_info *si;
 170
 171       si = xmalloc (sizeof *si);
 172       si->source = source_create_casereader (reader);
 173       column_cnt = source_get_column_cnt (si->source);
 174       row_cnt = source_get_backing_row_cnt (si->source);
 175       source_increase_use (si->source, column_cnt);
 176
 177       if ( column_cnt > 0 )
 178         {
 179           unsigned long int column_start;
 180       column_start = axis_extend (ds->columns, column_cnt);
 181       axis_insert (ds->columns, 0, column_start, column_cnt);
 182       range_map_insert (&ds->sources, column_start, column_cnt,
 183                         &si->column_range);
 184         }
 185
 186       if ( row_cnt > 0 )
 187         {
 188           unsigned long int row_start;
 189       row_start = axis_extend (ds->rows, row_cnt);
 190       axis_insert (ds->rows, 0, row_start, row_cnt);
 191         }
 192     }
 193
 194   return ds;
 195 }
 196
 197 /* Destroys datasheet DS. */
 198 void
 199 datasheet_destroy (struct datasheet *ds)
 200 {
 201   if (ds == NULL)
 202     return;
 203
 204   axis_destroy (ds->columns);
 205   axis_destroy (ds->rows);
 206   while (!range_map_is_empty (&ds->sources))
 207     {
 208       struct range_map_node *r = range_map_first (&ds->sources);
 209       struct source_info *si = source_info_from_range_map (r);
 210       free_source_info (ds, si);
 211     }
 212   taint_destroy (ds->taint);
 213   free (ds);
 214 }
 215
 216 /* Moves datasheet DS to a new location in memory, and returns
 217    the new location.  Afterward, the datasheet must not be
 218    accessed at its former location.
 219
 220    This function is useful for ensuring that all references to a
 221    datasheet have been dropped, especially in conjunction with
 222    tools like Valgrind. */
 223 struct datasheet *
 224 datasheet_rename (struct datasheet *ds)
 225 {
 226   struct datasheet *new = xmemdup (ds, sizeof *ds);
 227   free (ds);
 228   return new;
 229 }
 230
 231 /* Returns true if datasheet DS is tainted.
 232    A datasheet is tainted by an I/O error or by taint
 233    propagation to the datasheet. */
 234 bool
 235 datasheet_error (const struct datasheet *ds)
 236 {
 237   return taint_is_tainted (ds->taint);
 238 }
 239
 240 /* Marks datasheet DS tainted. */
 241 void
 242 datasheet_force_error (struct datasheet *ds)
 243 {
 244   taint_set_taint (ds->taint);
 245 }
 246
 247 /* Returns datasheet DS's taint object. */
 248 const struct taint *
 249 datasheet_get_taint (const struct datasheet *ds)
 250 {
 251   return ds->taint;
 252 }
 253
 254 /* Returns the number of rows in DS. */
 255 casenumber
 256 datasheet_get_row_cnt (const struct datasheet *ds)
 257 {
 258   return axis_get_size (ds->rows);
 259 }
 260
 261 /* Returns the number of columns in DS. */
 262 size_t
 263 datasheet_get_column_cnt (const struct datasheet *ds)
 264 {
 265   return axis_get_size (ds->columns);
 266 }
 267
 268 /* Inserts CNT columns into datasheet DS just before column
 269    BEFORE.  Initializes the contents of each row in the inserted
 270    columns to the CNT values in INIT_VALUES.
 271
 272    Returns true if successful, false on failure.  In case of
 273    failure, the datasheet is unchanged. */
 274 bool
 275 datasheet_insert_columns (struct datasheet *ds,
 276                           const union value init_values[], size_t cnt,
 277                           size_t before)
 278 {
 279   size_t added = 0;
 280   while (cnt > 0)
 281     {
 282       unsigned long first_phy; /* First allocated physical column. */
 283       unsigned long phy_cnt;   /* Number of allocated physical columns. */
 284
 285       /* Allocate physical columns from the pool of available
 286          columns. */
 287       if (!axis_allocate (ds->columns, cnt, &first_phy, &phy_cnt))
 288         {
 289           /* No columns were available.  Create a new source and
 290              extend the axis to make some new ones available. */
 291           struct source_info *si;
 292
 293           phy_cnt = MAX (cnt, ds->column_min_alloc);
 294           first_phy = axis_extend (ds->columns, phy_cnt);
 295           ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2);
 296
 297           si = xmalloc (sizeof *si);
 298           si->source = source_create_empty (phy_cnt);
 299           range_map_insert (&ds->sources, first_phy, phy_cnt,
 300                             &si->column_range);
 301           if (phy_cnt > cnt)
 302             {
 303               axis_make_available (ds->columns, first_phy + cnt,
 304                                    phy_cnt - cnt);
 305               phy_cnt = cnt;
 306             }
 307         }
 308
 309       /* Initialize the columns and insert them into the columns
 310          axis. */
 311       while (phy_cnt > 0)
 312         {
 313           struct range_map_node *r; /* Range map holding FIRST_PHY column. */
 314           struct source_info *s;    /* Source containing FIRST_PHY column. */
 315           size_t source_avail;      /* Number of phys columns available. */
 316           size_t source_cnt;        /* Number of phys columns to use. */
 317
 318           /* Figure out how many columns we can and want to take
 319              starting at FIRST_PHY, and then insert them into the
 320              columns axis. */
 321           r = range_map_lookup (&ds->sources, first_phy);
 322           s = source_info_from_range_map (r);
 323           source_avail = range_map_node_get_end (r) - first_phy;
 324           source_cnt = MIN (phy_cnt, source_avail);
 325           axis_insert (ds->columns, before, first_phy, source_cnt);
 326
 327           /* Initialize the data for those columns in the
 328              source. */
 329           if (!source_write_columns (s->source,
 330                                      first_phy - range_map_node_get_start (r),
 331                                      init_values, source_cnt))
 332             {
 333               datasheet_delete_columns (ds, before - added,
 334                                         source_cnt + added);
 335               taint_set_taint (ds->taint);
 336               return false;
 337             }
 338           source_increase_use (s->source, source_cnt);
 339
 340           /* Advance. */
 341           phy_cnt -= source_cnt;
 342           first_phy += source_cnt;
 343           init_values += source_cnt;
 344           cnt -= source_cnt;
 345           before += source_cnt;
 346           added += source_cnt;
 347         }
 348     }
 349   return true;
 350 }
 351
 352 /* Deletes the CNT columns in DS starting from column START. */
 353 void
 354 datasheet_delete_columns (struct datasheet *ds, size_t start, size_t cnt)
 355 {
 356   size_t lcol;
 357
 358   assert ( start + cnt <= axis_get_size (ds->columns) );
 359
 360   /* Free up columns for reuse. */
 361   for (lcol = start; lcol < start + cnt; lcol++)
 362     {
 363       size_t pcol = axis_map (ds->columns, lcol);
 364       struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
 365       struct source_info *si = source_info_from_range_map (r);
 366
 367       source_decrease_use (si->source, 1);
 368       if (source_has_backing (si->source))
 369         {
 370           if (!source_in_use (si->source))
 371             free_source_info (ds, si);
 372         }
 373       else
 374         axis_make_available (ds->columns, pcol, 1);
 375     }
 376
 377   /* Remove columns from logical-to-physical mapping. */
 378   axis_remove (ds->columns, start, cnt);
 379 }
 380
 381 /* Moves the CNT columns in DS starting at position OLD_START so
 382    that they then start at position NEW_START.  Equivalent to
 383    deleting the column rows, then inserting them at what becomes
 384    position NEW_START after the deletion.*/
 385 void
 386 datasheet_move_columns (struct datasheet *ds,
 387                         size_t old_start, size_t new_start,
 388                         size_t cnt)
 389 {
 390   axis_move (ds->columns, old_start, new_start, cnt);
 391 }
 392
 393 /* Retrieves the contents of the given ROW in datasheet DS into
 394    newly created case C.  Returns true if successful, false on
 395    I/O error. */
 396 bool
 397 datasheet_get_row (const struct datasheet *ds, casenumber row, struct ccase *c)
 398 {
 399   size_t column_cnt = datasheet_get_column_cnt (ds);
 400   case_create (c, column_cnt);
 401   if (rw_case ((struct datasheet *) ds, OP_READ,
 402                row, 0, column_cnt, case_data_all_rw (c)))
 403     return true;
 404   else
 405     {
 406       case_destroy (c);
 407       return false;
 408     }
 409 }
 410
 411 /* Stores the contents of case C, which is destroyed, into the
 412    given ROW in DS.  Returns true on success, false on I/O error.
 413    On failure, the given ROW might be partially modified or
 414    corrupted. */
 415 bool
 416 datasheet_put_row (struct datasheet *ds, casenumber row, struct ccase *c)
 417 {
 418   size_t column_cnt = datasheet_get_column_cnt (ds);
 419   bool ok = rw_case (ds, OP_WRITE, row, 0, column_cnt,
 420                      (union value *) case_data_all (c));
 421   case_destroy (c);
 422   return ok;
 423 }
 424
 425 /* Stores the values of the WIDTH columns in DS in the given ROW
 426    starting at COLUMN in DS into VALUES.  Returns true if
 427    successful, false on I/O error. */
 428 bool
 429 datasheet_get_value (const struct datasheet *ds, casenumber row, size_t column,
 430                      union value *value, int width)
 431 {
 432   assert ( row >= 0 );
 433   return rw_case ((struct datasheet *) ds,
 434                   OP_READ, row, column, value_cnt_from_width (width), value);
 435 }
 436
 437 /* Stores the WIDTH given VALUES into the given ROW in DS
 438    starting at COLUMN.  Returns true if successful, false on I/O
 439    error.  On failure, the given ROW might be partially modified
 440    or corrupted. */
 441 bool
 442 datasheet_put_value (struct datasheet *ds, casenumber row, size_t column,
 443                      const union value *value, int width)
 444 {
 445   return rw_case (ds, OP_WRITE, row, column, value_cnt_from_width (width),
 446                   (union value *) value);
 447 }
 448
 449 /* Inserts the CNT cases at C, which are destroyed, into
 450    datasheet DS just before row BEFORE.  Returns true if
 451    successful, false on I/O error.  On failure, datasheet DS is
 452    not modified. */
 453 bool
 454 datasheet_insert_rows (struct datasheet *ds,
 455                        casenumber before, struct ccase c[],
 456                        casenumber cnt)
 457 {
 458   casenumber added = 0;
 459   while (cnt > 0)
 460     {
 461       unsigned long first_phy;
 462       unsigned long phy_cnt;
 463       unsigned long i;
 464
 465       /* Allocate physical rows from the pool of available
 466          rows. */
 467       if (!axis_allocate (ds->rows, cnt, &first_phy, &phy_cnt))
 468         {
 469           /* No rows were available.  Extend the row axis to make
 470              some new ones available. */
 471           phy_cnt = cnt;
 472           first_phy = axis_extend (ds->rows, cnt);
 473         }
 474
 475       /* Insert the new rows into the row mapping. */
 476       axis_insert (ds->rows, before, first_phy, phy_cnt);
 477
 478       /* Initialize the new rows. */
 479       for (i = 0; i < phy_cnt; i++)
 480         if (!datasheet_put_row (ds, before + i, &c[i]))
 481           {
 482             while (++i < cnt)
 483               case_destroy (&c[i]);
 484             datasheet_delete_rows (ds, before - added, phy_cnt + added);
 485             return false;
 486           }
 487
 488       /* Advance. */
 489       c += phy_cnt;
 490       cnt -= phy_cnt;
 491       before += phy_cnt;
 492       added += phy_cnt;
 493     }
 494   return true;
 495 }
 496
 497 /* Deletes the CNT rows in DS starting from row FIRST. */
 498 void
 499 datasheet_delete_rows (struct datasheet *ds,
 500                        casenumber first, casenumber cnt)
 501 {
 502   size_t lrow;
 503
 504   /* Free up rows for reuse.
 505      FIXME: optimize. */
 506   for (lrow = first; lrow < first + cnt; lrow++)
 507     axis_make_available (ds->rows, axis_map (ds->rows, lrow), 1);
 508
 509   /* Remove rows from logical-to-physical mapping. */
 510   axis_remove (ds->rows, first, cnt);
 511 }
 512
 513 /* Moves the CNT rows in DS starting at position OLD_START so
 514    that they then start at position NEW_START.  Equivalent to
 515    deleting the given rows, then inserting them at what becomes
 516    position NEW_START after the deletion. */
 517 void
 518 datasheet_move_rows (struct datasheet *ds,
 519                      size_t old_start, size_t new_start,
 520                      size_t cnt)
 521 {
 522   axis_move (ds->rows, old_start, new_start, cnt);
 523 }
 524 \f
 525 static const struct casereader_random_class datasheet_reader_class;
 526
 527 /* Creates and returns a casereader whose input cases are the
 528    rows in datasheet DS.  From the caller's perspective, DS is
 529    effectively destroyed by this operation, such that the caller
 530    must not reference it again. */
 531 struct casereader *
 532 datasheet_make_reader (struct datasheet *ds)
 533 {
 534   struct casereader *reader;
 535   ds = datasheet_rename (ds);
 536   reader = casereader_create_random (datasheet_get_column_cnt (ds),
 537                                      datasheet_get_row_cnt (ds),
 538                                      &datasheet_reader_class, ds);
 539   taint_propagate (datasheet_get_taint (ds), casereader_get_taint (reader));
 540   return reader;
 541 }
 542
 543 /* "read" function for the datasheet random casereader. */
 544 static bool
 545 datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
 546                        casenumber case_idx, struct ccase *c)
 547 {
 548   struct datasheet *ds = ds_;
 549   if (case_idx >= datasheet_get_row_cnt (ds))
 550     return false;
 551   else if (datasheet_get_row (ds, case_idx, c))
 552     return true;
 553   else
 554     {
 555       taint_set_taint (ds->taint);
 556       return false;
 557     }
 558 }
 559
 560 /* "destroy" function for the datasheet random casereader. */
 561 static void
 562 datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
 563 {
 564   struct datasheet *ds = ds_;
 565   datasheet_destroy (ds);
 566 }
 567
 568 /* "advance" function for the datasheet random casereader. */
 569 static void
 570 datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
 571                           casenumber case_cnt)
 572 {
 573   struct datasheet *ds = ds_;
 574   datasheet_delete_rows (ds, 0, case_cnt);
 575 }
 576
 577 /* Random casereader class for a datasheet. */
 578 static const struct casereader_random_class datasheet_reader_class =
 579   {
 580     datasheet_reader_read,
 581     datasheet_reader_destroy,
 582     datasheet_reader_advance,
 583   };
 584 \f
 585 /* Removes SI from DS's set of sources and destroys its
 586    source. */
 587 static void
 588 free_source_info (struct datasheet *ds, struct source_info *si)
 589 {
 590   range_map_delete (&ds->sources, &si->column_range);
 591   source_destroy (si->source);
 592   free (si);
 593 }
 594
 595 static struct source_info *
 596 source_info_from_range_map (struct range_map_node *node)
 597 {
 598   return range_map_data (node, struct source_info, column_range);
 599 }
 600
 601 /* Reads (if OP is OP_READ) or writes (if op is OP_WRITE) the
 602    COLUMN_CNT columns starting from column START_COLUMN in row
 603    LROW to/from the COLUMN_CNT values in DATA. */
 604 static bool
 605 rw_case (struct datasheet *ds, enum rw_op op,
 606          casenumber lrow, size_t start_column, size_t column_cnt,
 607          union value data[])
 608 {
 609   casenumber prow;
 610   size_t lcol;
 611
 612   assert (lrow < datasheet_get_row_cnt (ds));
 613   assert (column_cnt <= datasheet_get_column_cnt (ds));
 614   assert (start_column + column_cnt <= datasheet_get_column_cnt (ds));
 615
 616   prow = axis_map (ds->rows, lrow);
 617   for (lcol = start_column; lcol < start_column + column_cnt; lcol++, data++)
 618     {
 619       size_t pcol = axis_map (ds->columns, lcol);
 620       struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
 621       struct source_info *s = source_info_from_range_map (r);
 622       size_t pcol_ofs = pcol - range_map_node_get_start (r);
 623       if (!(op == OP_READ
 624             ? source_read (s->source, prow, pcol_ofs, data, 1)
 625             : source_write (s->source, prow, pcol_ofs, data, 1)))
 626         {
 627           taint_set_taint (ds->taint);
 628           return false;
 629         }
 630     }
 631   return true;
 632 }
 633 \f
 634 /* An axis.
 635
 636    An axis has two functions.  First, it maintains a mapping from
 637    logical (client-visible) to physical (storage) ordinates.  The
 638    axis_map and axis_get_size functions inspect this mapping, and
 639    the axis_insert, axis_remove, and axis_move functions modify
 640    it.  Second, it tracks the set of ordinates that are unused
 641    and available for reuse.  (Not all unused ordinates are
 642    available for reuse: in particular, unused columns that are
 643    backed by a casereader are never reused.)  The axis_allocate,
 644    axis_make_available, and axis_extend functions affect the set
 645    of available ordinates. */
 646 struct axis
 647   {
 648     struct tower log_to_phy;     /* Map from logical to physical ordinates;
 649                                     contains "struct axis_group"s. */
 650     struct range_set *available; /* Set of unused, available ordinates. */
 651     unsigned long int phy_size;  /* Current physical length of axis. */
 652   };
 653
 654 /* A mapping from logical to physical ordinates. */
 655 struct axis_group
 656   {
 657     struct tower_node logical;  /* Range of logical ordinates. */
 658     unsigned long phy_start;    /* First corresponding physical ordinate. */
 659   };
 660
 661 static struct axis_group *axis_group_from_tower_node (struct tower_node *);
 662 static struct tower_node *make_axis_group (unsigned long int phy_start);
 663 static struct tower_node *split_axis (struct axis *, unsigned long int where);
 664 static void merge_axis_nodes (struct axis *, struct tower_node *,
 665                               struct tower_node **other_node);
 666 static void check_axis_merged (const struct axis *axis UNUSED);
 667
 668 /* Creates and returns a new, initially empty axis. */
 669 static struct axis *
 670 axis_create (void)
 671 {
 672   struct axis *axis = xmalloc (sizeof *axis);
 673   tower_init (&axis->log_to_phy);
 674   axis->available = range_set_create ();
 675   axis->phy_size = 0;
 676   return axis;
 677 }
 678
 679 /* Returns a clone of existing axis OLD.
 680
 681    Currently this is used only by the datasheet model checker
 682    driver, but it could be otherwise useful. */
 683 static struct axis *
 684 axis_clone (const struct axis *old)
 685 {
 686   const struct tower_node *node;
 687   struct axis *new;
 688
 689   new = xmalloc (sizeof *new);
 690   tower_init (&new->log_to_phy);
 691   new->available = range_set_clone (old->available, NULL);
 692   new->phy_size = old->phy_size;
 693
 694   for (node = tower_first (&old->log_to_phy); node != NULL;
 695        node = tower_next (&old->log_to_phy, node))
 696     {
 697       unsigned long int size = tower_node_get_height (node);
 698       struct axis_group *group = tower_data (node, struct axis_group, logical);
 699       tower_insert (&new->log_to_phy, size, make_axis_group (group->phy_start),
 700                     NULL);
 701     }
 702
 703   return new;
 704 }
 705
 706 /* Adds the state of AXIS to the MD4 hash context CTX.
 707
 708    This is only used by the datasheet model checker driver.  It
 709    is unlikely to be otherwise useful. */
 710 static void
 711 axis_hash (const struct axis *axis, struct md4_ctx *ctx)
 712 {
 713   const struct tower_node *tn;
 714   const struct range_set_node *rsn;
 715
 716   for (tn = tower_first (&axis->log_to_phy); tn != NULL;
 717        tn = tower_next (&axis->log_to_phy, tn))
 718     {
 719       struct axis_group *group = tower_data (tn, struct axis_group, logical);
 720       unsigned long int phy_start = group->phy_start;
 721       unsigned long int size = tower_node_get_height (tn);
 722
 723       md4_process_bytes (&phy_start, sizeof phy_start, ctx);
 724       md4_process_bytes (&size, sizeof size, ctx);
 725     }
 726
 727   for (rsn = range_set_first (axis->available); rsn != NULL;
 728        rsn = range_set_next (axis->available, rsn))
 729     {
 730       unsigned long int start = range_set_node_get_start (rsn);
 731       unsigned long int end = range_set_node_get_end (rsn);
 732
 733       md4_process_bytes (&start, sizeof start, ctx);
 734       md4_process_bytes (&end, sizeof end, ctx);
 735     }
 736
 737   md4_process_bytes (&axis->phy_size, sizeof axis->phy_size, ctx);
 738 }
 739
 740 /* Destroys AXIS. */
 741 static void
 742 axis_destroy (struct axis *axis)
 743 {
 744   if (axis == NULL)
 745     return;
 746
 747   while (!tower_is_empty (&axis->log_to_phy))
 748     {
 749       struct tower_node *node = tower_first (&axis->log_to_phy);
 750       struct axis_group *group = tower_data (node, struct axis_group,
 751                                              logical);
 752       tower_delete (&axis->log_to_phy, node);
 753       free (group);
 754     }
 755
 756   range_set_destroy (axis->available);
 757   free (axis);
 758 }
 759
 760 /* Allocates up to REQUEST contiguous unused and available
 761    ordinates from AXIS.  If successful, stores the number
 762    obtained into *WIDTH and the ordinate of the first into
 763    *START, marks the ordinates as now unavailable return true.
 764    On failure, which occurs only if AXIS has no unused and
 765    available ordinates, returns false without modifying AXIS. */
 766 static bool
 767 axis_allocate (struct axis *axis, unsigned long int request,
 768                unsigned long int *start, unsigned long int *width)
 769 {
 770   return range_set_allocate (axis->available, request, start, width);
 771 }
 772
 773 /* Marks the WIDTH contiguous ordinates in AXIS, starting from
 774    START, as unused and available. */
 775 static void
 776 axis_make_available (struct axis *axis,
 777                      unsigned long int start, unsigned long int width)
 778 {
 779   range_set_insert (axis->available, start, width);
 780 }
 781
 782 /* Extends the total physical length of AXIS by WIDTH and returns
 783    the first ordinate in the new physical region. */
 784 static unsigned long int
 785 axis_extend (struct axis *axis, unsigned long int width)
 786 {
 787   unsigned long int start = axis->phy_size;
 788   axis->phy_size += width;
 789   return start;
 790 }
 791
 792 /* Returns the physical ordinate in AXIS corresponding to logical
 793    ordinate LOG_POS.  LOG_POS must be less than the logical
 794    length of AXIS. */
 795 static unsigned long int
 796 axis_map (const struct axis *axis, unsigned long log_pos)
 797 {
 798   struct tower_node *node;
 799   struct axis_group *group;
 800   unsigned long int group_start;
 801
 802   node = tower_lookup (&axis->log_to_phy, log_pos, &group_start);
 803   group = tower_data (node, struct axis_group, logical);
 804   return group->phy_start + (log_pos - group_start);
 805 }
 806
 807 /* Returns the logical length of AXIS. */
 808 static unsigned long
 809 axis_get_size (const struct axis *axis)
 810 {
 811   return tower_height (&axis->log_to_phy);
 812 }
 813
 814 /* Inserts the CNT contiguous physical ordinates starting at
 815    PHY_START into AXIS's logical-to-physical mapping, starting at
 816    logical position LOG_START. */
 817 static void
 818 axis_insert (struct axis *axis,
 819              unsigned long int log_start, unsigned long int phy_start,
 820              unsigned long int cnt)
 821 {
 822   struct tower_node *before = split_axis (axis, log_start);
 823   struct tower_node *new = make_axis_group (phy_start);
 824   tower_insert (&axis->log_to_phy, cnt, new, before);
 825   merge_axis_nodes (axis, new, NULL);
 826   check_axis_merged (axis);
 827 }
 828
 829 /* Removes CNT ordinates from AXIS's logical-to-physical mapping
 830    starting at logical position START. */
 831 static void
 832 axis_remove (struct axis *axis,
 833              unsigned long int start, unsigned long int cnt)
 834 {
 835   if (cnt > 0)
 836     {
 837       struct tower_node *last = split_axis (axis, start + cnt);
 838       struct tower_node *cur, *next;
 839       for (cur = split_axis (axis, start); cur != last; cur = next)
 840         {
 841           next = tower_delete (&axis->log_to_phy, cur);
 842           free (axis_group_from_tower_node (cur));
 843         }
 844       merge_axis_nodes (axis, last, NULL);
 845       check_axis_merged (axis);
 846     }
 847 }
 848
 849 /* Moves the CNT ordinates in AXIS's logical-to-mapping starting
 850    at logical position OLD_START so that they then start at
 851    position NEW_START. */
 852 static void
 853 axis_move (struct axis *axis,
 854            unsigned long int old_start, unsigned long int new_start,
 855            unsigned long int cnt)
 856 {
 857   if (cnt > 0 && old_start != new_start)
 858     {
 859       struct tower_node *old_first, *old_last, *new_first;
 860       struct tower_node *merge1, *merge2;
 861       struct tower tmp_array;
 862
 863       /* Move ordinates OLD_START...(OLD_START + CNT) into new,
 864          separate TMP_ARRAY. */
 865       old_first = split_axis (axis, old_start);
 866       old_last = split_axis (axis, old_start + cnt);
 867       tower_init (&tmp_array);
 868       tower_splice (&tmp_array, NULL,
 869                     &axis->log_to_phy, old_first, old_last);
 870       merge_axis_nodes (axis, old_last, NULL);
 871       check_axis_merged (axis);
 872
 873       /* Move TMP_ARRAY to position NEW_START. */
 874       new_first = split_axis (axis, new_start);
 875       merge1 = tower_first (&tmp_array);
 876       merge2 = tower_last (&tmp_array);
 877       if (merge2 == merge1)
 878         merge2 = NULL;
 879       tower_splice (&axis->log_to_phy, new_first, &tmp_array, old_first, NULL);
 880       merge_axis_nodes (axis, merge1, &merge2);
 881       merge_axis_nodes (axis, merge2, NULL);
 882       check_axis_merged (axis);
 883     }
 884 }
 885
 886 /* Returns the axis_group in which NODE is embedded. */
 887 static struct axis_group *
 888 axis_group_from_tower_node (struct tower_node *node)
 889 {
 890   return tower_data (node, struct axis_group, logical);
 891 }
 892
 893 /* Creates and returns a new axis_group at physical position
 894    PHY_START. */
 895 static struct tower_node *
 896 make_axis_group (unsigned long phy_start)
 897 {
 898   struct axis_group *group = xmalloc (sizeof *group);
 899   group->phy_start = phy_start;
 900   return &group->logical;
 901 }
 902
 903 /* Returns the tower_node in AXIS's logical-to-physical map whose
 904    bottom edge is at exact level WHERE.  If there is no such
 905    tower_node in AXIS's logical-to-physical map, then split_axis
 906    creates one by breaking an existing tower_node into two
 907    separate ones, unless WHERE is equal to the tower height, in
 908    which case it simply returns a null pointer. */
 909 static struct tower_node *
 910 split_axis (struct axis *axis, unsigned long int where)
 911 {
 912   unsigned long int group_start;
 913   struct tower_node *group_node;
 914   struct axis_group *group;
 915
 916   assert (where <= tower_height (&axis->log_to_phy));
 917   if (where >= tower_height (&axis->log_to_phy))
 918     return NULL;
 919
 920   group_node = tower_lookup (&axis->log_to_phy, where, &group_start);
 921   group = axis_group_from_tower_node (group_node);
 922   if (where > group_start)
 923     {
 924       unsigned long int size_1 = where - group_start;
 925       unsigned long int size_2 = tower_node_get_height (group_node) - size_1;
 926       struct tower_node *next = tower_next (&axis->log_to_phy, group_node);
 927       struct tower_node *new = make_axis_group (group->phy_start + size_1);
 928       tower_resize (&axis->log_to_phy, group_node, size_1);
 929       tower_insert (&axis->log_to_phy, size_2, new, next);
 930       return new;
 931     }
 932   else
 933     return &group->logical;
 934 }
 935
 936 /* Within AXIS, attempts to merge NODE (or the last node in AXIS,
 937    if NODE is null) with its neighbor nodes.  This is possible
 938    when logically adjacent nodes are also adjacent physically (in
 939    the same order).
 940
 941    When a merge occurs, and OTHER_NODE is non-null and points to
 942    the node to be deleted, this function also updates
 943    *OTHER_NODE, if necessary, to ensure that it remains a valid
 944    pointer. */
 945 static void
 946 merge_axis_nodes (struct axis *axis, struct tower_node *node,
 947                   struct tower_node **other_node)
 948 {
 949   struct tower *t = &axis->log_to_phy;
 950   struct axis_group *group;
 951   struct tower_node *next, *prev;
 952
 953   /* Find node to potentially merge with neighbors. */
 954   if (node == NULL)
 955     node = tower_last (t);
 956   if (node == NULL)
 957     return;
 958   group = axis_group_from_tower_node (node);
 959
 960   /* Try to merge NODE with successor. */
 961   next = tower_next (t, node);
 962   if (next != NULL)
 963     {
 964       struct axis_group *next_group = axis_group_from_tower_node (next);
 965       unsigned long this_height = tower_node_get_height (node);
 966
 967       if (group->phy_start + this_height == next_group->phy_start)
 968         {
 969           unsigned long next_height = tower_node_get_height (next);
 970           tower_resize (t, node, this_height + next_height);
 971           if (other_node != NULL && *other_node == next)
 972             *other_node = tower_next (t, *other_node);
 973           tower_delete (t, next);
 974           free (next_group);
 975         }
 976     }
 977
 978   /* Try to merge NODE with predecessor. */
 979   prev = tower_prev (t, node);
 980   if (prev != NULL)
 981     {
 982       struct axis_group *prev_group = axis_group_from_tower_node (prev);
 983       unsigned long prev_height = tower_node_get_height (prev);
 984
 985       if (prev_group->phy_start + prev_height == group->phy_start)
 986         {
 987           unsigned long this_height = tower_node_get_height (node);
 988           group->phy_start = prev_group->phy_start;
 989           tower_resize (t, node, this_height + prev_height);
 990           if (other_node != NULL && *other_node == prev)
 991             *other_node = tower_next (t, *other_node);
 992           tower_delete (t, prev);
 993           free (prev_group);
 994         }
 995     }
 996 }
 997
 998 /* Verify that all potentially merge-able nodes in AXIS are
 999    actually merged. */
1000 static void
1001 check_axis_merged (const struct axis *axis UNUSED)
1002 {
1003 #if ASSERT_LEVEL >= 10
1004   struct tower_node *prev, *node;
1005
1006   for (prev = NULL, node = tower_first (&axis->log_to_phy); node != NULL;
1007        prev = node, node = tower_next (&axis->log_to_phy, node))
1008     if (prev != NULL)
1009       {
1010         struct axis_group *prev_group = axis_group_from_tower_node (prev);
1011         unsigned long prev_height = tower_node_get_height (prev);
1012         struct axis_group *node_group = axis_group_from_tower_node (node);
1013         assert (prev_group->phy_start + prev_height != node_group->phy_start);
1014       }
1015 #endif
1016 }
1017 \f
1018 /* A source. */
1019 struct source
1020   {
1021     size_t columns_used;        /* Number of columns in use by client. */
1022     struct sparse_cases *data;  /* Data at top level, atop the backing. */
1023     struct casereader *backing; /* Backing casereader (or null). */
1024     casenumber backing_rows;    /* Number of rows in backing (if nonnull). */
1025   };
1026
1027 /* Creates and returns an empty, unbacked source with COLUMN_CNT
1028    columns and an initial "columns_used" of 0. */
1029 static struct source *
1030 source_create_empty (size_t column_cnt)
1031 {
1032   struct source *source = xmalloc (sizeof *source);
1033   source->columns_used = 0;
1034   source->data = sparse_cases_create (column_cnt);
1035   source->backing = NULL;
1036   source->backing_rows = 0;
1037   return source;
1038 }
1039
1040 /* Creates and returns a new source backed by READER and with the
1041    same initial dimensions and content. */
1042 static struct source *
1043 source_create_casereader (struct casereader *reader)
1044 {
1045   struct source *source
1046     = source_create_empty (casereader_get_value_cnt (reader));
1047   source->backing = reader;
1048   source->backing_rows = casereader_count_cases (reader);
1049   return source;
1050 }
1051
1052 /* Returns a clone of source OLD with the same data and backing
1053    (if any).
1054
1055    Currently this is used only by the datasheet model checker
1056    driver, but it could be otherwise useful. */
1057 static struct source *
1058 source_clone (const struct source *old)
1059 {
1060   struct source *new = xmalloc (sizeof *new);
1061   new->columns_used = old->columns_used;
1062   new->data = sparse_cases_clone (old->data);
1063   new->backing = old->backing != NULL ? casereader_clone (old->backing) : NULL;
1064   new->backing_rows = old->backing_rows;
1065   if (new->data == NULL)
1066     {
1067       source_destroy (new);
1068       new = NULL;
1069     }
1070   return new;
1071 }
1072
1073 /* Increases the columns_used count of SOURCE by DELTA.
1074    The new value must not exceed SOURCE's number of columns. */
1075 static void
1076 source_increase_use (struct source *source, size_t delta)
1077 {
1078   source->columns_used += delta;
1079   assert (source->columns_used <= sparse_cases_get_value_cnt (source->data));
1080 }
1081
1082 /* Decreases the columns_used count of SOURCE by DELTA.
1083    This must not attempt to decrease the columns_used count below
1084    zero. */
1085 static void
1086 source_decrease_use (struct source *source, size_t delta)
1087 {
1088   assert (delta <= source->columns_used);
1089   source->columns_used -= delta;
1090 }
1091
1092 /* Returns true if SOURCE has any columns in use,
1093    false otherwise. */
1094 static bool
1095 source_in_use (const struct source *source)
1096 {
1097   return source->columns_used > 0;
1098 }
1099
1100 /* Destroys SOURCE and its data and backing, if any. */
1101 static void
1102 source_destroy (struct source *source)
1103 {
1104   if (source != NULL)
1105     {
1106       sparse_cases_destroy (source->data);
1107       casereader_destroy (source->backing);
1108       free (source);
1109     }
1110 }
1111
1112 /* Returns the number of rows in SOURCE's backing casereader
1113    (SOURCE must have a backing casereader). */
1114 static casenumber
1115 source_get_backing_row_cnt (const struct source *source)
1116 {
1117   assert (source_has_backing (source));
1118   return source->backing_rows;
1119 }
1120
1121 /* Returns the number of columns in SOURCE. */
1122 static size_t
1123 source_get_column_cnt (const struct source *source)
1124 {
1125   return sparse_cases_get_value_cnt (source->data);
1126 }
1127
1128 /* Reads VALUE_CNT columns from SOURCE in the given ROW, starting
1129    from COLUMN, into VALUES.  Returns true if successful, false
1130    on I/O error. */
1131 static bool
1132 source_read (const struct source *source,
1133              casenumber row, size_t column,
1134              union value values[], size_t value_cnt)
1135 {
1136   if (source->backing == NULL || sparse_cases_contains_row (source->data, row))
1137     return sparse_cases_read (source->data, row, column, values, value_cnt);
1138   else
1139     {
1140       struct ccase c;
1141       bool ok;
1142
1143       assert (source->backing != NULL);
1144       ok = casereader_peek (source->backing, row, &c);
1145       if (ok)
1146         {
1147           case_copy_out (&c, column, values, value_cnt);
1148           case_destroy (&c);
1149         }
1150       return ok;
1151     }
1152 }
1153
1154 /* Writes the VALUE_CNT values in VALUES to SOURCE in the given
1155    ROW, starting at ROW.  Returns true if successful, false on
1156    I/O error.  On error, the row's data may be completely or
1157    partially corrupted, both inside and outside the region to be
1158    written.  */
1159 static bool
1160 source_write (struct source *source,
1161               casenumber row, size_t column,
1162               const union value values[], size_t value_cnt)
1163 {
1164   size_t column_cnt = sparse_cases_get_value_cnt (source->data);
1165   bool ok;
1166
1167   if (source->backing == NULL
1168       || (column == 0 && value_cnt == column_cnt)
1169       || sparse_cases_contains_row (source->data, row))
1170     ok = sparse_cases_write (source->data, row, column, values, value_cnt);
1171   else
1172     {
1173       struct ccase c;
1174       if (row < source->backing_rows)
1175         ok = casereader_peek (source->backing, row, &c);
1176       else
1177         {
1178           /* It's not one of the backed rows.  Ideally, this
1179              should never happen: we'd always be writing the full
1180              contents of new, unbacked rows in a single call to
1181              this function, so that the first case above would
1182              trigger.  But that's a little difficult at higher
1183              levels, so that we in fact usually write the full
1184              contents of new, unbacked rows in multiple calls to
1185              this function.  Make this work. */
1186           case_create (&c, column_cnt);
1187           ok = true;
1188         }
1189       if (ok)
1190         {
1191           case_copy_in (&c, column, values, value_cnt);
1192           ok = sparse_cases_write (source->data, row, 0,
1193                                    case_data_all (&c), column_cnt);
1194           case_destroy (&c);
1195         }
1196     }
1197   return ok;
1198 }
1199
1200 /* Within SOURCE, which must not have a backing casereader,
1201    writes the VALUE_CNT values in VALUES_CNT to the VALUE_CNT
1202    columns starting from START_COLUMN, in every row, even in rows
1203    not yet otherwise initialized.  Returns true if successful,
1204    false if an I/O error occurs.
1205
1206    We don't support backing != NULL because (1) it's harder and
1207    (2) source_write_columns is only called by
1208    datasheet_insert_columns, which doesn't reuse columns from
1209    sources that are backed by casereaders. */
1210 static bool
1211 source_write_columns (struct source *source, size_t start_column,
1212                       const union value values[], size_t value_cnt)
1213 {
1214   assert (source->backing == NULL);
1215
1216   return sparse_cases_write_columns (source->data, start_column,
1217                                      values, value_cnt);
1218 }
1219
1220 /* Returns true if SOURCE has a backing casereader, false
1221    otherwise. */
1222 static bool
1223 source_has_backing (const struct source *source)
1224 {
1225   return source->backing != NULL;
1226 }
1227 \f
1228 /* Datasheet model checker test driver. */
1229
1230 /* Maximum size of datasheet supported for model checking
1231    purposes. */
1232 #define MAX_ROWS 5
1233 #define MAX_COLS 5
1234
1235 /* Hashes the structure of datasheet DS and returns the hash.
1236    We use MD4 because it is much faster than MD5 or SHA-1 but its
1237    collision resistance is just as good. */
1238 static unsigned int
1239 hash_datasheet (const struct datasheet *ds)
1240 {
1241   unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
1242   struct md4_ctx ctx;
1243   struct range_map_node *r;
1244
1245   md4_init_ctx (&ctx);
1246   axis_hash (ds->columns, &ctx);
1247   axis_hash (ds->rows, &ctx);
1248   for (r = range_map_first (&ds->sources); r != NULL;
1249        r = range_map_next (&ds->sources, r))
1250     {
1251       unsigned long int start = range_map_node_get_start (r);
1252       unsigned long int end = range_map_node_get_end (r);
1253       md4_process_bytes (&start, sizeof start, &ctx);
1254       md4_process_bytes (&end, sizeof end, &ctx);
1255     }
1256   md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc,
1257                       &ctx);
1258   md4_finish_ctx (&ctx, hash);
1259   return hash[0];
1260 }
1261
1262 /* Clones the structure and contents of ODS into a new datasheet,
1263    and returns the new datasheet. */
1264 static struct datasheet *
1265 clone_datasheet (const struct datasheet *ods)
1266 {
1267   struct datasheet *ds;
1268   struct range_map_node *r;
1269
1270   ds = xmalloc (sizeof *ds);
1271   ds->columns = axis_clone (ods->columns);
1272   ds->rows = axis_clone (ods->rows);
1273   range_map_init (&ds->sources);
1274   for (r = range_map_first (&ods->sources); r != NULL;
1275        r = range_map_next (&ods->sources, r))
1276     {
1277       const struct source_info *osi = source_info_from_range_map (r);
1278       struct source_info *si = xmalloc (sizeof *si);
1279       si->source = source_clone (osi->source);
1280       range_map_insert (&ds->sources, range_map_node_get_start (r),
1281                         range_map_node_get_width (r), &si->column_range);
1282     }
1283   ds->column_min_alloc = ods->column_min_alloc;
1284   ds->taint = taint_create ();
1285
1286   return ds;
1287 }
1288
1289 /* lazy_casereader callback function to instantiate a casereader
1290    from the datasheet. */
1291 static struct casereader *
1292 lazy_callback (void *ds_)
1293 {
1294   struct datasheet *ds = ds_;
1295   return datasheet_make_reader (ds);
1296 }
1297
1298 /* Checks that READER contains the ROW_CNT rows and COLUMN_CNT
1299    columns of data in ARRAY, reporting any errors via MC. */
1300 static void
1301 check_datasheet_casereader (struct mc *mc, struct casereader *reader,
1302                             double array[MAX_ROWS][MAX_COLS],
1303                             size_t row_cnt, size_t column_cnt)
1304 {
1305   if (casereader_get_case_cnt (reader) != row_cnt)
1306     {
1307       if (casereader_get_case_cnt (reader) == CASENUMBER_MAX
1308           && casereader_count_cases (reader) == row_cnt)
1309         mc_error (mc, "datasheet casereader has unknown case count");
1310       else
1311         mc_error (mc, "casereader row count (%lu) does not match "
1312                   "expected (%zu)",
1313                   (unsigned long int) casereader_get_case_cnt (reader),
1314                   row_cnt);
1315     }
1316   else if (casereader_get_value_cnt (reader) != column_cnt)
1317     mc_error (mc, "casereader column count (%zu) does not match "
1318               "expected (%zu)",
1319               casereader_get_value_cnt (reader), column_cnt);
1320   else
1321     {
1322       struct ccase c;
1323       size_t row;
1324
1325       for (row = 0; row < row_cnt; row++)
1326         {
1327           size_t col;
1328
1329           if (!casereader_read (reader, &c))
1330             {
1331               mc_error (mc, "casereader_read failed reading row %zu of %zu "
1332                         "(%zu columns)", row, row_cnt, column_cnt);
1333               return;
1334             }
1335
1336           for (col = 0; col < column_cnt; col++)
1337             if (case_num_idx (&c, col) != array[row][col])
1338               mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
1339                         "%g != %g",
1340                         row, col, row_cnt, column_cnt,
1341                         case_num_idx (&c, col), array[row][col]);
1342
1343           case_destroy (&c);
1344         }
1345
1346       if (casereader_read (reader, &c))
1347         mc_error (mc, "casereader has extra cases (expected %zu)", row_cnt);
1348     }
1349 }
1350
1351 /* Checks that datasheet DS contains has ROW_CNT rows, COLUMN_CNT
1352    columns, and the same contents as ARRAY, reporting any
1353    mismatches via mc_error.  Then, adds DS to MC as a new state. */
1354 static void
1355 check_datasheet (struct mc *mc, struct datasheet *ds,
1356                  double array[MAX_ROWS][MAX_COLS],
1357                  size_t row_cnt, size_t column_cnt)
1358 {
1359   struct datasheet *ds2;
1360   struct casereader *reader;
1361   unsigned long int serial = 0;
1362
1363   assert (row_cnt < MAX_ROWS);
1364   assert (column_cnt < MAX_COLS);
1365
1366   /* If it is a duplicate hash, discard the state before checking
1367      its consistency, to save time. */
1368   if (mc_discard_dup_state (mc, hash_datasheet (ds)))
1369     {
1370       datasheet_destroy (ds);
1371       return;
1372     }
1373
1374   /* Check contents of datasheet via datasheet functions. */
1375   if (row_cnt != datasheet_get_row_cnt (ds))
1376     mc_error (mc, "row count (%lu) does not match expected (%zu)",
1377               (unsigned long int) datasheet_get_row_cnt (ds), row_cnt);
1378   else if (column_cnt != datasheet_get_column_cnt (ds))
1379     mc_error (mc, "column count (%zu) does not match expected (%zu)",
1380               datasheet_get_column_cnt (ds), column_cnt);
1381   else
1382     {
1383       size_t row, col;
1384
1385       for (row = 0; row < row_cnt; row++)
1386         for (col = 0; col < column_cnt; col++)
1387           {
1388             union value v;
1389             if (!datasheet_get_value (ds, row, col, &v, 1))
1390               NOT_REACHED ();
1391             if (v.f != array[row][col])
1392               mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: %g != %g",
1393                         row, col, row_cnt, column_cnt, v.f, array[row][col]);
1394           }
1395     }
1396
1397   /* Check that datasheet contents are correct when read through
1398      casereader. */
1399   ds2 = clone_datasheet (ds);
1400   reader = datasheet_make_reader (ds2);
1401   check_datasheet_casereader (mc, reader, array, row_cnt, column_cnt);
1402   casereader_destroy (reader);
1403
1404   /* Check that datasheet contents are correct when read through
1405      casereader with lazy_casereader wrapped around it.  This is
1406      valuable because otherwise there is no non-GUI code that
1407      uses the lazy_casereader. */
1408   ds2 = clone_datasheet (ds);
1409   reader = lazy_casereader_create (column_cnt, row_cnt,
1410                                    lazy_callback, ds2, &serial);
1411   check_datasheet_casereader (mc, reader, array, row_cnt, column_cnt);
1412   if (lazy_casereader_destroy (reader, serial))
1413     {
1414       /* Lazy casereader was never instantiated.  This will
1415          only happen if there are no rows (because in that case
1416          casereader_read never gets called). */
1417       datasheet_destroy (ds2);
1418       if (row_cnt != 0)
1419         mc_error (mc, "lazy casereader not instantiated, but should "
1420                   "have been (size %zu,%zu)", row_cnt, column_cnt);
1421     }
1422   else
1423     {
1424       /* Lazy casereader was instantiated.  This is the common
1425          case, in which some casereader operation
1426          (casereader_read in this case) was performed on the
1427          lazy casereader. */
1428       casereader_destroy (reader);
1429       if (row_cnt == 0)
1430         mc_error (mc, "lazy casereader instantiated, but should not "
1431                   "have been (size %zu,%zu)", row_cnt, column_cnt);
1432     }
1433
1434   mc_add_state (mc, ds);
1435 }
1436
1437 /* Extracts the contents of DS into DATA. */
1438 static void
1439 extract_data (const struct datasheet *ds, double data[MAX_ROWS][MAX_COLS])
1440 {
1441   size_t column_cnt = datasheet_get_column_cnt (ds);
1442   size_t row_cnt = datasheet_get_row_cnt (ds);
1443   size_t row, col;
1444
1445   assert (row_cnt < MAX_ROWS);
1446   assert (column_cnt < MAX_COLS);
1447   for (row = 0; row < row_cnt; row++)
1448     for (col = 0; col < column_cnt; col++)
1449       {
1450         union value v;
1451         if (!datasheet_get_value (ds, row, col, &v, 1))
1452           NOT_REACHED ();
1453         data[row][col] = v.f;
1454       }
1455 }
1456
1457 /* Clones the structure and contents of ODS into *DS,
1458    and the contents of ODATA into DATA. */
1459 static void
1460 clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS],
1461              struct datasheet **ds, double data[MAX_ROWS][MAX_COLS])
1462 {
1463   *ds = clone_datasheet (ods);
1464   memcpy (data, odata, MAX_ROWS * MAX_COLS * sizeof **data);
1465 }
1466
1467 /* "init" function for struct mc_class. */
1468 static void
1469 datasheet_mc_init (struct mc *mc)
1470 {
1471   struct datasheet_test_params *params = mc_get_aux (mc);
1472   struct datasheet *ds;
1473
1474   if (params->backing_rows == 0 && params->backing_cols == 0)
1475     {
1476       /* Create unbacked datasheet. */
1477       ds = datasheet_create (NULL);
1478       mc_name_operation (mc, "empty datasheet");
1479       check_datasheet (mc, ds, NULL, 0, 0);
1480     }
1481   else
1482     {
1483       /* Create datasheet with backing. */
1484       struct casewriter *writer;
1485       struct casereader *reader;
1486       double data[MAX_ROWS][MAX_COLS];
1487       int row;
1488
1489       assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
1490       assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
1491
1492       writer = mem_writer_create (params->backing_cols);
1493       for (row = 0; row < params->backing_rows; row++)
1494         {
1495           struct ccase c;
1496           int col;
1497
1498           case_create (&c, params->backing_cols);
1499           for (col = 0; col < params->backing_cols; col++)
1500             {
1501               double value = params->next_value++;
1502               data[row][col] = value;
1503               case_data_rw_idx (&c, col)->f = value;
1504             }
1505           casewriter_write (writer, &c);
1506         }
1507       reader = casewriter_make_reader (writer);
1508       assert (reader != NULL);
1509
1510       ds = datasheet_create (reader);
1511       mc_name_operation (mc, "datasheet with (%d,%d) backing",
1512                          params->backing_rows, params->backing_cols);
1513       check_datasheet (mc, ds, data,
1514                        params->backing_rows, params->backing_cols);
1515     }
1516 }
1517
1518 /* "mutate" function for struct mc_class. */
1519 static void
1520 datasheet_mc_mutate (struct mc *mc, const void *ods_)
1521 {
1522   struct datasheet_test_params *params = mc_get_aux (mc);
1523
1524   const struct datasheet *ods = ods_;
1525   double odata[MAX_ROWS][MAX_COLS];
1526   double data[MAX_ROWS][MAX_COLS];
1527   size_t column_cnt = datasheet_get_column_cnt (ods);
1528   size_t row_cnt = datasheet_get_row_cnt (ods);
1529   size_t pos, new_pos, cnt;
1530
1531   extract_data (ods, odata);
1532
1533   /* Insert all possible numbers of columns in all possible
1534      positions. */
1535   for (pos = 0; pos <= column_cnt; pos++)
1536     for (cnt = 0; cnt <= params->max_cols - column_cnt; cnt++)
1537       if (mc_include_state (mc))
1538         {
1539           struct datasheet *ds;
1540           union value new[MAX_COLS];
1541           size_t i, j;
1542
1543           mc_name_operation (mc, "insert %zu columns at %zu", cnt, pos);
1544           clone_model (ods, odata, &ds, data);
1545
1546           for (i = 0; i < cnt; i++)
1547             new[i].f = params->next_value++;
1548
1549           if (!datasheet_insert_columns (ds, new, cnt, pos))
1550             mc_error (mc, "datasheet_insert_columns failed");
1551
1552           for (i = 0; i < row_cnt; i++)
1553             {
1554               insert_range (&data[i][0], column_cnt, sizeof data[i][0],
1555                             pos, cnt);
1556               for (j = 0; j < cnt; j++)
1557                 data[i][pos + j] = new[j].f;
1558             }
1559
1560           check_datasheet (mc, ds, data, row_cnt, column_cnt + cnt);
1561         }
1562
1563   /* Delete all possible numbers of columns from all possible
1564      positions. */
1565   for (pos = 0; pos < column_cnt; pos++)
1566     for (cnt = 0; cnt < column_cnt - pos; cnt++)
1567       if (mc_include_state (mc))
1568         {
1569           struct datasheet *ds;
1570           size_t i;
1571
1572           mc_name_operation (mc, "delete %zu columns at %zu", cnt, pos);
1573           clone_model (ods, odata, &ds, data);
1574
1575           datasheet_delete_columns (ds, pos, cnt);
1576
1577           for (i = 0; i < row_cnt; i++)
1578             remove_range (&data[i], column_cnt, sizeof *data[i], pos, cnt);
1579
1580           check_datasheet (mc, ds, data, row_cnt, column_cnt - cnt);
1581         }
1582
1583   /* Move all possible numbers of columns from all possible
1584      existing positions to all possible new positions. */
1585   for (pos = 0; pos < column_cnt; pos++)
1586     for (cnt = 0; cnt < column_cnt - pos; cnt++)
1587       for (new_pos = 0; new_pos < column_cnt - cnt; new_pos++)
1588         if (mc_include_state (mc))
1589           {
1590             struct datasheet *ds;
1591             size_t i;
1592
1593             clone_model (ods, odata, &ds, data);
1594             mc_name_operation (mc, "move %zu columns from %zu to %zu",
1595                                cnt, pos, new_pos);
1596
1597             datasheet_move_columns (ds, pos, new_pos, cnt);
1598
1599             for (i = 0; i < row_cnt; i++)
1600               move_range (&data[i], column_cnt, sizeof data[i][0],
1601                           pos, new_pos, cnt);
1602
1603             check_datasheet (mc, ds, data, row_cnt, column_cnt);
1604           }
1605
1606   /* Insert all possible numbers of rows in all possible
1607      positions. */
1608   for (pos = 0; pos <= row_cnt; pos++)
1609     for (cnt = 0; cnt <= params->max_rows - row_cnt; cnt++)
1610       if (mc_include_state (mc))
1611         {
1612           struct datasheet *ds;
1613           struct ccase c[MAX_ROWS];
1614           size_t i, j;
1615
1616           clone_model (ods, odata, &ds, data);
1617           mc_name_operation (mc, "insert %zu rows at %zu", cnt, pos);
1618
1619           for (i = 0; i < cnt; i++)
1620             {
1621               case_create (&c[i], column_cnt);
1622               for (j = 0; j < column_cnt; j++)
1623                 case_data_rw_idx (&c[i], j)->f = params->next_value++;
1624             }
1625
1626           insert_range (data, row_cnt, sizeof data[pos], pos, cnt);
1627           for (i = 0; i < cnt; i++)
1628             for (j = 0; j < column_cnt; j++)
1629               data[i + pos][j] = case_num_idx (&c[i], j);
1630
1631           if (!datasheet_insert_rows (ds, pos, c, cnt))
1632             mc_error (mc, "datasheet_insert_rows failed");
1633
1634           check_datasheet (mc, ds, data, row_cnt + cnt, column_cnt);
1635         }
1636
1637   /* Delete all possible numbers of rows from all possible
1638      positions. */
1639   for (pos = 0; pos < row_cnt; pos++)
1640     for (cnt = 0; cnt < row_cnt - pos; cnt++)
1641       if (mc_include_state (mc))
1642         {
1643           struct datasheet *ds;
1644
1645           clone_model (ods, odata, &ds, data);
1646           mc_name_operation (mc, "delete %zu rows at %zu", cnt, pos);
1647
1648           datasheet_delete_rows (ds, pos, cnt);
1649
1650           remove_range (&data[0], row_cnt, sizeof data[0], pos, cnt);
1651
1652           check_datasheet (mc, ds, data, row_cnt - cnt, column_cnt);
1653         }
1654
1655   /* Move all possible numbers of rows from all possible existing
1656      positions to all possible new positions. */
1657   for (pos = 0; pos < row_cnt; pos++)
1658     for (cnt = 0; cnt < row_cnt - pos; cnt++)
1659       for (new_pos = 0; new_pos < row_cnt - cnt; new_pos++)
1660         if (mc_include_state (mc))
1661           {
1662             struct datasheet *ds;
1663
1664             clone_model (ods, odata, &ds, data);
1665             mc_name_operation (mc, "move %zu rows from %zu to %zu",
1666                                cnt, pos, new_pos);
1667
1668             datasheet_move_rows (ds, pos, new_pos, cnt);
1669
1670             move_range (&data[0], row_cnt, sizeof data[0],
1671                         pos, new_pos, cnt);
1672
1673             check_datasheet (mc, ds, data, row_cnt, column_cnt);
1674           }
1675 }
1676
1677 /* "destroy" function for struct mc_class. */
1678 static void
1679 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
1680 {
1681   struct datasheet *ds = ds_;
1682   datasheet_destroy (ds);
1683 }
1684
1685 /* Executes the model checker on the datasheet test driver with
1686    the given OPTIONS and passing in the given PARAMS, which must
1687    point to a modifiable "struct datasheet_test_params".  If any
1688    value in PARAMS is out of range, it will be adjusted into the
1689    valid range before running the test.
1690
1691    Returns the results of the model checking run. */
1692 struct mc_results *
1693 datasheet_test (struct mc_options *options, void *params_)
1694 {
1695   struct datasheet_test_params *params = params_;
1696   static const struct mc_class datasheet_mc_class =
1697     {
1698       datasheet_mc_init,
1699       datasheet_mc_mutate,
1700       datasheet_mc_destroy,
1701     };
1702
1703   params->next_value = 1;
1704   params->max_rows = MIN (params->max_rows, MAX_ROWS);
1705   params->max_cols = MIN (params->max_cols, MAX_COLS);
1706   params->backing_rows = MIN (params->backing_rows, params->max_rows);
1707   params->backing_cols = MIN (params->backing_cols, params->max_cols);
1708
1709   mc_options_set_aux (options, params);
1710   return mc_run (&datasheet_mc_class, options);
1711 }