1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/datasheet.h>
24 #include <data/casereader-provider.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/sparse-cases.h>
28 #include <libpspp/array.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/model-checker.h>
31 #include <libpspp/range-map.h>
32 #include <libpspp/range-set.h>
33 #include <libpspp/taint.h>
34 #include <libpspp/tower.h>
40 static struct axis *axis_create (void);
41 static struct axis *axis_clone (const struct axis *);
42 static void axis_destroy (struct axis *);
44 static void axis_hash (const struct axis *, struct md4_ctx *);
46 static bool axis_allocate (struct axis *, unsigned long int request,
47 unsigned long int *start,
48 unsigned long int *width);
49 static void axis_make_available (struct axis *,
50 unsigned long int start,
51 unsigned long int width);
52 static unsigned long int axis_extend (struct axis *, unsigned long int width);
54 static unsigned long int axis_map (const struct axis *, unsigned long log_pos);
56 static unsigned long axis_get_size (const struct axis *);
57 static void axis_insert (struct axis *,
58 unsigned long int log_start,
59 unsigned long int phy_start,
60 unsigned long int cnt);
61 static void axis_remove (struct axis *,
62 unsigned long int start, unsigned long int cnt);
63 static void axis_move (struct axis *,
64 unsigned long int old_start,
65 unsigned long int new_start,
66 unsigned long int cnt);
68 static struct source *source_create_empty (size_t column_cnt);
69 static struct source *source_create_casereader (struct casereader *);
70 static struct source *source_clone (const struct source *);
71 static void source_destroy (struct source *);
73 static casenumber source_get_backing_row_cnt (const struct source *);
74 static size_t source_get_column_cnt (const struct source *);
76 static bool source_read (const struct source *,
77 casenumber row, size_t column,
78 union value[], size_t value_cnt);
79 static bool source_write (struct source *,
80 casenumber row, size_t column,
81 const union value[], size_t value_cnt);
82 static bool source_write_columns (struct source *, size_t start_column,
83 const union value[], size_t value_cnt);
84 static bool source_has_backing (const struct source *);
85 static void source_increase_use (struct source *, size_t delta);
86 static void source_decrease_use (struct source *, size_t delta);
87 static bool source_in_use (const struct source *);
89 /* A datasheet is internally composed from a set of data files,
90 called "sources". The sources that make up a datasheet must
91 have the same number of rows (cases), but their numbers of
92 columns (variables) may vary.
94 A datasheet's external view is produced by mapping (permuting
95 and selecting) its internal data. Thus, we can rearrange or
96 delete rows or columns simply by modifying the mapping. We
97 add rows by adding rows to each source and to the row mapping.
98 We add columns by adding a new source, then adding that source
99 to the column mapping.
101 Each source in a datasheet can be a casereader or a
102 sparse_cases. Casereaders are read-only, so when sources made
103 from casereaders need to be modified, it is done "virtually"
104 through being overlaid by a sparse_cases. */
109 /* Mappings from logical to physical columns/rows. */
110 struct axis *columns;
113 /* Mapping from physical columns to "source_info"s. */
114 struct range_map sources;
116 /* Minimum number of columns to put in a new source when we
117 need new columns and none are free. We double it whenever
118 we add a new source to keep the number of file descriptors
119 needed by the datasheet to a minimum, reducing the
120 likelihood of running out. */
121 unsigned column_min_alloc;
123 /* Indicates corrupted data in the datasheet. */
127 /* Maps from a range of physical columns to a source. */
130 struct range_map_node column_range;
131 struct source *source;
134 /* Is this operation a read or a write? */
141 static void free_source_info (struct datasheet *, struct source_info *);
142 static struct source_info *source_info_from_range_map (
143 struct range_map_node *);
144 static bool rw_case (struct datasheet *ds, enum rw_op op,
145 casenumber lrow, size_t start_column, size_t column_cnt,
148 /* Creates and returns a new datasheet.
150 If READER is nonnull, then the datasheet initially contains
151 the contents of READER. */
153 datasheet_create (struct casereader *reader)
155 /* Create datasheet. */
156 struct datasheet *ds = xmalloc (sizeof *ds);
157 ds->columns = axis_create ();
158 ds->rows = axis_create ();
159 range_map_init (&ds->sources);
160 ds->column_min_alloc = 1;
161 ds->taint = taint_create ();
168 struct source_info *si;
170 si = xmalloc (sizeof *si);
171 si->source = source_create_casereader (reader);
172 column_cnt = source_get_column_cnt (si->source);
173 row_cnt = source_get_backing_row_cnt (si->source);
174 source_increase_use (si->source, column_cnt);
176 if ( column_cnt > 0 )
178 unsigned long int column_start;
179 column_start = axis_extend (ds->columns, column_cnt);
180 axis_insert (ds->columns, 0, column_start, column_cnt);
181 range_map_insert (&ds->sources, column_start, column_cnt,
187 unsigned long int row_start;
188 row_start = axis_extend (ds->rows, row_cnt);
189 axis_insert (ds->rows, 0, row_start, row_cnt);
196 /* Destroys datasheet DS. */
198 datasheet_destroy (struct datasheet *ds)
203 axis_destroy (ds->columns);
204 axis_destroy (ds->rows);
205 while (!range_map_is_empty (&ds->sources))
207 struct range_map_node *r = range_map_first (&ds->sources);
208 struct source_info *si = source_info_from_range_map (r);
209 free_source_info (ds, si);
211 taint_destroy (ds->taint);
215 /* Moves datasheet DS to a new location in memory, and returns
216 the new location. Afterward, the datasheet must not be
217 accessed at its former location.
219 This function is useful for ensuring that all references to a
220 datasheet have been dropped, especially in conjunction with
221 tools like Valgrind. */
223 datasheet_rename (struct datasheet *ds)
225 struct datasheet *new = xmemdup (ds, sizeof *ds);
230 /* Returns true if datasheet DS is tainted.
231 A datasheet is tainted by an I/O error or by taint
232 propagation to the datasheet. */
234 datasheet_error (const struct datasheet *ds)
236 return taint_is_tainted (ds->taint);
239 /* Marks datasheet DS tainted. */
241 datasheet_force_error (struct datasheet *ds)
243 taint_set_taint (ds->taint);
246 /* Returns datasheet DS's taint object. */
248 datasheet_get_taint (const struct datasheet *ds)
253 /* Returns the number of rows in DS. */
255 datasheet_get_row_cnt (const struct datasheet *ds)
257 return axis_get_size (ds->rows);
260 /* Returns the number of columns in DS. */
262 datasheet_get_column_cnt (const struct datasheet *ds)
264 return axis_get_size (ds->columns);
267 /* Inserts CNT columns into datasheet DS just before column
268 BEFORE. Initializes the contents of each row in the inserted
269 columns to the CNT values in INIT_VALUES.
271 Returns true if successful, false on failure. In case of
272 failure, the datasheet is unchanged. */
274 datasheet_insert_columns (struct datasheet *ds,
275 const union value init_values[], size_t cnt,
281 unsigned long first_phy; /* First allocated physical column. */
282 unsigned long phy_cnt; /* Number of allocated physical columns. */
284 /* Allocate physical columns from the pool of available
286 if (!axis_allocate (ds->columns, cnt, &first_phy, &phy_cnt))
288 /* No columns were available. Create a new source and
289 extend the axis to make some new ones available. */
290 struct source_info *si;
292 phy_cnt = MAX (cnt, ds->column_min_alloc);
293 first_phy = axis_extend (ds->columns, phy_cnt);
294 ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2);
296 si = xmalloc (sizeof *si);
297 si->source = source_create_empty (phy_cnt);
298 range_map_insert (&ds->sources, first_phy, phy_cnt,
302 axis_make_available (ds->columns, first_phy + cnt,
308 /* Initialize the columns and insert them into the columns
312 struct range_map_node *r; /* Range map holding FIRST_PHY column. */
313 struct source_info *s; /* Source containing FIRST_PHY column. */
314 size_t source_avail; /* Number of phys columns available. */
315 size_t source_cnt; /* Number of phys columns to use. */
317 /* Figure out how many columns we can and want to take
318 starting at FIRST_PHY, and then insert them into the
320 r = range_map_lookup (&ds->sources, first_phy);
321 s = source_info_from_range_map (r);
322 source_avail = range_map_node_get_end (r) - first_phy;
323 source_cnt = MIN (phy_cnt, source_avail);
324 axis_insert (ds->columns, before, first_phy, source_cnt);
326 /* Initialize the data for those columns in the
328 if (!source_write_columns (s->source,
329 first_phy - range_map_node_get_start (r),
330 init_values, source_cnt))
332 datasheet_delete_columns (ds, before - added,
334 taint_set_taint (ds->taint);
337 source_increase_use (s->source, source_cnt);
340 phy_cnt -= source_cnt;
341 first_phy += source_cnt;
342 init_values += source_cnt;
344 before += source_cnt;
351 /* Deletes the CNT columns in DS starting from column START. */
353 datasheet_delete_columns (struct datasheet *ds, size_t start, size_t cnt)
357 assert ( start + cnt <= axis_get_size (ds->columns) );
359 /* Free up columns for reuse. */
360 for (lcol = start; lcol < start + cnt; lcol++)
362 size_t pcol = axis_map (ds->columns, lcol);
363 struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
364 struct source_info *si = source_info_from_range_map (r);
366 source_decrease_use (si->source, 1);
367 if (source_has_backing (si->source))
369 if (!source_in_use (si->source))
370 free_source_info (ds, si);
373 axis_make_available (ds->columns, pcol, 1);
376 /* Remove columns from logical-to-physical mapping. */
377 axis_remove (ds->columns, start, cnt);
380 /* Moves the CNT columns in DS starting at position OLD_START so
381 that they then start at position NEW_START. Equivalent to
382 deleting the column rows, then inserting them at what becomes
383 position NEW_START after the deletion.*/
385 datasheet_move_columns (struct datasheet *ds,
386 size_t old_start, size_t new_start,
389 axis_move (ds->columns, old_start, new_start, cnt);
392 /* Retrieves the contents of the given ROW in datasheet DS into
393 newly created case C. Returns true if successful, false on
396 datasheet_get_row (const struct datasheet *ds, casenumber row, struct ccase *c)
398 size_t column_cnt = datasheet_get_column_cnt (ds);
399 case_create (c, column_cnt);
400 if (rw_case ((struct datasheet *) ds, OP_READ,
401 row, 0, column_cnt, case_data_all_rw (c)))
410 /* Stores the contents of case C, which is destroyed, into the
411 given ROW in DS. Returns true on success, false on I/O error.
412 On failure, the given ROW might be partially modified or
415 datasheet_put_row (struct datasheet *ds, casenumber row, struct ccase *c)
417 size_t column_cnt = datasheet_get_column_cnt (ds);
418 bool ok = rw_case (ds, OP_WRITE, row, 0, column_cnt,
419 (union value *) case_data_all (c));
424 /* Stores the values of the WIDTH columns in DS in the given ROW
425 starting at COLUMN in DS into VALUES. Returns true if
426 successful, false on I/O error. */
428 datasheet_get_value (const struct datasheet *ds, casenumber row, size_t column,
429 union value *value, int width)
431 return rw_case ((struct datasheet *) ds,
432 OP_READ, row, column, value_cnt_from_width (width), value);
435 /* Stores the WIDTH given VALUES into the given ROW in DS
436 starting at COLUMN. Returns true if successful, false on I/O
437 error. On failure, the given ROW might be partially modified
440 datasheet_put_value (struct datasheet *ds, casenumber row, size_t column,
441 const union value *value, int width)
443 return rw_case (ds, OP_WRITE, row, column, value_cnt_from_width (width),
444 (union value *) value);
447 /* Inserts the CNT cases at C, which are destroyed, into
448 datasheet DS just before row BEFORE. Returns true if
449 successful, false on I/O error. On failure, datasheet DS is
452 datasheet_insert_rows (struct datasheet *ds,
453 casenumber before, struct ccase c[],
456 casenumber added = 0;
459 unsigned long first_phy;
460 unsigned long phy_cnt;
463 /* Allocate physical rows from the pool of available
465 if (!axis_allocate (ds->rows, cnt, &first_phy, &phy_cnt))
467 /* No rows were available. Extend the row axis to make
468 some new ones available. */
470 first_phy = axis_extend (ds->rows, cnt);
473 /* Insert the new rows into the row mapping. */
474 axis_insert (ds->rows, before, first_phy, phy_cnt);
476 /* Initialize the new rows. */
477 for (i = 0; i < phy_cnt; i++)
478 if (!datasheet_put_row (ds, before + i, &c[i]))
481 case_destroy (&c[i]);
482 datasheet_delete_rows (ds, before - added, phy_cnt + added);
495 /* Deletes the CNT rows in DS starting from row FIRST. */
497 datasheet_delete_rows (struct datasheet *ds,
498 casenumber first, casenumber cnt)
502 /* Free up rows for reuse.
504 for (lrow = first; lrow < first + cnt; lrow++)
505 axis_make_available (ds->rows, axis_map (ds->rows, lrow), 1);
507 /* Remove rows from logical-to-physical mapping. */
508 axis_remove (ds->rows, first, cnt);
511 /* Moves the CNT rows in DS starting at position OLD_START so
512 that they then start at position NEW_START. Equivalent to
513 deleting the given rows, then inserting them at what becomes
514 position NEW_START after the deletion. */
516 datasheet_move_rows (struct datasheet *ds,
517 size_t old_start, size_t new_start,
520 axis_move (ds->rows, old_start, new_start, cnt);
523 static const struct casereader_random_class datasheet_reader_class;
525 /* Creates and returns a casereader whose input cases are the
526 rows in datasheet DS. From the caller's perspective, DS is
527 effectively destroyed by this operation, such that the caller
528 must not reference it again. */
530 datasheet_make_reader (struct datasheet *ds)
532 struct casereader *reader;
533 ds = datasheet_rename (ds);
534 reader = casereader_create_random (datasheet_get_column_cnt (ds),
535 datasheet_get_row_cnt (ds),
536 &datasheet_reader_class, ds);
537 taint_propagate (datasheet_get_taint (ds), casereader_get_taint (reader));
541 /* "read" function for the datasheet random casereader. */
543 datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
544 casenumber case_idx, struct ccase *c)
546 struct datasheet *ds = ds_;
547 if (case_idx >= datasheet_get_row_cnt (ds))
549 else if (datasheet_get_row (ds, case_idx, c))
553 taint_set_taint (ds->taint);
558 /* "destroy" function for the datasheet random casereader. */
560 datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
562 struct datasheet *ds = ds_;
563 datasheet_destroy (ds);
566 /* "advance" function for the datasheet random casereader. */
568 datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
571 struct datasheet *ds = ds_;
572 datasheet_delete_rows (ds, 0, case_cnt);
575 /* Random casereader class for a datasheet. */
576 static const struct casereader_random_class datasheet_reader_class =
578 datasheet_reader_read,
579 datasheet_reader_destroy,
580 datasheet_reader_advance,
583 /* Removes SI from DS's set of sources and destroys its
586 free_source_info (struct datasheet *ds, struct source_info *si)
588 range_map_delete (&ds->sources, &si->column_range);
589 source_destroy (si->source);
593 static struct source_info *
594 source_info_from_range_map (struct range_map_node *node)
596 return range_map_data (node, struct source_info, column_range);
599 /* Reads (if OP is OP_READ) or writes (if op is OP_WRITE) the
600 COLUMN_CNT columns starting from column START_COLUMN in row
601 LROW to/from the COLUMN_CNT values in DATA. */
603 rw_case (struct datasheet *ds, enum rw_op op,
604 casenumber lrow, size_t start_column, size_t column_cnt,
610 assert (lrow < datasheet_get_row_cnt (ds));
611 assert (column_cnt <= datasheet_get_column_cnt (ds));
612 assert (start_column + column_cnt <= datasheet_get_column_cnt (ds));
614 prow = axis_map (ds->rows, lrow);
615 for (lcol = start_column; lcol < start_column + column_cnt; lcol++, data++)
617 size_t pcol = axis_map (ds->columns, lcol);
618 struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
619 struct source_info *s = source_info_from_range_map (r);
620 size_t pcol_ofs = pcol - range_map_node_get_start (r);
622 ? source_read (s->source, prow, pcol_ofs, data, 1)
623 : source_write (s->source, prow, pcol_ofs, data, 1)))
625 taint_set_taint (ds->taint);
634 An axis has two functions. First, it maintains a mapping from
635 logical (client-visible) to physical (storage) ordinates. The
636 axis_map and axis_get_size functions inspect this mapping, and
637 the axis_insert, axis_remove, and axis_move functions modify
638 it. Second, it tracks the set of ordinates that are unused
639 and available for reuse. (Not all unused ordinates are
640 available for reuse: in particular, unused columns that are
641 backed by a casereader are never reused.) The axis_allocate,
642 axis_make_available, and axis_extend functions affect the set
643 of available ordinates. */
646 struct tower log_to_phy; /* Map from logical to physical ordinates;
647 contains "struct axis_group"s. */
648 struct range_set *available; /* Set of unused, available ordinates. */
649 unsigned long int phy_size; /* Current physical length of axis. */
652 /* A mapping from logical to physical ordinates. */
655 struct tower_node logical; /* Range of logical ordinates. */
656 unsigned long phy_start; /* First corresponding physical ordinate. */
659 static struct axis_group *axis_group_from_tower_node (struct tower_node *);
660 static struct tower_node *make_axis_group (unsigned long int phy_start);
661 static struct tower_node *split_axis (struct axis *, unsigned long int where);
662 static void merge_axis_nodes (struct axis *, struct tower_node *,
663 struct tower_node **other_node);
664 static void check_axis_merged (const struct axis *axis UNUSED);
666 /* Creates and returns a new, initially empty axis. */
670 struct axis *axis = xmalloc (sizeof *axis);
671 tower_init (&axis->log_to_phy);
672 axis->available = range_set_create ();
677 /* Returns a clone of existing axis OLD.
679 Currently this is used only by the datasheet model checker
680 driver, but it could be otherwise useful. */
682 axis_clone (const struct axis *old)
684 const struct tower_node *node;
687 new = xmalloc (sizeof *new);
688 tower_init (&new->log_to_phy);
689 new->available = range_set_clone (old->available, NULL);
690 new->phy_size = old->phy_size;
692 for (node = tower_first (&old->log_to_phy); node != NULL;
693 node = tower_next (&old->log_to_phy, node))
695 unsigned long int size = tower_node_get_height (node);
696 struct axis_group *group = tower_data (node, struct axis_group, logical);
697 tower_insert (&new->log_to_phy, size, make_axis_group (group->phy_start),
704 /* Adds the state of AXIS to the MD4 hash context CTX.
706 This is only used by the datasheet model checker driver. It
707 is unlikely to be otherwise useful. */
709 axis_hash (const struct axis *axis, struct md4_ctx *ctx)
711 const struct tower_node *tn;
712 const struct range_set_node *rsn;
714 for (tn = tower_first (&axis->log_to_phy); tn != NULL;
715 tn = tower_next (&axis->log_to_phy, tn))
717 struct axis_group *group = tower_data (tn, struct axis_group, logical);
718 unsigned long int phy_start = group->phy_start;
719 unsigned long int size = tower_node_get_height (tn);
721 md4_process_bytes (&phy_start, sizeof phy_start, ctx);
722 md4_process_bytes (&size, sizeof size, ctx);
725 for (rsn = range_set_first (axis->available); rsn != NULL;
726 rsn = range_set_next (axis->available, rsn))
728 unsigned long int start = range_set_node_get_start (rsn);
729 unsigned long int end = range_set_node_get_end (rsn);
731 md4_process_bytes (&start, sizeof start, ctx);
732 md4_process_bytes (&end, sizeof end, ctx);
735 md4_process_bytes (&axis->phy_size, sizeof axis->phy_size, ctx);
740 axis_destroy (struct axis *axis)
745 while (!tower_is_empty (&axis->log_to_phy))
747 struct tower_node *node = tower_first (&axis->log_to_phy);
748 struct axis_group *group = tower_data (node, struct axis_group,
750 tower_delete (&axis->log_to_phy, node);
754 range_set_destroy (axis->available);
758 /* Allocates up to REQUEST contiguous unused and available
759 ordinates from AXIS. If successful, stores the number
760 obtained into *WIDTH and the ordinate of the first into
761 *START, marks the ordinates as now unavailable return true.
762 On failure, which occurs only if AXIS has no unused and
763 available ordinates, returns false without modifying AXIS. */
765 axis_allocate (struct axis *axis, unsigned long int request,
766 unsigned long int *start, unsigned long int *width)
768 return range_set_allocate (axis->available, request, start, width);
771 /* Marks the WIDTH contiguous ordinates in AXIS, starting from
772 START, as unused and available. */
774 axis_make_available (struct axis *axis,
775 unsigned long int start, unsigned long int width)
777 range_set_insert (axis->available, start, width);
780 /* Extends the total physical length of AXIS by WIDTH and returns
781 the first ordinate in the new physical region. */
782 static unsigned long int
783 axis_extend (struct axis *axis, unsigned long int width)
785 unsigned long int start = axis->phy_size;
786 axis->phy_size += width;
790 /* Returns the physical ordinate in AXIS corresponding to logical
791 ordinate LOG_POS. LOG_POS must be less than the logical
793 static unsigned long int
794 axis_map (const struct axis *axis, unsigned long log_pos)
796 struct tower_node *node;
797 struct axis_group *group;
798 unsigned long int group_start;
800 node = tower_lookup (&axis->log_to_phy, log_pos, &group_start);
801 group = tower_data (node, struct axis_group, logical);
802 return group->phy_start + (log_pos - group_start);
805 /* Returns the logical length of AXIS. */
807 axis_get_size (const struct axis *axis)
809 return tower_height (&axis->log_to_phy);
812 /* Inserts the CNT contiguous physical ordinates starting at
813 PHY_START into AXIS's logical-to-physical mapping, starting at
814 logical position LOG_START. */
816 axis_insert (struct axis *axis,
817 unsigned long int log_start, unsigned long int phy_start,
818 unsigned long int cnt)
820 struct tower_node *before = split_axis (axis, log_start);
821 struct tower_node *new = make_axis_group (phy_start);
822 tower_insert (&axis->log_to_phy, cnt, new, before);
823 merge_axis_nodes (axis, new, NULL);
824 check_axis_merged (axis);
827 /* Removes CNT ordinates from AXIS's logical-to-physical mapping
828 starting at logical position START. */
830 axis_remove (struct axis *axis,
831 unsigned long int start, unsigned long int cnt)
835 struct tower_node *last = split_axis (axis, start + cnt);
836 struct tower_node *cur, *next;
837 for (cur = split_axis (axis, start); cur != last; cur = next)
839 next = tower_delete (&axis->log_to_phy, cur);
840 free (axis_group_from_tower_node (cur));
842 merge_axis_nodes (axis, last, NULL);
843 check_axis_merged (axis);
847 /* Moves the CNT ordinates in AXIS's logical-to-mapping starting
848 at logical position OLD_START so that they then start at
849 position NEW_START. */
851 axis_move (struct axis *axis,
852 unsigned long int old_start, unsigned long int new_start,
853 unsigned long int cnt)
855 if (cnt > 0 && old_start != new_start)
857 struct tower_node *old_first, *old_last, *new_first;
858 struct tower_node *merge1, *merge2;
859 struct tower tmp_array;
861 /* Move ordinates OLD_START...(OLD_START + CNT) into new,
862 separate TMP_ARRAY. */
863 old_first = split_axis (axis, old_start);
864 old_last = split_axis (axis, old_start + cnt);
865 tower_init (&tmp_array);
866 tower_splice (&tmp_array, NULL,
867 &axis->log_to_phy, old_first, old_last);
868 merge_axis_nodes (axis, old_last, NULL);
869 check_axis_merged (axis);
871 /* Move TMP_ARRAY to position NEW_START. */
872 new_first = split_axis (axis, new_start);
873 merge1 = tower_first (&tmp_array);
874 merge2 = tower_last (&tmp_array);
875 if (merge2 == merge1)
877 tower_splice (&axis->log_to_phy, new_first, &tmp_array, old_first, NULL);
878 merge_axis_nodes (axis, merge1, &merge2);
879 merge_axis_nodes (axis, merge2, NULL);
880 check_axis_merged (axis);
884 /* Returns the axis_group in which NODE is embedded. */
885 static struct axis_group *
886 axis_group_from_tower_node (struct tower_node *node)
888 return tower_data (node, struct axis_group, logical);
891 /* Creates and returns a new axis_group at physical position
893 static struct tower_node *
894 make_axis_group (unsigned long phy_start)
896 struct axis_group *group = xmalloc (sizeof *group);
897 group->phy_start = phy_start;
898 return &group->logical;
901 /* Returns the tower_node in AXIS's logical-to-physical map whose
902 bottom edge is at exact level WHERE. If there is no such
903 tower_node in AXIS's logical-to-physical map, then split_axis
904 creates one by breaking an existing tower_node into two
905 separate ones, unless WHERE is equal to the tower height, in
906 which case it simply returns a null pointer. */
907 static struct tower_node *
908 split_axis (struct axis *axis, unsigned long int where)
910 unsigned long int group_start;
911 struct tower_node *group_node;
912 struct axis_group *group;
914 assert (where <= tower_height (&axis->log_to_phy));
915 if (where >= tower_height (&axis->log_to_phy))
918 group_node = tower_lookup (&axis->log_to_phy, where, &group_start);
919 group = axis_group_from_tower_node (group_node);
920 if (where > group_start)
922 unsigned long int size_1 = where - group_start;
923 unsigned long int size_2 = tower_node_get_height (group_node) - size_1;
924 struct tower_node *next = tower_next (&axis->log_to_phy, group_node);
925 struct tower_node *new = make_axis_group (group->phy_start + size_1);
926 tower_resize (&axis->log_to_phy, group_node, size_1);
927 tower_insert (&axis->log_to_phy, size_2, new, next);
931 return &group->logical;
934 /* Within AXIS, attempts to merge NODE (or the last node in AXIS,
935 if NODE is null) with its neighbor nodes. This is possible
936 when logically adjacent nodes are also adjacent physically (in
939 When a merge occurs, and OTHER_NODE is non-null and points to
940 the node to be deleted, this function also updates
941 *OTHER_NODE, if necessary, to ensure that it remains a valid
944 merge_axis_nodes (struct axis *axis, struct tower_node *node,
945 struct tower_node **other_node)
947 struct tower *t = &axis->log_to_phy;
948 struct axis_group *group;
949 struct tower_node *next, *prev;
951 /* Find node to potentially merge with neighbors. */
953 node = tower_last (t);
956 group = axis_group_from_tower_node (node);
958 /* Try to merge NODE with successor. */
959 next = tower_next (t, node);
962 struct axis_group *next_group = axis_group_from_tower_node (next);
963 unsigned long this_height = tower_node_get_height (node);
965 if (group->phy_start + this_height == next_group->phy_start)
967 unsigned long next_height = tower_node_get_height (next);
968 tower_resize (t, node, this_height + next_height);
969 if (other_node != NULL && *other_node == next)
970 *other_node = tower_next (t, *other_node);
971 tower_delete (t, next);
976 /* Try to merge NODE with predecessor. */
977 prev = tower_prev (t, node);
980 struct axis_group *prev_group = axis_group_from_tower_node (prev);
981 unsigned long prev_height = tower_node_get_height (prev);
983 if (prev_group->phy_start + prev_height == group->phy_start)
985 unsigned long this_height = tower_node_get_height (node);
986 group->phy_start = prev_group->phy_start;
987 tower_resize (t, node, this_height + prev_height);
988 if (other_node != NULL && *other_node == prev)
989 *other_node = tower_next (t, *other_node);
990 tower_delete (t, prev);
996 /* Verify that all potentially merge-able nodes in AXIS are
999 check_axis_merged (const struct axis *axis UNUSED)
1001 #if ASSERT_LEVEL >= 10
1002 struct tower_node *prev, *node;
1004 for (prev = NULL, node = tower_first (&axis->log_to_phy); node != NULL;
1005 prev = node, node = tower_next (&axis->log_to_phy, node))
1008 struct axis_group *prev_group = axis_group_from_tower_node (prev);
1009 unsigned long prev_height = tower_node_get_height (prev);
1010 struct axis_group *node_group = axis_group_from_tower_node (node);
1011 assert (prev_group->phy_start + prev_height != node_group->phy_start);
1019 size_t columns_used; /* Number of columns in use by client. */
1020 struct sparse_cases *data; /* Data at top level, atop the backing. */
1021 struct casereader *backing; /* Backing casereader (or null). */
1022 casenumber backing_rows; /* Number of rows in backing (if nonnull). */
1025 /* Creates and returns an empty, unbacked source with COLUMN_CNT
1026 columns and an initial "columns_used" of 0. */
1027 static struct source *
1028 source_create_empty (size_t column_cnt)
1030 struct source *source = xmalloc (sizeof *source);
1031 source->columns_used = 0;
1032 source->data = sparse_cases_create (column_cnt);
1033 source->backing = NULL;
1034 source->backing_rows = 0;
1038 /* Creates and returns a new source backed by READER and with the
1039 same initial dimensions and content. */
1040 static struct source *
1041 source_create_casereader (struct casereader *reader)
1043 struct source *source
1044 = source_create_empty (casereader_get_value_cnt (reader));
1045 source->backing = reader;
1046 source->backing_rows = casereader_count_cases (reader);
1050 /* Returns a clone of source OLD with the same data and backing
1053 Currently this is used only by the datasheet model checker
1054 driver, but it could be otherwise useful. */
1055 static struct source *
1056 source_clone (const struct source *old)
1058 struct source *new = xmalloc (sizeof *new);
1059 new->columns_used = old->columns_used;
1060 new->data = sparse_cases_clone (old->data);
1061 new->backing = old->backing != NULL ? casereader_clone (old->backing) : NULL;
1062 new->backing_rows = old->backing_rows;
1063 if (new->data == NULL)
1065 source_destroy (new);
1071 /* Increases the columns_used count of SOURCE by DELTA.
1072 The new value must not exceed SOURCE's number of columns. */
1074 source_increase_use (struct source *source, size_t delta)
1076 source->columns_used += delta;
1077 assert (source->columns_used <= sparse_cases_get_value_cnt (source->data));
1080 /* Decreases the columns_used count of SOURCE by DELTA.
1081 This must not attempt to decrease the columns_used count below
1084 source_decrease_use (struct source *source, size_t delta)
1086 assert (delta <= source->columns_used);
1087 source->columns_used -= delta;
1090 /* Returns true if SOURCE has any columns in use,
1093 source_in_use (const struct source *source)
1095 return source->columns_used > 0;
1098 /* Destroys SOURCE and its data and backing, if any. */
1100 source_destroy (struct source *source)
1104 sparse_cases_destroy (source->data);
1105 casereader_destroy (source->backing);
1110 /* Returns the number of rows in SOURCE's backing casereader
1111 (SOURCE must have a backing casereader). */
1113 source_get_backing_row_cnt (const struct source *source)
1115 assert (source_has_backing (source));
1116 return source->backing_rows;
1119 /* Returns the number of columns in SOURCE. */
1121 source_get_column_cnt (const struct source *source)
1123 return sparse_cases_get_value_cnt (source->data);
1126 /* Reads VALUE_CNT columns from SOURCE in the given ROW, starting
1127 from COLUMN, into VALUES. Returns true if successful, false
1130 source_read (const struct source *source,
1131 casenumber row, size_t column,
1132 union value values[], size_t value_cnt)
1134 if (source->backing == NULL || sparse_cases_contains_row (source->data, row))
1135 return sparse_cases_read (source->data, row, column, values, value_cnt);
1141 assert (source->backing != NULL);
1142 ok = casereader_peek (source->backing, row, &c);
1145 case_copy_out (&c, column, values, value_cnt);
1152 /* Writes the VALUE_CNT values in VALUES to SOURCE in the given
1153 ROW, starting at ROW. Returns true if successful, false on
1154 I/O error. On error, the row's data may be completely or
1155 partially corrupted, both inside and outside the region to be
1158 source_write (struct source *source,
1159 casenumber row, size_t column,
1160 const union value values[], size_t value_cnt)
1162 size_t column_cnt = sparse_cases_get_value_cnt (source->data);
1165 if (source->backing == NULL
1166 || (column == 0 && value_cnt == column_cnt)
1167 || sparse_cases_contains_row (source->data, row))
1168 ok = sparse_cases_write (source->data, row, column, values, value_cnt);
1172 if (row < source->backing_rows)
1173 ok = casereader_peek (source->backing, row, &c);
1176 /* It's not one of the backed rows. Ideally, this
1177 should never happen: we'd always be writing the full
1178 contents of new, unbacked rows in a single call to
1179 this function, so that the first case above would
1180 trigger. But that's a little difficult at higher
1181 levels, so that we in fact usually write the full
1182 contents of new, unbacked rows in multiple calls to
1183 this function. Make this work. */
1184 case_create (&c, column_cnt);
1189 case_copy_in (&c, column, values, value_cnt);
1190 ok = sparse_cases_write (source->data, row, 0,
1191 case_data_all (&c), column_cnt);
1198 /* Within SOURCE, which must not have a backing casereader,
1199 writes the VALUE_CNT values in VALUES_CNT to the VALUE_CNT
1200 columns starting from START_COLUMN, in every row, even in rows
1201 not yet otherwise initialized. Returns true if successful,
1202 false if an I/O error occurs.
1204 We don't support backing != NULL because (1) it's harder and
1205 (2) source_write_columns is only called by
1206 datasheet_insert_columns, which doesn't reuse columns from
1207 sources that are backed by casereaders. */
1209 source_write_columns (struct source *source, size_t start_column,
1210 const union value values[], size_t value_cnt)
1212 assert (source->backing == NULL);
1214 return sparse_cases_write_columns (source->data, start_column,
1218 /* Returns true if SOURCE has a backing casereader, false
1221 source_has_backing (const struct source *source)
1223 return source->backing != NULL;
1226 /* Datasheet model checker test driver. */
1228 /* Maximum size of datasheet supported for model checking
1233 /* Hashes the structure of datasheet DS and returns the hash.
1234 We use MD4 because it is much faster than MD5 or SHA-1 but its
1235 collision resistance is just as good. */
1237 hash_datasheet (const struct datasheet *ds)
1239 unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
1241 struct range_map_node *r;
1243 md4_init_ctx (&ctx);
1244 axis_hash (ds->columns, &ctx);
1245 axis_hash (ds->rows, &ctx);
1246 for (r = range_map_first (&ds->sources); r != NULL;
1247 r = range_map_next (&ds->sources, r))
1249 unsigned long int start = range_map_node_get_start (r);
1250 unsigned long int end = range_map_node_get_end (r);
1251 md4_process_bytes (&start, sizeof start, &ctx);
1252 md4_process_bytes (&end, sizeof end, &ctx);
1254 md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc,
1256 md4_finish_ctx (&ctx, hash);
1260 /* Checks that datasheet DS contains has ROW_CNT rows, COLUMN_CNT
1261 columns, and the same contents as ARRAY, reporting any
1262 mismatches via mc_error. Then, adds DS to MC as a new state. */
1264 check_datasheet (struct mc *mc, struct datasheet *ds,
1265 double array[MAX_ROWS][MAX_COLS],
1266 size_t row_cnt, size_t column_cnt)
1268 assert (row_cnt < MAX_ROWS);
1269 assert (column_cnt < MAX_COLS);
1271 /* If it is a duplicate hash, discard the state before checking
1272 its consistency, to save time. */
1273 if (mc_discard_dup_state (mc, hash_datasheet (ds)))
1275 datasheet_destroy (ds);
1279 if (row_cnt != datasheet_get_row_cnt (ds))
1280 mc_error (mc, "row count (%lu) does not match expected (%zu)",
1281 (unsigned long int) datasheet_get_row_cnt (ds), row_cnt);
1282 else if (column_cnt != datasheet_get_column_cnt (ds))
1283 mc_error (mc, "column count (%lu) does not match expected (%zu)",
1284 (unsigned long int) datasheet_get_column_cnt (ds), column_cnt);
1289 for (row = 0; row < row_cnt; row++)
1290 for (col = 0; col < column_cnt; col++)
1293 if (!datasheet_get_value (ds, row, col, &v, 1))
1295 if (v.f != array[row][col])
1296 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: %g != %g",
1297 row, col, row_cnt, column_cnt, v.f, array[row][col]);
1301 mc_add_state (mc, ds);
1304 /* Extracts the contents of DS into DATA. */
1306 extract_data (const struct datasheet *ds, double data[MAX_ROWS][MAX_COLS])
1308 size_t column_cnt = datasheet_get_column_cnt (ds);
1309 size_t row_cnt = datasheet_get_row_cnt (ds);
1312 assert (row_cnt < MAX_ROWS);
1313 assert (column_cnt < MAX_COLS);
1314 for (row = 0; row < row_cnt; row++)
1315 for (col = 0; col < column_cnt; col++)
1318 if (!datasheet_get_value (ds, row, col, &v, 1))
1320 data[row][col] = v.f;
1324 /* Clones the structure and contents of ODS into *DS,
1325 and the contents of ODATA into DATA. */
1327 clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS],
1328 struct datasheet **ds_, double data[MAX_ROWS][MAX_COLS])
1330 struct datasheet *ds;
1331 struct range_map_node *r;
1333 /* Clone ODS into DS. */
1334 ds = *ds_ = xmalloc (sizeof *ds);
1335 ds->columns = axis_clone (ods->columns);
1336 ds->rows = axis_clone (ods->rows);
1337 range_map_init (&ds->sources);
1338 for (r = range_map_first (&ods->sources); r != NULL;
1339 r = range_map_next (&ods->sources, r))
1341 const struct source_info *osi = source_info_from_range_map (r);
1342 struct source_info *si = xmalloc (sizeof *si);
1343 si->source = source_clone (osi->source);
1344 range_map_insert (&ds->sources, range_map_node_get_start (r),
1345 range_map_node_get_width (r), &si->column_range);
1347 ds->column_min_alloc = ods->column_min_alloc;
1348 ds->taint = taint_create ();
1350 /* Clone ODATA into DATA. */
1351 memcpy (data, odata, MAX_ROWS * MAX_COLS * sizeof **data);
1354 /* "init" function for struct mc_class. */
1356 datasheet_mc_init (struct mc *mc)
1358 struct datasheet_test_params *params = mc_get_aux (mc);
1359 struct datasheet *ds;
1361 if (params->backing_rows == 0 && params->backing_cols == 0)
1363 /* Create unbacked datasheet. */
1364 ds = datasheet_create (NULL);
1365 mc_name_operation (mc, "empty datasheet");
1366 check_datasheet (mc, ds, NULL, 0, 0);
1370 /* Create datasheet with backing. */
1371 struct casewriter *writer;
1372 struct casereader *reader;
1373 double data[MAX_ROWS][MAX_COLS];
1376 assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
1377 assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
1379 writer = mem_writer_create (params->backing_cols);
1380 for (row = 0; row < params->backing_rows; row++)
1385 case_create (&c, params->backing_cols);
1386 for (col = 0; col < params->backing_cols; col++)
1388 double value = params->next_value++;
1389 data[row][col] = value;
1390 case_data_rw_idx (&c, col)->f = value;
1392 casewriter_write (writer, &c);
1394 reader = casewriter_make_reader (writer);
1395 assert (reader != NULL);
1397 ds = datasheet_create (reader);
1398 mc_name_operation (mc, "datasheet with (%d,%d) backing",
1399 params->backing_rows, params->backing_cols);
1400 check_datasheet (mc, ds, data,
1401 params->backing_rows, params->backing_cols);
1405 /* "mutate" function for struct mc_class. */
1407 datasheet_mc_mutate (struct mc *mc, const void *ods_)
1409 struct datasheet_test_params *params = mc_get_aux (mc);
1411 const struct datasheet *ods = ods_;
1412 double odata[MAX_ROWS][MAX_COLS];
1413 double data[MAX_ROWS][MAX_COLS];
1414 size_t column_cnt = datasheet_get_column_cnt (ods);
1415 size_t row_cnt = datasheet_get_row_cnt (ods);
1416 size_t pos, new_pos, cnt;
1418 extract_data (ods, odata);
1420 /* Insert all possible numbers of columns in all possible
1422 for (pos = 0; pos <= column_cnt; pos++)
1423 for (cnt = 0; cnt <= params->max_cols - column_cnt; cnt++)
1424 if (mc_include_state (mc))
1426 struct datasheet *ds;
1427 union value new[MAX_COLS];
1430 mc_name_operation (mc, "insert %zu columns at %zu", cnt, pos);
1431 clone_model (ods, odata, &ds, data);
1433 for (i = 0; i < cnt; i++)
1434 new[i].f = params->next_value++;
1436 if (!datasheet_insert_columns (ds, new, cnt, pos))
1437 mc_error (mc, "datasheet_insert_columns failed");
1439 for (i = 0; i < row_cnt; i++)
1441 insert_range (&data[i][0], column_cnt, sizeof data[i][0],
1443 for (j = 0; j < cnt; j++)
1444 data[i][pos + j] = new[j].f;
1447 check_datasheet (mc, ds, data, row_cnt, column_cnt + cnt);
1450 /* Delete all possible numbers of columns from all possible
1452 for (pos = 0; pos < column_cnt; pos++)
1453 for (cnt = 0; cnt < column_cnt - pos; cnt++)
1454 if (mc_include_state (mc))
1456 struct datasheet *ds;
1459 mc_name_operation (mc, "delete %zu columns at %zu", cnt, pos);
1460 clone_model (ods, odata, &ds, data);
1462 datasheet_delete_columns (ds, pos, cnt);
1464 for (i = 0; i < row_cnt; i++)
1465 remove_range (&data[i], column_cnt, sizeof *data[i], pos, cnt);
1467 check_datasheet (mc, ds, data, row_cnt, column_cnt - cnt);
1470 /* Move all possible numbers of columns from all possible
1471 existing positions to all possible new positions. */
1472 for (pos = 0; pos < column_cnt; pos++)
1473 for (cnt = 0; cnt < column_cnt - pos; cnt++)
1474 for (new_pos = 0; new_pos < column_cnt - cnt; new_pos++)
1475 if (mc_include_state (mc))
1477 struct datasheet *ds;
1480 clone_model (ods, odata, &ds, data);
1481 mc_name_operation (mc, "move %zu columns from %zu to %zu",
1484 datasheet_move_columns (ds, pos, new_pos, cnt);
1486 for (i = 0; i < row_cnt; i++)
1487 move_range (&data[i], column_cnt, sizeof data[i][0],
1490 check_datasheet (mc, ds, data, row_cnt, column_cnt);
1493 /* Insert all possible numbers of rows in all possible
1495 for (pos = 0; pos <= row_cnt; pos++)
1496 for (cnt = 0; cnt <= params->max_rows - row_cnt; cnt++)
1497 if (mc_include_state (mc))
1499 struct datasheet *ds;
1500 struct ccase c[MAX_ROWS];
1503 clone_model (ods, odata, &ds, data);
1504 mc_name_operation (mc, "insert %zu rows at %zu", cnt, pos);
1506 for (i = 0; i < cnt; i++)
1508 case_create (&c[i], column_cnt);
1509 for (j = 0; j < column_cnt; j++)
1510 case_data_rw_idx (&c[i], j)->f = params->next_value++;
1513 insert_range (data, row_cnt, sizeof data[pos], pos, cnt);
1514 for (i = 0; i < cnt; i++)
1515 for (j = 0; j < column_cnt; j++)
1516 data[i + pos][j] = case_num_idx (&c[i], j);
1518 if (!datasheet_insert_rows (ds, pos, c, cnt))
1519 mc_error (mc, "datasheet_insert_rows failed");
1521 check_datasheet (mc, ds, data, row_cnt + cnt, column_cnt);
1524 /* Delete all possible numbers of rows from all possible
1526 for (pos = 0; pos < row_cnt; pos++)
1527 for (cnt = 0; cnt < row_cnt - pos; cnt++)
1528 if (mc_include_state (mc))
1530 struct datasheet *ds;
1532 clone_model (ods, odata, &ds, data);
1533 mc_name_operation (mc, "delete %zu rows at %zu", cnt, pos);
1535 datasheet_delete_rows (ds, pos, cnt);
1537 remove_range (&data[0], row_cnt, sizeof data[0], pos, cnt);
1539 check_datasheet (mc, ds, data, row_cnt - cnt, column_cnt);
1542 /* Move all possible numbers of rows from all possible existing
1543 positions to all possible new positions. */
1544 for (pos = 0; pos < row_cnt; pos++)
1545 for (cnt = 0; cnt < row_cnt - pos; cnt++)
1546 for (new_pos = 0; new_pos < row_cnt - cnt; new_pos++)
1547 if (mc_include_state (mc))
1549 struct datasheet *ds;
1551 clone_model (ods, odata, &ds, data);
1552 mc_name_operation (mc, "move %zu rows from %zu to %zu",
1555 datasheet_move_rows (ds, pos, new_pos, cnt);
1557 move_range (&data[0], row_cnt, sizeof data[0],
1560 check_datasheet (mc, ds, data, row_cnt, column_cnt);
1564 /* "destroy" function for struct mc_class. */
1566 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
1568 struct datasheet *ds = ds_;
1569 datasheet_destroy (ds);
1572 /* Executes the model checker on the datasheet test driver with
1573 the given OPTIONS and passing in the given PARAMS, which must
1574 point to a modifiable "struct datasheet_test_params". If any
1575 value in PARAMS is out of range, it will be adjusted into the
1576 valid range before running the test.
1578 Returns the results of the model checking run. */
1580 datasheet_test (struct mc_options *options, void *params_)
1582 struct datasheet_test_params *params = params_;
1583 static const struct mc_class datasheet_mc_class =
1586 datasheet_mc_mutate,
1587 datasheet_mc_destroy,
1590 params->next_value = 1;
1591 params->max_rows = MIN (params->max_rows, MAX_ROWS);
1592 params->max_cols = MIN (params->max_cols, MAX_COLS);
1593 params->backing_rows = MIN (params->backing_rows, params->max_rows);
1594 params->backing_cols = MIN (params->backing_cols, params->max_cols);
1596 mc_options_set_aux (options, params);
1597 return mc_run (&datasheet_mc_class, options);