1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/datasheet.h>
24 #include <data/casereader-provider.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/sparse-cases.h>
28 #include <libpspp/array.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/model-checker.h>
31 #include <libpspp/range-map.h>
32 #include <libpspp/range-set.h>
33 #include <libpspp/taint.h>
34 #include <libpspp/tower.h>
40 static struct axis *axis_create (void);
41 static struct axis *axis_clone (const struct axis *);
42 static void axis_destroy (struct axis *);
44 static void axis_hash (const struct axis *, struct md4_ctx *);
46 static bool axis_allocate (struct axis *, unsigned long int request,
47 unsigned long int *start,
48 unsigned long int *width);
49 static void axis_make_available (struct axis *,
50 unsigned long int start,
51 unsigned long int width);
52 static unsigned long int axis_extend (struct axis *, unsigned long int width);
54 static unsigned long int axis_map (const struct axis *, unsigned long log_pos);
56 static unsigned long axis_get_size (const struct axis *);
57 static void axis_insert (struct axis *,
58 unsigned long int log_start,
59 unsigned long int phy_start,
60 unsigned long int cnt);
61 static void axis_remove (struct axis *,
62 unsigned long int start, unsigned long int cnt);
63 static void axis_move (struct axis *,
64 unsigned long int old_start,
65 unsigned long int new_start,
66 unsigned long int cnt);
68 static struct source *source_create_empty (size_t column_cnt);
69 static struct source *source_create_casereader (struct casereader *);
70 static struct source *source_clone (const struct source *);
71 static void source_destroy (struct source *);
73 static casenumber source_get_backing_row_cnt (const struct source *);
74 static size_t source_get_column_cnt (const struct source *);
76 static bool source_read (const struct source *,
77 casenumber row, size_t column,
78 union value[], size_t value_cnt);
79 static bool source_write (struct source *,
80 casenumber row, size_t column,
81 const union value[], size_t value_cnt);
82 static bool source_write_columns (struct source *, size_t start_column,
83 const union value[], size_t value_cnt);
84 static bool source_has_backing (const struct source *);
85 static void source_increase_use (struct source *, size_t delta);
86 static void source_decrease_use (struct source *, size_t delta);
87 static bool source_in_use (const struct source *);
89 /* A datasheet is internally composed from a set of data files,
90 called "sources". The sources that make up a datasheet must
91 have the same number of rows (cases), but their numbers of
92 columns (variables) may vary.
94 A datasheet's external view is produced by mapping (permuting
95 and selecting) its internal data. Thus, we can rearrange or
96 delete rows or columns simply by modifying the mapping. We
97 add rows by adding rows to each source and to the row mapping.
98 We add columns by adding a new source, then adding that source
99 to the column mapping.
101 Each source in a datasheet can be a casereader or a
102 sparse_cases. Casereaders are read-only, so when sources made
103 from casereaders need to be modified, it is done "virtually"
104 through being overlaid by a sparse_cases. */
109 /* Mappings from logical to physical columns/rows. */
110 struct axis *columns;
113 /* Mapping from physical columns to "source_info"s. */
114 struct range_map sources;
116 /* Minimum number of columns to put in a new source when we
117 need new columns and none are free. We double it whenever
118 we add a new source to keep the number of file descriptors
119 needed by the datasheet to a minimum, reducing the
120 likelihood of running out. */
121 unsigned column_min_alloc;
123 /* Indicates corrupted data in the datasheet. */
127 /* Maps from a range of physical columns to a source. */
130 struct range_map_node column_range;
131 struct source *source;
134 /* Is this operation a read or a write? */
141 static void free_source_info (struct datasheet *, struct source_info *);
142 static struct source_info *source_info_from_range_map (
143 struct range_map_node *);
144 static bool rw_case (struct datasheet *ds, enum rw_op op,
145 casenumber lrow, size_t start_column, size_t column_cnt,
148 /* Creates and returns a new datasheet.
150 If READER is nonnull, then the datasheet initially contains
151 the contents of READER. */
153 datasheet_create (struct casereader *reader)
155 /* Create datasheet. */
156 struct datasheet *ds = xmalloc (sizeof *ds);
157 ds->columns = axis_create ();
158 ds->rows = axis_create ();
159 range_map_init (&ds->sources);
160 ds->column_min_alloc = 1;
161 ds->taint = taint_create ();
168 struct source_info *si;
170 si = xmalloc (sizeof *si);
171 si->source = source_create_casereader (reader);
172 column_cnt = source_get_column_cnt (si->source);
173 row_cnt = source_get_backing_row_cnt (si->source);
174 source_increase_use (si->source, column_cnt);
176 if ( column_cnt > 0 )
178 unsigned long int column_start;
179 column_start = axis_extend (ds->columns, column_cnt);
180 axis_insert (ds->columns, 0, column_start, column_cnt);
181 range_map_insert (&ds->sources, column_start, column_cnt,
187 unsigned long int row_start;
188 row_start = axis_extend (ds->rows, row_cnt);
189 axis_insert (ds->rows, 0, row_start, row_cnt);
196 /* Destroys datasheet DS. */
198 datasheet_destroy (struct datasheet *ds)
203 axis_destroy (ds->columns);
204 axis_destroy (ds->rows);
205 while (!range_map_is_empty (&ds->sources))
207 struct range_map_node *r = range_map_first (&ds->sources);
208 struct source_info *si = source_info_from_range_map (r);
209 free_source_info (ds, si);
211 taint_destroy (ds->taint);
215 /* Moves datasheet DS to a new location in memory, and returns
216 the new location. Afterward, the datasheet must not be
217 accessed at its former location.
219 This function is useful for ensuring that all references to a
220 datasheet have been dropped, especially in conjunction with
221 tools like Valgrind. */
223 datasheet_rename (struct datasheet *ds)
225 struct datasheet *new = xmemdup (ds, sizeof *ds);
230 /* Returns true if datasheet DS is tainted.
231 A datasheet is tainted by an I/O error or by taint
232 propagation to the datasheet. */
234 datasheet_error (const struct datasheet *ds)
236 return taint_is_tainted (ds->taint);
239 /* Marks datasheet DS tainted. */
241 datasheet_force_error (struct datasheet *ds)
243 taint_set_taint (ds->taint);
246 /* Returns datasheet DS's taint object. */
248 datasheet_get_taint (const struct datasheet *ds)
253 /* Returns the number of rows in DS. */
255 datasheet_get_row_cnt (const struct datasheet *ds)
257 return axis_get_size (ds->rows);
260 /* Returns the number of columns in DS. */
262 datasheet_get_column_cnt (const struct datasheet *ds)
264 return axis_get_size (ds->columns);
267 /* Inserts CNT columns into datasheet DS just before column
268 BEFORE. Initializes the contents of each row in the inserted
269 columns to the CNT values in INIT_VALUES.
271 Returns true if successful, false on failure. In case of
272 failure, the datasheet is unchanged. */
274 datasheet_insert_columns (struct datasheet *ds,
275 const union value init_values[], size_t cnt,
281 unsigned long first_phy; /* First allocated physical column. */
282 unsigned long phy_cnt; /* Number of allocated physical columns. */
284 /* Allocate physical columns from the pool of available
286 if (!axis_allocate (ds->columns, cnt, &first_phy, &phy_cnt))
288 /* No columns were available. Create a new source and
289 extend the axis to make some new ones available. */
290 struct source_info *si;
292 phy_cnt = MAX (cnt, ds->column_min_alloc);
293 first_phy = axis_extend (ds->columns, phy_cnt);
294 ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2);
296 si = xmalloc (sizeof *si);
297 si->source = source_create_empty (phy_cnt);
298 range_map_insert (&ds->sources, first_phy, phy_cnt,
302 axis_make_available (ds->columns, first_phy + cnt,
308 /* Initialize the columns and insert them into the columns
312 struct range_map_node *r; /* Range map holding FIRST_PHY column. */
313 struct source_info *s; /* Source containing FIRST_PHY column. */
314 size_t source_avail; /* Number of phys columns available. */
315 size_t source_cnt; /* Number of phys columns to use. */
317 /* Figure out how many columns we can and want to take
318 starting at FIRST_PHY, and then insert them into the
320 r = range_map_lookup (&ds->sources, first_phy);
321 s = source_info_from_range_map (r);
322 source_avail = range_map_node_get_end (r) - first_phy;
323 source_cnt = MIN (phy_cnt, source_avail);
324 axis_insert (ds->columns, before, first_phy, source_cnt);
326 /* Initialize the data for those columns in the
328 if (!source_write_columns (s->source,
329 first_phy - range_map_node_get_start (r),
330 init_values, source_cnt))
332 datasheet_delete_columns (ds, before - added,
334 taint_set_taint (ds->taint);
337 source_increase_use (s->source, source_cnt);
340 phy_cnt -= source_cnt;
341 first_phy += source_cnt;
342 init_values += source_cnt;
344 before += source_cnt;
351 /* Deletes the CNT columns in DS starting from column START. */
353 datasheet_delete_columns (struct datasheet *ds, size_t start, size_t cnt)
357 /* Free up columns for reuse. */
358 for (lcol = start; lcol < start + cnt; lcol++)
360 size_t pcol = axis_map (ds->columns, lcol);
361 struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
362 struct source_info *si = source_info_from_range_map (r);
364 source_decrease_use (si->source, 1);
365 if (source_has_backing (si->source))
367 if (!source_in_use (si->source))
368 free_source_info (ds, si);
371 axis_make_available (ds->columns, pcol, 1);
374 /* Remove columns from logical-to-physical mapping. */
375 axis_remove (ds->columns, start, cnt);
378 /* Moves the CNT columns in DS starting at position OLD_START so
379 that they then start at position NEW_START. Equivalent to
380 deleting the column rows, then inserting them at what becomes
381 position NEW_START after the deletion.*/
383 datasheet_move_columns (struct datasheet *ds,
384 size_t old_start, size_t new_start,
387 axis_move (ds->columns, old_start, new_start, cnt);
390 /* Retrieves the contents of the given ROW in datasheet DS into
391 newly created case C. Returns true if successful, false on
394 datasheet_get_row (const struct datasheet *ds, casenumber row, struct ccase *c)
396 size_t column_cnt = datasheet_get_column_cnt (ds);
397 case_create (c, column_cnt);
398 if (rw_case ((struct datasheet *) ds, OP_READ,
399 row, 0, column_cnt, case_data_all_rw (c)))
408 /* Stores the contents of case C, which is destroyed, into the
409 given ROW in DS. Returns true on success, false on I/O error.
410 On failure, the given ROW might be partially modified or
413 datasheet_put_row (struct datasheet *ds, casenumber row, struct ccase *c)
415 size_t column_cnt = datasheet_get_column_cnt (ds);
416 bool ok = rw_case (ds, OP_WRITE, row, 0, column_cnt,
417 (union value *) case_data_all (c));
422 /* Stores the values of the WIDTH columns in DS in the given ROW
423 starting at COLUMN in DS into VALUES. Returns true if
424 successful, false on I/O error. */
426 datasheet_get_value (const struct datasheet *ds, casenumber row, size_t column,
427 union value *value, int width)
429 return rw_case ((struct datasheet *) ds,
430 OP_READ, row, column, value_cnt_from_width (width), value);
433 /* Stores the WIDTH given VALUES into the given ROW in DS
434 starting at COLUMN. Returns true if successful, false on I/O
435 error. On failure, the given ROW might be partially modified
438 datasheet_put_value (struct datasheet *ds, casenumber row, size_t column,
439 const union value *value, int width)
441 return rw_case (ds, OP_WRITE, row, column, value_cnt_from_width (width),
442 (union value *) value);
445 /* Inserts the CNT cases at C, which are destroyed, into
446 datasheet DS just before row BEFORE. Returns true if
447 successful, false on I/O error. On failure, datasheet DS is
450 datasheet_insert_rows (struct datasheet *ds,
451 casenumber before, struct ccase c[],
454 casenumber added = 0;
457 unsigned long first_phy;
458 unsigned long phy_cnt;
461 /* Allocate physical rows from the pool of available
463 if (!axis_allocate (ds->rows, cnt, &first_phy, &phy_cnt))
465 /* No rows were available. Extend the row axis to make
466 some new ones available. */
468 first_phy = axis_extend (ds->rows, cnt);
471 /* Insert the new rows into the row mapping. */
472 axis_insert (ds->rows, before, first_phy, phy_cnt);
474 /* Initialize the new rows. */
475 for (i = 0; i < phy_cnt; i++)
476 if (!datasheet_put_row (ds, before + i, &c[i]))
479 case_destroy (&c[i]);
480 datasheet_delete_rows (ds, before - added, phy_cnt + added);
493 /* Deletes the CNT rows in DS starting from row FIRST. */
495 datasheet_delete_rows (struct datasheet *ds,
496 casenumber first, casenumber cnt)
500 /* Free up rows for reuse.
502 for (lrow = first; lrow < first + cnt; lrow++)
503 axis_make_available (ds->rows, axis_map (ds->rows, lrow), 1);
505 /* Remove rows from logical-to-physical mapping. */
506 axis_remove (ds->rows, first, cnt);
509 /* Moves the CNT rows in DS starting at position OLD_START so
510 that they then start at position NEW_START. Equivalent to
511 deleting the given rows, then inserting them at what becomes
512 position NEW_START after the deletion. */
514 datasheet_move_rows (struct datasheet *ds,
515 size_t old_start, size_t new_start,
518 axis_move (ds->rows, old_start, new_start, cnt);
521 static const struct casereader_random_class datasheet_reader_class;
523 /* Creates and returns a casereader whose input cases are the
524 rows in datasheet DS. From the caller's perspective, DS is
525 effectively destroyed by this operation, such that the caller
526 must not reference it again. */
528 datasheet_make_reader (struct datasheet *ds)
530 struct casereader *reader;
531 ds = datasheet_rename (ds);
532 reader = casereader_create_random (datasheet_get_column_cnt (ds),
533 datasheet_get_row_cnt (ds),
534 &datasheet_reader_class, ds);
535 taint_propagate (datasheet_get_taint (ds), casereader_get_taint (reader));
539 /* "read" function for the datasheet random casereader. */
541 datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
542 casenumber case_idx, struct ccase *c)
544 struct datasheet *ds = ds_;
545 if (case_idx >= datasheet_get_row_cnt (ds))
547 else if (datasheet_get_row (ds, case_idx, c))
551 taint_set_taint (ds->taint);
556 /* "destroy" function for the datasheet random casereader. */
558 datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
560 struct datasheet *ds = ds_;
561 datasheet_destroy (ds);
564 /* "advance" function for the datasheet random casereader. */
566 datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
569 struct datasheet *ds = ds_;
570 datasheet_delete_rows (ds, 0, case_cnt);
573 /* Random casereader class for a datasheet. */
574 static const struct casereader_random_class datasheet_reader_class =
576 datasheet_reader_read,
577 datasheet_reader_destroy,
578 datasheet_reader_advance,
581 /* Removes SI from DS's set of sources and destroys its
584 free_source_info (struct datasheet *ds, struct source_info *si)
586 range_map_delete (&ds->sources, &si->column_range);
587 source_destroy (si->source);
591 static struct source_info *
592 source_info_from_range_map (struct range_map_node *node)
594 return range_map_data (node, struct source_info, column_range);
597 /* Reads (if OP is OP_READ) or writes (if op is OP_WRITE) the
598 COLUMN_CNT columns starting from column START_COLUMN in row
599 LROW to/from the COLUMN_CNT values in DATA. */
601 rw_case (struct datasheet *ds, enum rw_op op,
602 casenumber lrow, size_t start_column, size_t column_cnt,
608 assert (lrow < datasheet_get_row_cnt (ds));
609 assert (column_cnt <= datasheet_get_column_cnt (ds));
610 assert (start_column + column_cnt <= datasheet_get_column_cnt (ds));
612 prow = axis_map (ds->rows, lrow);
613 for (lcol = start_column; lcol < start_column + column_cnt; lcol++, data++)
615 size_t pcol = axis_map (ds->columns, lcol);
616 struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
617 struct source_info *s = source_info_from_range_map (r);
618 size_t pcol_ofs = pcol - range_map_node_get_start (r);
620 ? source_read (s->source, prow, pcol_ofs, data, 1)
621 : source_write (s->source, prow, pcol_ofs, data, 1)))
623 taint_set_taint (ds->taint);
632 An axis has two functions. First, it maintains a mapping from
633 logical (client-visible) to physical (storage) ordinates. The
634 axis_map and axis_get_size functions inspect this mapping, and
635 the axis_insert, axis_remove, and axis_move functions modify
636 it. Second, it tracks the set of ordinates that are unused
637 and available for reuse. (Not all unused ordinates are
638 available for reuse: in particular, unused columns that are
639 backed by a casereader are never reused.) The axis_allocate,
640 axis_make_available, and axis_extend functions affect the set
641 of available ordinates. */
644 struct tower log_to_phy; /* Map from logical to physical ordinates;
645 contains "struct axis_group"s. */
646 struct range_set *available; /* Set of unused, available ordinates. */
647 unsigned long int phy_size; /* Current physical length of axis. */
650 /* A mapping from logical to physical ordinates. */
653 struct tower_node logical; /* Range of logical ordinates. */
654 unsigned long phy_start; /* First corresponding physical ordinate. */
657 static struct axis_group *axis_group_from_tower_node (struct tower_node *);
658 static struct tower_node *make_axis_group (unsigned long int phy_start);
659 static struct tower_node *split_axis (struct axis *, unsigned long int where);
660 static void merge_axis_nodes (struct axis *, struct tower_node *,
661 struct tower_node **other_node);
662 static void check_axis_merged (const struct axis *axis UNUSED);
664 /* Creates and returns a new, initially empty axis. */
668 struct axis *axis = xmalloc (sizeof *axis);
669 tower_init (&axis->log_to_phy);
670 axis->available = range_set_create ();
675 /* Returns a clone of existing axis OLD.
677 Currently this is used only by the datasheet model checker
678 driver, but it could be otherwise useful. */
680 axis_clone (const struct axis *old)
682 const struct tower_node *node;
685 new = xmalloc (sizeof *new);
686 tower_init (&new->log_to_phy);
687 new->available = range_set_clone (old->available, NULL);
688 new->phy_size = old->phy_size;
690 for (node = tower_first (&old->log_to_phy); node != NULL;
691 node = tower_next (&old->log_to_phy, node))
693 unsigned long int size = tower_node_get_height (node);
694 struct axis_group *group = tower_data (node, struct axis_group, logical);
695 tower_insert (&new->log_to_phy, size, make_axis_group (group->phy_start),
702 /* Adds the state of AXIS to the MD4 hash context CTX.
704 This is only used by the datasheet model checker driver. It
705 is unlikely to be otherwise useful. */
707 axis_hash (const struct axis *axis, struct md4_ctx *ctx)
709 const struct tower_node *tn;
710 const struct range_set_node *rsn;
712 for (tn = tower_first (&axis->log_to_phy); tn != NULL;
713 tn = tower_next (&axis->log_to_phy, tn))
715 struct axis_group *group = tower_data (tn, struct axis_group, logical);
716 unsigned long int phy_start = group->phy_start;
717 unsigned long int size = tower_node_get_height (tn);
719 md4_process_bytes (&phy_start, sizeof phy_start, ctx);
720 md4_process_bytes (&size, sizeof size, ctx);
723 for (rsn = range_set_first (axis->available); rsn != NULL;
724 rsn = range_set_next (axis->available, rsn))
726 unsigned long int start = range_set_node_get_start (rsn);
727 unsigned long int end = range_set_node_get_end (rsn);
729 md4_process_bytes (&start, sizeof start, ctx);
730 md4_process_bytes (&end, sizeof end, ctx);
733 md4_process_bytes (&axis->phy_size, sizeof axis->phy_size, ctx);
738 axis_destroy (struct axis *axis)
743 while (!tower_is_empty (&axis->log_to_phy))
745 struct tower_node *node = tower_first (&axis->log_to_phy);
746 struct axis_group *group = tower_data (node, struct axis_group,
748 tower_delete (&axis->log_to_phy, node);
752 range_set_destroy (axis->available);
756 /* Allocates up to REQUEST contiguous unused and available
757 ordinates from AXIS. If successful, stores the number
758 obtained into *WIDTH and the ordinate of the first into
759 *START, marks the ordinates as now unavailable return true.
760 On failure, which occurs only if AXIS has no unused and
761 available ordinates, returns false without modifying AXIS. */
763 axis_allocate (struct axis *axis, unsigned long int request,
764 unsigned long int *start, unsigned long int *width)
766 return range_set_allocate (axis->available, request, start, width);
769 /* Marks the WIDTH contiguous ordinates in AXIS, starting from
770 START, as unused and available. */
772 axis_make_available (struct axis *axis,
773 unsigned long int start, unsigned long int width)
775 range_set_insert (axis->available, start, width);
778 /* Extends the total physical length of AXIS by WIDTH and returns
779 the first ordinate in the new physical region. */
780 static unsigned long int
781 axis_extend (struct axis *axis, unsigned long int width)
783 unsigned long int start = axis->phy_size;
784 axis->phy_size += width;
788 /* Returns the physical ordinate in AXIS corresponding to logical
789 ordinate LOG_POS. LOG_POS must be less than the logical
791 static unsigned long int
792 axis_map (const struct axis *axis, unsigned long log_pos)
794 struct tower_node *node;
795 struct axis_group *group;
796 unsigned long int group_start;
798 node = tower_lookup (&axis->log_to_phy, log_pos, &group_start);
799 group = tower_data (node, struct axis_group, logical);
800 return group->phy_start + (log_pos - group_start);
803 /* Returns the logical length of AXIS. */
805 axis_get_size (const struct axis *axis)
807 return tower_height (&axis->log_to_phy);
810 /* Inserts the CNT contiguous physical ordinates starting at
811 PHY_START into AXIS's logical-to-physical mapping, starting at
812 logical position LOG_START. */
814 axis_insert (struct axis *axis,
815 unsigned long int log_start, unsigned long int phy_start,
816 unsigned long int cnt)
818 struct tower_node *before = split_axis (axis, log_start);
819 struct tower_node *new = make_axis_group (phy_start);
820 tower_insert (&axis->log_to_phy, cnt, new, before);
821 merge_axis_nodes (axis, new, NULL);
822 check_axis_merged (axis);
825 /* Removes CNT ordinates from AXIS's logical-to-physical mapping
826 starting at logical position START. */
828 axis_remove (struct axis *axis,
829 unsigned long int start, unsigned long int cnt)
833 struct tower_node *last = split_axis (axis, start + cnt);
834 struct tower_node *cur, *next;
835 for (cur = split_axis (axis, start); cur != last; cur = next)
837 next = tower_delete (&axis->log_to_phy, cur);
838 free (axis_group_from_tower_node (cur));
840 merge_axis_nodes (axis, last, NULL);
841 check_axis_merged (axis);
845 /* Moves the CNT ordinates in AXIS's logical-to-mapping starting
846 at logical position OLD_START so that they then start at
847 position NEW_START. */
849 axis_move (struct axis *axis,
850 unsigned long int old_start, unsigned long int new_start,
851 unsigned long int cnt)
853 if (cnt > 0 && old_start != new_start)
855 struct tower_node *old_first, *old_last, *new_first;
856 struct tower_node *merge1, *merge2;
857 struct tower tmp_array;
859 /* Move ordinates OLD_START...(OLD_START + CNT) into new,
860 separate TMP_ARRAY. */
861 old_first = split_axis (axis, old_start);
862 old_last = split_axis (axis, old_start + cnt);
863 tower_init (&tmp_array);
864 tower_splice (&tmp_array, NULL,
865 &axis->log_to_phy, old_first, old_last);
866 merge_axis_nodes (axis, old_last, NULL);
867 check_axis_merged (axis);
869 /* Move TMP_ARRAY to position NEW_START. */
870 new_first = split_axis (axis, new_start);
871 merge1 = tower_first (&tmp_array);
872 merge2 = tower_last (&tmp_array);
873 if (merge2 == merge1)
875 tower_splice (&axis->log_to_phy, new_first, &tmp_array, old_first, NULL);
876 merge_axis_nodes (axis, merge1, &merge2);
877 merge_axis_nodes (axis, merge2, NULL);
878 check_axis_merged (axis);
882 /* Returns the axis_group in which NODE is embedded. */
883 static struct axis_group *
884 axis_group_from_tower_node (struct tower_node *node)
886 return tower_data (node, struct axis_group, logical);
889 /* Creates and returns a new axis_group at physical position
891 static struct tower_node *
892 make_axis_group (unsigned long phy_start)
894 struct axis_group *group = xmalloc (sizeof *group);
895 group->phy_start = phy_start;
896 return &group->logical;
899 /* Returns the tower_node in AXIS's logical-to-physical map whose
900 bottom edge is at exact level WHERE. If there is no such
901 tower_node in AXIS's logical-to-physical map, then split_axis
902 creates one by breaking an existing tower_node into two
903 separate ones, unless WHERE is equal to the tower height, in
904 which case it simply returns a null pointer. */
905 static struct tower_node *
906 split_axis (struct axis *axis, unsigned long int where)
908 unsigned long int group_start;
909 struct tower_node *group_node;
910 struct axis_group *group;
912 assert (where <= tower_height (&axis->log_to_phy));
913 if (where >= tower_height (&axis->log_to_phy))
916 group_node = tower_lookup (&axis->log_to_phy, where, &group_start);
917 group = axis_group_from_tower_node (group_node);
918 if (where > group_start)
920 unsigned long int size_1 = where - group_start;
921 unsigned long int size_2 = tower_node_get_height (group_node) - size_1;
922 struct tower_node *next = tower_next (&axis->log_to_phy, group_node);
923 struct tower_node *new = make_axis_group (group->phy_start + size_1);
924 tower_resize (&axis->log_to_phy, group_node, size_1);
925 tower_insert (&axis->log_to_phy, size_2, new, next);
929 return &group->logical;
932 /* Within AXIS, attempts to merge NODE (or the last node in AXIS,
933 if NODE is null) with its neighbor nodes. This is possible
934 when logically adjacent nodes are also adjacent physically (in
937 When a merge occurs, and OTHER_NODE is non-null and points to
938 the node to be deleted, this function also updates
939 *OTHER_NODE, if necessary, to ensure that it remains a valid
942 merge_axis_nodes (struct axis *axis, struct tower_node *node,
943 struct tower_node **other_node)
945 struct tower *t = &axis->log_to_phy;
946 struct axis_group *group;
947 struct tower_node *next, *prev;
949 /* Find node to potentially merge with neighbors. */
951 node = tower_last (t);
954 group = axis_group_from_tower_node (node);
956 /* Try to merge NODE with successor. */
957 next = tower_next (t, node);
960 struct axis_group *next_group = axis_group_from_tower_node (next);
961 unsigned long this_height = tower_node_get_height (node);
963 if (group->phy_start + this_height == next_group->phy_start)
965 unsigned long next_height = tower_node_get_height (next);
966 tower_resize (t, node, this_height + next_height);
967 if (other_node != NULL && *other_node == next)
968 *other_node = tower_next (t, *other_node);
969 tower_delete (t, next);
974 /* Try to merge NODE with predecessor. */
975 prev = tower_prev (t, node);
978 struct axis_group *prev_group = axis_group_from_tower_node (prev);
979 unsigned long prev_height = tower_node_get_height (prev);
981 if (prev_group->phy_start + prev_height == group->phy_start)
983 unsigned long this_height = tower_node_get_height (node);
984 group->phy_start = prev_group->phy_start;
985 tower_resize (t, node, this_height + prev_height);
986 if (other_node != NULL && *other_node == prev)
987 *other_node = tower_next (t, *other_node);
988 tower_delete (t, prev);
994 /* Verify that all potentially merge-able nodes in AXIS are
997 check_axis_merged (const struct axis *axis UNUSED)
999 #if ASSERT_LEVEL >= 10
1000 struct tower_node *prev, *node;
1002 for (prev = NULL, node = tower_first (&axis->log_to_phy); node != NULL;
1003 prev = node, node = tower_next (&axis->log_to_phy, node))
1006 struct axis_group *prev_group = axis_group_from_tower_node (prev);
1007 unsigned long prev_height = tower_node_get_height (prev);
1008 struct axis_group *node_group = axis_group_from_tower_node (node);
1009 assert (prev_group->phy_start + prev_height != node_group->phy_start);
1017 size_t columns_used; /* Number of columns in use by client. */
1018 struct sparse_cases *data; /* Data at top level, atop the backing. */
1019 struct casereader *backing; /* Backing casereader (or null). */
1020 casenumber backing_rows; /* Number of rows in backing (if nonnull). */
1023 /* Creates and returns an empty, unbacked source with COLUMN_CNT
1024 columns and an initial "columns_used" of 0. */
1025 static struct source *
1026 source_create_empty (size_t column_cnt)
1028 struct source *source = xmalloc (sizeof *source);
1029 source->columns_used = 0;
1030 source->data = sparse_cases_create (column_cnt);
1031 source->backing = NULL;
1032 source->backing_rows = 0;
1036 /* Creates and returns a new source backed by READER and with the
1037 same initial dimensions and content. */
1038 static struct source *
1039 source_create_casereader (struct casereader *reader)
1041 struct source *source
1042 = source_create_empty (casereader_get_value_cnt (reader));
1043 source->backing = reader;
1044 source->backing_rows = casereader_count_cases (reader);
1048 /* Returns a clone of source OLD with the same data and backing
1051 Currently this is used only by the datasheet model checker
1052 driver, but it could be otherwise useful. */
1053 static struct source *
1054 source_clone (const struct source *old)
1056 struct source *new = xmalloc (sizeof *new);
1057 new->columns_used = old->columns_used;
1058 new->data = sparse_cases_clone (old->data);
1059 new->backing = old->backing != NULL ? casereader_clone (old->backing) : NULL;
1060 new->backing_rows = old->backing_rows;
1061 if (new->data == NULL)
1063 source_destroy (new);
1069 /* Increases the columns_used count of SOURCE by DELTA.
1070 The new value must not exceed SOURCE's number of columns. */
1072 source_increase_use (struct source *source, size_t delta)
1074 source->columns_used += delta;
1075 assert (source->columns_used <= sparse_cases_get_value_cnt (source->data));
1078 /* Decreases the columns_used count of SOURCE by DELTA.
1079 This must not attempt to decrease the columns_used count below
1082 source_decrease_use (struct source *source, size_t delta)
1084 assert (delta <= source->columns_used);
1085 source->columns_used -= delta;
1088 /* Returns true if SOURCE has any columns in use,
1091 source_in_use (const struct source *source)
1093 return source->columns_used > 0;
1096 /* Destroys SOURCE and its data and backing, if any. */
1098 source_destroy (struct source *source)
1102 sparse_cases_destroy (source->data);
1103 casereader_destroy (source->backing);
1108 /* Returns the number of rows in SOURCE's backing casereader
1109 (SOURCE must have a backing casereader). */
1111 source_get_backing_row_cnt (const struct source *source)
1113 assert (source_has_backing (source));
1114 return source->backing_rows;
1117 /* Returns the number of columns in SOURCE. */
1119 source_get_column_cnt (const struct source *source)
1121 return sparse_cases_get_value_cnt (source->data);
1124 /* Reads VALUE_CNT columns from SOURCE in the given ROW, starting
1125 from COLUMN, into VALUES. Returns true if successful, false
1128 source_read (const struct source *source,
1129 casenumber row, size_t column,
1130 union value values[], size_t value_cnt)
1132 if (source->backing == NULL || sparse_cases_contains_row (source->data, row))
1133 return sparse_cases_read (source->data, row, column, values, value_cnt);
1139 assert (source->backing != NULL);
1140 ok = casereader_peek (source->backing, row, &c);
1143 case_copy_out (&c, column, values, value_cnt);
1150 /* Writes the VALUE_CNT values in VALUES to SOURCE in the given
1151 ROW, starting at ROW. Returns true if successful, false on
1152 I/O error. On error, the row's data may be completely or
1153 partially corrupted, both inside and outside the region to be
1156 source_write (struct source *source,
1157 casenumber row, size_t column,
1158 const union value values[], size_t value_cnt)
1160 size_t column_cnt = sparse_cases_get_value_cnt (source->data);
1163 if (source->backing == NULL
1164 || (column == 0 && value_cnt == column_cnt)
1165 || sparse_cases_contains_row (source->data, row))
1166 ok = sparse_cases_write (source->data, row, column, values, value_cnt);
1170 if (row < source->backing_rows)
1171 ok = casereader_peek (source->backing, row, &c);
1174 /* It's not one of the backed rows. Ideally, this
1175 should never happen: we'd always be writing the full
1176 contents of new, unbacked rows in a single call to
1177 this function, so that the first case above would
1178 trigger. But that's a little difficult at higher
1179 levels, so that we in fact usually write the full
1180 contents of new, unbacked rows in multiple calls to
1181 this function. Make this work. */
1182 case_create (&c, column_cnt);
1187 case_copy_in (&c, column, values, value_cnt);
1188 ok = sparse_cases_write (source->data, row, 0,
1189 case_data_all (&c), column_cnt);
1196 /* Within SOURCE, which must not have a backing casereader,
1197 writes the VALUE_CNT values in VALUES_CNT to the VALUE_CNT
1198 columns starting from START_COLUMN, in every row, even in rows
1199 not yet otherwise initialized. Returns true if successful,
1200 false if an I/O error occurs.
1202 We don't support backing != NULL because (1) it's harder and
1203 (2) source_write_columns is only called by
1204 datasheet_insert_columns, which doesn't reuse columns from
1205 sources that are backed by casereaders. */
1207 source_write_columns (struct source *source, size_t start_column,
1208 const union value values[], size_t value_cnt)
1210 assert (source->backing == NULL);
1212 return sparse_cases_write_columns (source->data, start_column,
1216 /* Returns true if SOURCE has a backing casereader, false
1219 source_has_backing (const struct source *source)
1221 return source->backing != NULL;
1224 /* Datasheet model checker test driver. */
1226 /* Maximum size of datasheet supported for model checking
1231 /* Hashes the structure of datasheet DS and returns the hash.
1232 We use MD4 because it is much faster than MD5 or SHA-1 but its
1233 collision resistance is just as good. */
1235 hash_datasheet (const struct datasheet *ds)
1237 unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
1239 struct range_map_node *r;
1241 md4_init_ctx (&ctx);
1242 axis_hash (ds->columns, &ctx);
1243 axis_hash (ds->rows, &ctx);
1244 for (r = range_map_first (&ds->sources); r != NULL;
1245 r = range_map_next (&ds->sources, r))
1247 unsigned long int start = range_map_node_get_start (r);
1248 unsigned long int end = range_map_node_get_end (r);
1249 md4_process_bytes (&start, sizeof start, &ctx);
1250 md4_process_bytes (&end, sizeof end, &ctx);
1252 md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc,
1254 md4_finish_ctx (&ctx, hash);
1258 /* Checks that datasheet DS contains has ROW_CNT rows, COLUMN_CNT
1259 columns, and the same contents as ARRAY, reporting any
1260 mismatches via mc_error. Then, adds DS to MC as a new state. */
1262 check_datasheet (struct mc *mc, struct datasheet *ds,
1263 double array[MAX_ROWS][MAX_COLS],
1264 size_t row_cnt, size_t column_cnt)
1266 assert (row_cnt < MAX_ROWS);
1267 assert (column_cnt < MAX_COLS);
1269 /* If it is a duplicate hash, discard the state before checking
1270 its consistency, to save time. */
1271 if (mc_discard_dup_state (mc, hash_datasheet (ds)))
1273 datasheet_destroy (ds);
1277 if (row_cnt != datasheet_get_row_cnt (ds))
1278 mc_error (mc, "row count (%lu) does not match expected (%zu)",
1279 (unsigned long int) datasheet_get_row_cnt (ds), row_cnt);
1280 else if (column_cnt != datasheet_get_column_cnt (ds))
1281 mc_error (mc, "column count (%lu) does not match expected (%zu)",
1282 (unsigned long int) datasheet_get_column_cnt (ds), column_cnt);
1287 for (row = 0; row < row_cnt; row++)
1288 for (col = 0; col < column_cnt; col++)
1291 if (!datasheet_get_value (ds, row, col, &v, 1))
1293 if (v.f != array[row][col])
1294 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: %g != %g",
1295 row, col, row_cnt, column_cnt, v.f, array[row][col]);
1299 mc_add_state (mc, ds);
1302 /* Extracts the contents of DS into DATA. */
1304 extract_data (const struct datasheet *ds, double data[MAX_ROWS][MAX_COLS])
1306 size_t column_cnt = datasheet_get_column_cnt (ds);
1307 size_t row_cnt = datasheet_get_row_cnt (ds);
1310 assert (row_cnt < MAX_ROWS);
1311 assert (column_cnt < MAX_COLS);
1312 for (row = 0; row < row_cnt; row++)
1313 for (col = 0; col < column_cnt; col++)
1316 if (!datasheet_get_value (ds, row, col, &v, 1))
1318 data[row][col] = v.f;
1322 /* Clones the structure and contents of ODS into *DS,
1323 and the contents of ODATA into DATA. */
1325 clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS],
1326 struct datasheet **ds_, double data[MAX_ROWS][MAX_COLS])
1328 struct datasheet *ds;
1329 struct range_map_node *r;
1331 /* Clone ODS into DS. */
1332 ds = *ds_ = xmalloc (sizeof *ds);
1333 ds->columns = axis_clone (ods->columns);
1334 ds->rows = axis_clone (ods->rows);
1335 range_map_init (&ds->sources);
1336 for (r = range_map_first (&ods->sources); r != NULL;
1337 r = range_map_next (&ods->sources, r))
1339 const struct source_info *osi = source_info_from_range_map (r);
1340 struct source_info *si = xmalloc (sizeof *si);
1341 si->source = source_clone (osi->source);
1342 range_map_insert (&ds->sources, range_map_node_get_start (r),
1343 range_map_node_get_width (r), &si->column_range);
1345 ds->column_min_alloc = ods->column_min_alloc;
1346 ds->taint = taint_create ();
1348 /* Clone ODATA into DATA. */
1349 memcpy (data, odata, MAX_ROWS * MAX_COLS * sizeof **data);
1352 /* "init" function for struct mc_class. */
1354 datasheet_mc_init (struct mc *mc)
1356 struct datasheet_test_params *params = mc_get_aux (mc);
1357 struct datasheet *ds;
1359 if (params->backing_rows == 0 && params->backing_cols == 0)
1361 /* Create unbacked datasheet. */
1362 ds = datasheet_create (NULL);
1363 mc_name_operation (mc, "empty datasheet");
1364 check_datasheet (mc, ds, NULL, 0, 0);
1368 /* Create datasheet with backing. */
1369 struct casewriter *writer;
1370 struct casereader *reader;
1371 double data[MAX_ROWS][MAX_COLS];
1374 assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
1375 assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
1377 writer = mem_writer_create (params->backing_cols);
1378 for (row = 0; row < params->backing_rows; row++)
1383 case_create (&c, params->backing_cols);
1384 for (col = 0; col < params->backing_cols; col++)
1386 double value = params->next_value++;
1387 data[row][col] = value;
1388 case_data_rw_idx (&c, col)->f = value;
1390 casewriter_write (writer, &c);
1392 reader = casewriter_make_reader (writer);
1393 assert (reader != NULL);
1395 ds = datasheet_create (reader);
1396 mc_name_operation (mc, "datasheet with (%d,%d) backing",
1397 params->backing_rows, params->backing_cols);
1398 check_datasheet (mc, ds, data,
1399 params->backing_rows, params->backing_cols);
1403 /* "mutate" function for struct mc_class. */
1405 datasheet_mc_mutate (struct mc *mc, const void *ods_)
1407 struct datasheet_test_params *params = mc_get_aux (mc);
1409 const struct datasheet *ods = ods_;
1410 double odata[MAX_ROWS][MAX_COLS];
1411 double data[MAX_ROWS][MAX_COLS];
1412 size_t column_cnt = datasheet_get_column_cnt (ods);
1413 size_t row_cnt = datasheet_get_row_cnt (ods);
1414 size_t pos, new_pos, cnt;
1416 extract_data (ods, odata);
1418 /* Insert all possible numbers of columns in all possible
1420 for (pos = 0; pos <= column_cnt; pos++)
1421 for (cnt = 0; cnt <= params->max_cols - column_cnt; cnt++)
1422 if (mc_include_state (mc))
1424 struct datasheet *ds;
1425 union value new[MAX_COLS];
1428 mc_name_operation (mc, "insert %zu columns at %zu", cnt, pos);
1429 clone_model (ods, odata, &ds, data);
1431 for (i = 0; i < cnt; i++)
1432 new[i].f = params->next_value++;
1434 if (!datasheet_insert_columns (ds, new, cnt, pos))
1435 mc_error (mc, "datasheet_insert_columns failed");
1437 for (i = 0; i < row_cnt; i++)
1439 insert_range (&data[i][0], column_cnt, sizeof data[i][0],
1441 for (j = 0; j < cnt; j++)
1442 data[i][pos + j] = new[j].f;
1445 check_datasheet (mc, ds, data, row_cnt, column_cnt + cnt);
1448 /* Delete all possible numbers of columns from all possible
1450 for (pos = 0; pos < column_cnt; pos++)
1451 for (cnt = 0; cnt < column_cnt - pos; cnt++)
1452 if (mc_include_state (mc))
1454 struct datasheet *ds;
1457 mc_name_operation (mc, "delete %zu columns at %zu", cnt, pos);
1458 clone_model (ods, odata, &ds, data);
1460 datasheet_delete_columns (ds, pos, cnt);
1462 for (i = 0; i < row_cnt; i++)
1463 remove_range (&data[i], column_cnt, sizeof *data[i], pos, cnt);
1465 check_datasheet (mc, ds, data, row_cnt, column_cnt - cnt);
1468 /* Move all possible numbers of columns from all possible
1469 existing positions to all possible new positions. */
1470 for (pos = 0; pos < column_cnt; pos++)
1471 for (cnt = 0; cnt < column_cnt - pos; cnt++)
1472 for (new_pos = 0; new_pos < column_cnt - cnt; new_pos++)
1473 if (mc_include_state (mc))
1475 struct datasheet *ds;
1478 clone_model (ods, odata, &ds, data);
1479 mc_name_operation (mc, "move %zu columns from %zu to %zu",
1482 datasheet_move_columns (ds, pos, new_pos, cnt);
1484 for (i = 0; i < row_cnt; i++)
1485 move_range (&data[i], column_cnt, sizeof data[i][0],
1488 check_datasheet (mc, ds, data, row_cnt, column_cnt);
1491 /* Insert all possible numbers of rows in all possible
1493 for (pos = 0; pos <= row_cnt; pos++)
1494 for (cnt = 0; cnt <= params->max_rows - row_cnt; cnt++)
1495 if (mc_include_state (mc))
1497 struct datasheet *ds;
1498 struct ccase c[MAX_ROWS];
1501 clone_model (ods, odata, &ds, data);
1502 mc_name_operation (mc, "insert %zu rows at %zu", cnt, pos);
1504 for (i = 0; i < cnt; i++)
1506 case_create (&c[i], column_cnt);
1507 for (j = 0; j < column_cnt; j++)
1508 case_data_rw_idx (&c[i], j)->f = params->next_value++;
1511 insert_range (data, row_cnt, sizeof data[pos], pos, cnt);
1512 for (i = 0; i < cnt; i++)
1513 for (j = 0; j < column_cnt; j++)
1514 data[i + pos][j] = case_num_idx (&c[i], j);
1516 if (!datasheet_insert_rows (ds, pos, c, cnt))
1517 mc_error (mc, "datasheet_insert_rows failed");
1519 check_datasheet (mc, ds, data, row_cnt + cnt, column_cnt);
1522 /* Delete all possible numbers of rows from all possible
1524 for (pos = 0; pos < row_cnt; pos++)
1525 for (cnt = 0; cnt < row_cnt - pos; cnt++)
1526 if (mc_include_state (mc))
1528 struct datasheet *ds;
1530 clone_model (ods, odata, &ds, data);
1531 mc_name_operation (mc, "delete %zu rows at %zu", cnt, pos);
1533 datasheet_delete_rows (ds, pos, cnt);
1535 remove_range (&data[0], row_cnt, sizeof data[0], pos, cnt);
1537 check_datasheet (mc, ds, data, row_cnt - cnt, column_cnt);
1540 /* Move all possible numbers of rows from all possible existing
1541 positions to all possible new positions. */
1542 for (pos = 0; pos < row_cnt; pos++)
1543 for (cnt = 0; cnt < row_cnt - pos; cnt++)
1544 for (new_pos = 0; new_pos < row_cnt - cnt; new_pos++)
1545 if (mc_include_state (mc))
1547 struct datasheet *ds;
1549 clone_model (ods, odata, &ds, data);
1550 mc_name_operation (mc, "move %zu rows from %zu to %zu",
1553 datasheet_move_rows (ds, pos, new_pos, cnt);
1555 move_range (&data[0], row_cnt, sizeof data[0],
1558 check_datasheet (mc, ds, data, row_cnt, column_cnt);
1562 /* "destroy" function for struct mc_class. */
1564 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
1566 struct datasheet *ds = ds_;
1567 datasheet_destroy (ds);
1570 /* Executes the model checker on the datasheet test driver with
1571 the given OPTIONS and passing in the given PARAMS, which must
1572 point to a modifiable "struct datasheet_test_params". If any
1573 value in PARAMS is out of range, it will be adjusted into the
1574 valid range before running the test.
1576 Returns the results of the model checking run. */
1578 datasheet_test (struct mc_options *options, void *params_)
1580 struct datasheet_test_params *params = params_;
1581 static const struct mc_class datasheet_mc_class =
1584 datasheet_mc_mutate,
1585 datasheet_mc_destroy,
1588 params->next_value = 1;
1589 params->max_rows = MIN (params->max_rows, MAX_ROWS);
1590 params->max_cols = MIN (params->max_cols, MAX_COLS);
1591 params->backing_rows = MIN (params->backing_rows, params->max_rows);
1592 params->backing_cols = MIN (params->backing_cols, params->max_cols);
1594 mc_options_set_aux (options, params);
1595 return mc_run (&datasheet_mc_class, options);