1 /* PSPP - computes sample statistics.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <data/datasheet.h>
26 #include <data/casereader-provider.h>
27 #include <data/casereader.h>
28 #include <data/casewriter.h>
29 #include <data/sparse-cases.h>
30 #include <libpspp/array.h>
31 #include <libpspp/assertion.h>
32 #include <libpspp/model-checker.h>
33 #include <libpspp/range-map.h>
34 #include <libpspp/range-set.h>
35 #include <libpspp/taint.h>
36 #include <libpspp/tower.h>
42 static struct axis *axis_create (void);
43 static struct axis *axis_clone (const struct axis *);
44 static void axis_destroy (struct axis *);
46 static void axis_hash (const struct axis *, struct md4_ctx *);
48 static bool axis_allocate (struct axis *, unsigned long int request,
49 unsigned long int *start,
50 unsigned long int *width);
51 static void axis_make_available (struct axis *,
52 unsigned long int start,
53 unsigned long int width);
54 static unsigned long int axis_extend (struct axis *, unsigned long int width);
56 static unsigned long int axis_map (const struct axis *, unsigned long log_pos);
58 static unsigned long axis_get_size (const struct axis *);
59 static void axis_insert (struct axis *,
60 unsigned long int log_start,
61 unsigned long int phy_start,
62 unsigned long int cnt);
63 static void axis_remove (struct axis *,
64 unsigned long int start, unsigned long int cnt);
65 static void axis_move (struct axis *,
66 unsigned long int old_start,
67 unsigned long int new_start,
68 unsigned long int cnt);
70 static struct source *source_create_empty (size_t column_cnt);
71 static struct source *source_create_casereader (struct casereader *);
72 static struct source *source_clone (const struct source *);
73 static void source_destroy (struct source *);
75 static casenumber source_get_backing_row_cnt (const struct source *);
76 static size_t source_get_column_cnt (const struct source *);
78 static bool source_read (const struct source *,
79 casenumber row, size_t column,
80 union value[], size_t value_cnt);
81 static bool source_write (struct source *,
82 casenumber row, size_t column,
83 const union value[], size_t value_cnt);
84 static bool source_write_columns (struct source *, size_t start_column,
85 const union value[], size_t value_cnt);
86 static bool source_has_backing (const struct source *);
87 static void source_increase_use (struct source *, size_t delta);
88 static void source_decrease_use (struct source *, size_t delta);
89 static bool source_in_use (const struct source *);
91 /* A datasheet is internally composed from a set of data files,
92 called "sources". The sources that make up a datasheet must
93 have the same number of rows (cases), but their numbers of
94 columns (variables) may vary.
96 A datasheet's external view is produced by mapping (permuting
97 and selecting) its internal data. Thus, we can rearrange or
98 delete rows or columns simply by modifying the mapping. We
99 add rows by adding rows to each source and to the row mapping.
100 We add columns by adding a new source, then adding that source
101 to the column mapping.
103 Each source in a datasheet can be a casereader or a
104 sparse_cases. Casereaders are read-only, so when sources made
105 from casereaders need to be modified, it is done "virtually"
106 through being overlaid by a sparse_cases. */
111 /* Mappings from logical to physical columns/rows. */
112 struct axis *columns;
115 /* Mapping from physical columns to "source_info"s. */
116 struct range_map sources;
118 /* Minimum number of columns to put in a new source when we
119 need new columns and none are free. We double it whenever
120 we add a new source to keep the number of file descriptors
121 needed by the datasheet to a minimum, reducing the
122 likelihood of running out. */
123 unsigned column_min_alloc;
125 /* Indicates corrupted data in the datasheet. */
129 /* Maps from a range of physical columns to a source. */
132 struct range_map_node column_range;
133 struct source *source;
136 /* Is this operation a read or a write? */
143 static void free_source_info (struct datasheet *, struct source_info *);
144 static struct source_info *source_info_from_range_map (
145 struct range_map_node *);
146 static bool rw_case (struct datasheet *ds, enum rw_op op,
147 casenumber lrow, size_t start_column, size_t column_cnt,
150 /* Creates and returns a new datasheet.
152 If READER is nonnull, then the datasheet initially contains
153 the contents of READER. */
155 datasheet_create (struct casereader *reader)
157 /* Create datasheet. */
158 struct datasheet *ds = xmalloc (sizeof *ds);
159 ds->columns = axis_create ();
160 ds->rows = axis_create ();
161 range_map_init (&ds->sources);
162 ds->column_min_alloc = 1;
163 ds->taint = taint_create ();
170 struct source_info *si;
172 si = xmalloc (sizeof *si);
173 si->source = source_create_casereader (reader);
174 column_cnt = source_get_column_cnt (si->source);
175 row_cnt = source_get_backing_row_cnt (si->source);
176 source_increase_use (si->source, column_cnt);
178 if ( column_cnt > 0 )
180 unsigned long int column_start;
181 column_start = axis_extend (ds->columns, column_cnt);
182 axis_insert (ds->columns, 0, column_start, column_cnt);
183 range_map_insert (&ds->sources, column_start, column_cnt,
189 unsigned long int row_start;
190 row_start = axis_extend (ds->rows, row_cnt);
191 axis_insert (ds->rows, 0, row_start, row_cnt);
198 /* Destroys datasheet DS. */
200 datasheet_destroy (struct datasheet *ds)
205 axis_destroy (ds->columns);
206 axis_destroy (ds->rows);
207 while (!range_map_is_empty (&ds->sources))
209 struct range_map_node *r = range_map_first (&ds->sources);
210 struct source_info *si = source_info_from_range_map (r);
211 free_source_info (ds, si);
213 taint_destroy (ds->taint);
217 /* Moves datasheet DS to a new location in memory, and returns
218 the new location. Afterward, the datasheet must not be
219 accessed at its former location.
221 This function is useful for ensuring that all references to a
222 datasheet have been dropped, especially in conjunction with
223 tools like Valgrind. */
225 datasheet_rename (struct datasheet *ds)
227 struct datasheet *new = xmemdup (ds, sizeof *ds);
232 /* Returns true if a I/O error has occurred while processing a
233 datasheet operation. */
235 datasheet_error (const struct datasheet *ds)
237 return taint_is_tainted (ds->taint);
241 datasheet_force_error (struct datasheet *ds)
243 taint_set_taint (ds->taint);
247 datasheet_get_taint (const struct datasheet *ds)
252 /* Returns the number of rows in DS. */
254 datasheet_get_row_cnt (const struct datasheet *ds)
256 return axis_get_size (ds->rows);
259 /* Returns the number of columns in DS. */
261 datasheet_get_column_cnt (const struct datasheet *ds)
263 return axis_get_size (ds->columns);
266 /* Inserts CNT columns into datasheet DS just before column
267 BEFORE. Initializes the contents of each row in the inserted
268 columns to the CNT values in INIT_VALUES.
270 Returns true if successful, false on failure. In case of
271 failure, the datasheet is unchanged. */
273 datasheet_insert_columns (struct datasheet *ds,
274 const union value init_values[], size_t cnt,
280 unsigned long first_phy; /* First allocated physical column. */
281 unsigned long phy_cnt; /* Number of allocated physical columns. */
283 /* Allocate physical columns from the pool of available
285 if (!axis_allocate (ds->columns, cnt, &first_phy, &phy_cnt))
287 /* No columns were available. Create a new source and
288 extend the axis to make some new ones available. */
289 struct source_info *si;
291 phy_cnt = MAX (cnt, ds->column_min_alloc);
292 first_phy = axis_extend (ds->columns, phy_cnt);
293 ds->column_min_alloc = MIN (65536, ds->column_min_alloc * 2);
295 si = xmalloc (sizeof *si);
296 si->source = source_create_empty (phy_cnt);
297 range_map_insert (&ds->sources, first_phy, phy_cnt,
301 axis_make_available (ds->columns, first_phy + cnt,
307 /* Initialize the columns and insert them into the columns
311 struct range_map_node *r; /* Range map holding FIRST_PHY column. */
312 struct source_info *s; /* Source containing FIRST_PHY column. */
313 size_t source_avail; /* Number of phys columns available. */
314 size_t source_cnt; /* Number of phys columns to use. */
316 /* Figure out how many columns we can and want to take
317 starting at FIRST_PHY, and then insert them into the
319 r = range_map_lookup (&ds->sources, first_phy);
320 s = source_info_from_range_map (r);
321 source_avail = range_map_node_get_end (r) - first_phy;
322 source_cnt = MIN (phy_cnt, source_avail);
323 axis_insert (ds->columns, before, first_phy, source_cnt);
325 /* Initialize the data for those columns in the
327 if (!source_write_columns (s->source,
328 first_phy - range_map_node_get_start (r),
329 init_values, source_cnt))
331 datasheet_delete_columns (ds, before - added,
333 taint_set_taint (ds->taint);
336 source_increase_use (s->source, source_cnt);
339 phy_cnt -= source_cnt;
340 first_phy += source_cnt;
341 init_values += source_cnt;
343 before += source_cnt;
350 /* Deletes the CNT columns in DS starting from column START. */
352 datasheet_delete_columns (struct datasheet *ds, size_t start, size_t cnt)
356 /* Free up columns for reuse. */
357 for (lcol = start; lcol < start + cnt; lcol++)
359 size_t pcol = axis_map (ds->columns, lcol);
360 struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
361 struct source_info *si = source_info_from_range_map (r);
363 source_decrease_use (si->source, 1);
364 if (source_has_backing (si->source))
366 if (!source_in_use (si->source))
367 free_source_info (ds, si);
370 axis_make_available (ds->columns, pcol, 1);
373 /* Remove columns from logical-to-physical mapping. */
374 axis_remove (ds->columns, start, cnt);
377 /* Moves the CNT columns in DS starting at position OLD_START so
378 that they then start at position NEW_START. Equivalent to
379 deleting the column rows, then inserting them at what becomes
380 position NEW_START after the deletion.*/
382 datasheet_move_columns (struct datasheet *ds,
383 size_t old_start, size_t new_start,
386 axis_move (ds->columns, old_start, new_start, cnt);
389 /* Retrieves the contents of the given ROW in datasheet DS into
390 newly created case C. Returns true if successful, false on
393 datasheet_get_row (const struct datasheet *ds, casenumber row, struct ccase *c)
395 size_t column_cnt = datasheet_get_column_cnt (ds);
396 case_create (c, column_cnt);
397 if (rw_case ((struct datasheet *) ds, OP_READ,
398 row, 0, column_cnt, case_data_all_rw (c)))
407 /* Stores the contents of case C, which is destroyed, into the
408 given ROW in DS. Returns true on success, false on I/O error.
409 On failure, the given ROW might be partially modified or
412 datasheet_put_row (struct datasheet *ds, casenumber row, struct ccase *c)
414 size_t column_cnt = datasheet_get_column_cnt (ds);
415 bool ok = rw_case (ds, OP_WRITE, row, 0, column_cnt,
416 (union value *) case_data_all (c));
421 /* Stores the values of the WIDTH columns in DS in the given ROW
422 starting at COLUMN in DS into VALUES. Returns true if
423 successful, false on I/O error. */
425 datasheet_get_value (const struct datasheet *ds, casenumber row, size_t column,
426 union value *value, int width)
428 return rw_case ((struct datasheet *) ds,
429 OP_READ, row, column, value_cnt_from_width (width), value);
432 /* Stores the WIDTH given VALUES into the given ROW in DS
433 starting at COLUMN. Returns true if successful, false on I/O
434 error. On failure, the given ROW might be partially modified
437 datasheet_put_value (struct datasheet *ds, casenumber row, size_t column,
438 const union value *value, int width)
440 return rw_case (ds, OP_WRITE, row, column, value_cnt_from_width (width),
441 (union value *) value);
444 /* Inserts the CNT cases at C, which are destroyed, into
445 datasheet DS just before row BEFORE. Returns true if
446 successful, false on I/O error. On failure, datasheet DS is
449 datasheet_insert_rows (struct datasheet *ds,
450 casenumber before, struct ccase c[],
453 casenumber added = 0;
456 unsigned long first_phy;
457 unsigned long phy_cnt;
460 /* Allocate physical rows from the pool of available
462 if (!axis_allocate (ds->rows, cnt, &first_phy, &phy_cnt))
464 /* No rows were available. Extend the row axis to make
465 some new ones available. */
467 first_phy = axis_extend (ds->rows, cnt);
470 /* Insert the new rows into the row mapping. */
471 axis_insert (ds->rows, before, first_phy, phy_cnt);
473 /* Initialize the new rows. */
474 for (i = 0; i < phy_cnt; i++)
475 if (!datasheet_put_row (ds, before + i, &c[i]))
478 case_destroy (&c[i]);
479 datasheet_delete_rows (ds, before - added, phy_cnt + added);
492 /* Deletes the CNT rows in DS starting from row FIRST. */
494 datasheet_delete_rows (struct datasheet *ds,
495 casenumber first, casenumber cnt)
499 /* Free up rows for reuse.
501 for (lrow = first; lrow < first + cnt; lrow++)
502 axis_make_available (ds->rows, axis_map (ds->rows, lrow), 1);
504 /* Remove rows from logical-to-physical mapping. */
505 axis_remove (ds->rows, first, cnt);
508 /* Moves the CNT rows in DS starting at position OLD_START so
509 that they then start at position NEW_START. Equivalent to
510 deleting the given rows, then inserting them at what becomes
511 position NEW_START after the deletion. */
513 datasheet_move_rows (struct datasheet *ds,
514 size_t old_start, size_t new_start,
517 axis_move (ds->rows, old_start, new_start, cnt);
520 static const struct casereader_random_class datasheet_reader_class;
522 /* Creates and returns a casereader whose input cases are the
523 rows in datasheet DS. From the caller's perspective, DS is
524 effectively destroyed by this operation, such that the caller
525 must not reference it again. */
527 datasheet_make_reader (struct datasheet *ds)
529 struct casereader *reader;
530 ds = datasheet_rename (ds);
531 reader = casereader_create_random (datasheet_get_column_cnt (ds),
532 datasheet_get_row_cnt (ds),
533 &datasheet_reader_class, ds);
534 taint_propagate (datasheet_get_taint (ds), casereader_get_taint (reader));
539 datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
540 casenumber case_idx, struct ccase *c)
542 struct datasheet *ds = ds_;
543 if (case_idx >= datasheet_get_row_cnt (ds))
545 else if (datasheet_get_row (ds, case_idx, c))
549 taint_set_taint (ds->taint);
555 datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
557 struct datasheet *ds = ds_;
558 datasheet_destroy (ds);
562 datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
565 struct datasheet *ds = ds_;
566 datasheet_delete_rows (ds, 0, case_cnt);
569 static const struct casereader_random_class datasheet_reader_class =
571 datasheet_reader_read,
572 datasheet_reader_destroy,
573 datasheet_reader_advance,
576 /* Removes SI from DS's set of sources and destroys its
579 free_source_info (struct datasheet *ds, struct source_info *si)
581 range_map_delete (&ds->sources, &si->column_range);
582 source_destroy (si->source);
586 static struct source_info *
587 source_info_from_range_map (struct range_map_node *node)
589 return range_map_data (node, struct source_info, column_range);
592 /* Reads (if OP is OP_READ) or writes (if op is OP_WRITE) the
593 COLUMN_CNT columns starting from column START_COLUMN in row
594 LROW to/from the COLUMN_CNT values in DATA. */
596 rw_case (struct datasheet *ds, enum rw_op op,
597 casenumber lrow, size_t start_column, size_t column_cnt,
603 assert (lrow < datasheet_get_row_cnt (ds));
604 assert (column_cnt <= datasheet_get_column_cnt (ds));
605 assert (start_column + column_cnt <= datasheet_get_column_cnt (ds));
607 prow = axis_map (ds->rows, lrow);
608 for (lcol = start_column; lcol < start_column + column_cnt; lcol++, data++)
610 size_t pcol = axis_map (ds->columns, lcol);
611 struct range_map_node *r = range_map_lookup (&ds->sources, pcol);
612 struct source_info *s = source_info_from_range_map (r);
613 size_t pcol_ofs = pcol - range_map_node_get_start (r);
615 ? source_read (s->source, prow, pcol_ofs, data, 1)
616 : source_write (s->source, prow, pcol_ofs, data, 1)))
618 taint_set_taint (ds->taint);
627 An axis has two functions. First, it maintains a mapping from
628 logical (client-visible) to physical (storage) ordinates. The
629 axis_map and axis_get_size functions inspect this mapping, and
630 the axis_insert, axis_remove, and axis_move functions modify
631 it. Second, it tracks the set of ordinates that are unused
632 and available for reuse. (Not all unused ordinates are
633 available for reuse: in particular, unused columns that are
634 backed by a casereader are never reused.) The axis_allocate,
635 axis_make_available, and axis_extend functions affect the set
636 of available ordinates. */
639 struct tower log_to_phy; /* Map from logical to physical ordinates;
640 contains "struct axis_group"s. */
641 struct range_set *available; /* Set of unused, available ordinates. */
642 unsigned long int phy_size; /* Current physical length of axis. */
645 /* A mapping from logical to physical ordinates. */
648 struct tower_node logical; /* Range of logical ordinates. */
649 unsigned long phy_start; /* First corresponding physical ordinate. */
652 static struct axis_group *axis_group_from_tower_node (struct tower_node *);
653 static struct tower_node *make_axis_group (unsigned long int phy_start);
654 static struct tower_node *split_axis (struct axis *, unsigned long int where);
655 static void merge_axis_nodes (struct axis *, struct tower_node *,
656 struct tower_node **other_node);
657 static void check_axis_merged (const struct axis *axis UNUSED);
659 /* Creates and returns a new, initially empty axis. */
663 struct axis *axis = xmalloc (sizeof *axis);
664 tower_init (&axis->log_to_phy);
665 axis->available = range_set_create ();
670 /* Returns a clone of existing axis OLD.
672 Currently this is used only by the datasheet model checker
673 driver, but it could be otherwise useful. */
675 axis_clone (const struct axis *old)
677 const struct tower_node *node;
680 new = xmalloc (sizeof *new);
681 tower_init (&new->log_to_phy);
682 new->available = range_set_clone (old->available, NULL);
683 new->phy_size = old->phy_size;
685 for (node = tower_first (&old->log_to_phy); node != NULL;
686 node = tower_next (&old->log_to_phy, node))
688 unsigned long int size = tower_node_get_height (node);
689 struct axis_group *group = tower_data (node, struct axis_group, logical);
690 tower_insert (&new->log_to_phy, size, make_axis_group (group->phy_start),
697 /* Adds the state of AXIS to the MD4 hash context CTX.
699 This is only used by the datasheet model checker driver. It
700 is unlikely to be otherwise useful. */
702 axis_hash (const struct axis *axis, struct md4_ctx *ctx)
704 const struct tower_node *tn;
705 const struct range_set_node *rsn;
707 for (tn = tower_first (&axis->log_to_phy); tn != NULL;
708 tn = tower_next (&axis->log_to_phy, tn))
710 struct axis_group *group = tower_data (tn, struct axis_group, logical);
711 unsigned long int phy_start = group->phy_start;
712 unsigned long int size = tower_node_get_height (tn);
714 md4_process_bytes (&phy_start, sizeof phy_start, ctx);
715 md4_process_bytes (&size, sizeof size, ctx);
718 for (rsn = range_set_first (axis->available); rsn != NULL;
719 rsn = range_set_next (axis->available, rsn))
721 unsigned long int start = range_set_node_get_start (rsn);
722 unsigned long int end = range_set_node_get_end (rsn);
724 md4_process_bytes (&start, sizeof start, ctx);
725 md4_process_bytes (&end, sizeof end, ctx);
728 md4_process_bytes (&axis->phy_size, sizeof axis->phy_size, ctx);
733 axis_destroy (struct axis *axis)
738 while (!tower_is_empty (&axis->log_to_phy))
740 struct tower_node *node = tower_first (&axis->log_to_phy);
741 struct axis_group *group = tower_data (node, struct axis_group,
743 tower_delete (&axis->log_to_phy, node);
747 range_set_destroy (axis->available);
751 /* Allocates up to REQUEST contiguous unused and available
752 ordinates from AXIS. If successful, stores the number
753 obtained into *WIDTH and the ordinate of the first into
754 *START, marks the ordinates as now unavailable return true.
755 On failure, which occurs only if AXIS has no unused and
756 available ordinates, returns false without modifying AXIS. */
758 axis_allocate (struct axis *axis, unsigned long int request,
759 unsigned long int *start, unsigned long int *width)
761 return range_set_allocate (axis->available, request, start, width);
764 /* Marks the WIDTH contiguous ordinates in AXIS, starting from
765 START, as unused and available. */
767 axis_make_available (struct axis *axis,
768 unsigned long int start, unsigned long int width)
770 range_set_insert (axis->available, start, width);
773 /* Extends the total physical length of AXIS by WIDTH and returns
774 the first ordinate in the new physical region. */
775 static unsigned long int
776 axis_extend (struct axis *axis, unsigned long int width)
778 unsigned long int start = axis->phy_size;
779 axis->phy_size += width;
783 /* Returns the physical ordinate in AXIS corresponding to logical
784 ordinate LOG_POS. LOG_POS must be less than the logical
786 static unsigned long int
787 axis_map (const struct axis *axis, unsigned long log_pos)
789 struct tower_node *node;
790 struct axis_group *group;
791 unsigned long int group_start;
793 node = tower_lookup (&axis->log_to_phy, log_pos, &group_start);
794 group = tower_data (node, struct axis_group, logical);
795 return group->phy_start + (log_pos - group_start);
798 /* Returns the logical length of AXIS. */
800 axis_get_size (const struct axis *axis)
802 return tower_height (&axis->log_to_phy);
805 /* Inserts the CNT contiguous physical ordinates starting at
806 PHY_START into AXIS's logical-to-physical mapping, starting at
807 logical position LOG_START. */
809 axis_insert (struct axis *axis,
810 unsigned long int log_start, unsigned long int phy_start,
811 unsigned long int cnt)
813 struct tower_node *before = split_axis (axis, log_start);
814 struct tower_node *new = make_axis_group (phy_start);
815 tower_insert (&axis->log_to_phy, cnt, new, before);
816 merge_axis_nodes (axis, new, NULL);
817 check_axis_merged (axis);
820 /* Removes CNT ordinates from AXIS's logical-to-physical mapping
821 starting at logical position START. */
823 axis_remove (struct axis *axis,
824 unsigned long int start, unsigned long int cnt)
828 struct tower_node *last = split_axis (axis, start + cnt);
829 struct tower_node *cur, *next;
830 for (cur = split_axis (axis, start); cur != last; cur = next)
832 next = tower_delete (&axis->log_to_phy, cur);
833 free (axis_group_from_tower_node (cur));
835 merge_axis_nodes (axis, last, NULL);
836 check_axis_merged (axis);
840 /* Moves the CNT ordinates in AXIS's logical-to-mapping starting
841 at logical position OLD_START so that they then start at
842 position NEW_START. */
844 axis_move (struct axis *axis,
845 unsigned long int old_start, unsigned long int new_start,
846 unsigned long int cnt)
848 if (cnt > 0 && old_start != new_start)
850 struct tower_node *old_first, *old_last, *new_first;
851 struct tower_node *merge1, *merge2;
852 struct tower tmp_array;
854 /* Move ordinates OLD_START...(OLD_START + CNT) into new,
855 separate TMP_ARRAY. */
856 old_first = split_axis (axis, old_start);
857 old_last = split_axis (axis, old_start + cnt);
858 tower_init (&tmp_array);
859 tower_splice (&tmp_array, NULL,
860 &axis->log_to_phy, old_first, old_last);
861 merge_axis_nodes (axis, old_last, NULL);
862 check_axis_merged (axis);
864 /* Move TMP_ARRAY to position NEW_START. */
865 new_first = split_axis (axis, new_start);
866 merge1 = tower_first (&tmp_array);
867 merge2 = tower_last (&tmp_array);
868 if (merge2 == merge1)
870 tower_splice (&axis->log_to_phy, new_first, &tmp_array, old_first, NULL);
871 merge_axis_nodes (axis, merge1, &merge2);
872 merge_axis_nodes (axis, merge2, NULL);
873 check_axis_merged (axis);
877 /* Returns the axis_group in which NODE is embedded. */
878 static struct axis_group *
879 axis_group_from_tower_node (struct tower_node *node)
881 return tower_data (node, struct axis_group, logical);
884 /* Creates and returns a new axis_group at physical position
886 static struct tower_node *
887 make_axis_group (unsigned long phy_start)
889 struct axis_group *group = xmalloc (sizeof *group);
890 group->phy_start = phy_start;
891 return &group->logical;
894 /* Returns the tower_node in AXIS's logical-to-physical map whose
895 bottom edge is at exact level WHERE. If there is no such
896 tower_node in AXIS's logical-to-physical map, then split_axis
897 creates one by breaking an existing tower_node into two
898 separate ones, unless WHERE is equal to the tower height, in
899 which case it simply returns a null pointer. */
900 static struct tower_node *
901 split_axis (struct axis *axis, unsigned long int where)
903 unsigned long int group_start;
904 struct tower_node *group_node;
905 struct axis_group *group;
907 assert (where <= tower_height (&axis->log_to_phy));
908 if (where >= tower_height (&axis->log_to_phy))
911 group_node = tower_lookup (&axis->log_to_phy, where, &group_start);
912 group = axis_group_from_tower_node (group_node);
913 if (where > group_start)
915 unsigned long int size_1 = where - group_start;
916 unsigned long int size_2 = tower_node_get_height (group_node) - size_1;
917 struct tower_node *next = tower_next (&axis->log_to_phy, group_node);
918 struct tower_node *new = make_axis_group (group->phy_start + size_1);
919 tower_resize (&axis->log_to_phy, group_node, size_1);
920 tower_insert (&axis->log_to_phy, size_2, new, next);
924 return &group->logical;
927 /* Within AXIS, attempts to merge NODE (or the last node in AXIS,
928 if NODE is null) with its neighbor nodes. This is possible
929 when logically adjacent nodes are also adjacent physically (in
932 When a merge occurs, and OTHER_NODE is non-null and points to
933 the node to be deleted, this function also updates
934 *OTHER_NODE, if necessary, to ensure that it remains a valid
937 merge_axis_nodes (struct axis *axis, struct tower_node *node,
938 struct tower_node **other_node)
940 struct tower *t = &axis->log_to_phy;
941 struct axis_group *group;
942 struct tower_node *next, *prev;
944 /* Find node to potentially merge with neighbors. */
946 node = tower_last (t);
949 group = axis_group_from_tower_node (node);
951 /* Try to merge NODE with successor. */
952 next = tower_next (t, node);
955 struct axis_group *next_group = axis_group_from_tower_node (next);
956 unsigned long this_height = tower_node_get_height (node);
958 if (group->phy_start + this_height == next_group->phy_start)
960 unsigned long next_height = tower_node_get_height (next);
961 tower_resize (t, node, this_height + next_height);
962 if (other_node != NULL && *other_node == next)
963 *other_node = tower_next (t, *other_node);
964 tower_delete (t, next);
969 /* Try to merge NODE with predecessor. */
970 prev = tower_prev (t, node);
973 struct axis_group *prev_group = axis_group_from_tower_node (prev);
974 unsigned long prev_height = tower_node_get_height (prev);
976 if (prev_group->phy_start + prev_height == group->phy_start)
978 unsigned long this_height = tower_node_get_height (node);
979 group->phy_start = prev_group->phy_start;
980 tower_resize (t, node, this_height + prev_height);
981 if (other_node != NULL && *other_node == prev)
982 *other_node = tower_next (t, *other_node);
983 tower_delete (t, prev);
989 /* Verify that all potentially merge-able nodes in AXIS are
992 check_axis_merged (const struct axis *axis UNUSED)
994 #if ASSERT_LEVEL >= 10
995 struct tower_node *prev, *node;
997 for (prev = NULL, node = tower_first (&axis->log_to_phy); node != NULL;
998 prev = node, node = tower_next (&axis->log_to_phy, node))
1001 struct axis_group *prev_group = axis_group_from_tower_node (prev);
1002 unsigned long prev_height = tower_node_get_height (prev);
1003 struct axis_group *node_group = axis_group_from_tower_node (node);
1004 assert (prev_group->phy_start + prev_height != node_group->phy_start);
1012 size_t columns_used; /* Number of columns in use by client. */
1013 struct sparse_cases *data; /* Data at top level, atop the backing. */
1014 struct casereader *backing; /* Backing casereader (or null). */
1015 casenumber backing_rows; /* Number of rows in backing (if nonnull). */
1018 /* Creates and returns an empty, unbacked source with COLUMN_CNT
1019 columns and an initial "columns_used" of 0. */
1020 static struct source *
1021 source_create_empty (size_t column_cnt)
1023 struct source *source = xmalloc (sizeof *source);
1024 source->columns_used = 0;
1025 source->data = sparse_cases_create (column_cnt);
1026 source->backing = NULL;
1027 source->backing_rows = 0;
1031 /* Creates and returns a new source backed by READER and with the
1032 same initial dimensions and content. */
1033 static struct source *
1034 source_create_casereader (struct casereader *reader)
1036 struct source *source
1037 = source_create_empty (casereader_get_value_cnt (reader));
1038 source->backing = reader;
1039 source->backing_rows = casereader_count_cases (reader);
1043 /* Returns a clone of source OLD with the same data and backing
1046 Currently this is used only by the datasheet model checker
1047 driver, but it could be otherwise useful. */
1048 static struct source *
1049 source_clone (const struct source *old)
1051 struct source *new = xmalloc (sizeof *new);
1052 new->columns_used = old->columns_used;
1053 new->data = sparse_cases_clone (old->data);
1054 new->backing = old->backing != NULL ? casereader_clone (old->backing) : NULL;
1055 new->backing_rows = old->backing_rows;
1056 if (new->data == NULL)
1058 source_destroy (new);
1064 /* Increases the columns_used count of SOURCE by DELTA.
1065 The new value must not exceed SOURCE's number of columns. */
1067 source_increase_use (struct source *source, size_t delta)
1069 source->columns_used += delta;
1070 assert (source->columns_used <= sparse_cases_get_value_cnt (source->data));
1073 /* Decreases the columns_used count of SOURCE by DELTA.
1074 This must not attempt to decrease the columns_used count below
1077 source_decrease_use (struct source *source, size_t delta)
1079 assert (delta <= source->columns_used);
1080 source->columns_used -= delta;
1083 /* Returns true if SOURCE has any columns in use,
1086 source_in_use (const struct source *source)
1088 return source->columns_used > 0;
1091 /* Destroys SOURCE and its data and backing, if any. */
1093 source_destroy (struct source *source)
1097 sparse_cases_destroy (source->data);
1098 casereader_destroy (source->backing);
1103 /* Returns the number of rows in SOURCE's backing casereader
1104 (SOURCE must have a backing casereader). */
1106 source_get_backing_row_cnt (const struct source *source)
1108 assert (source_has_backing (source));
1109 return source->backing_rows;
1112 /* Returns the number of columns in SOURCE. */
1114 source_get_column_cnt (const struct source *source)
1116 return sparse_cases_get_value_cnt (source->data);
1119 /* Reads VALUE_CNT columns from SOURCE in the given ROW, starting
1120 from COLUMN, into VALUES. Returns true if successful, false
1123 source_read (const struct source *source,
1124 casenumber row, size_t column,
1125 union value values[], size_t value_cnt)
1127 if (source->backing == NULL || sparse_cases_contains_row (source->data, row))
1128 return sparse_cases_read (source->data, row, column, values, value_cnt);
1134 assert (source->backing != NULL);
1135 ok = casereader_peek (source->backing, row, &c);
1138 case_copy_out (&c, column, values, value_cnt);
1145 /* Writes the VALUE_CNT values in VALUES to SOURCE in the given
1146 ROW, starting at ROW. Returns true if successful, false on
1147 I/O error. On error, the row's data may be completely or
1148 partially corrupted, both inside and outside the region to be
1151 source_write (struct source *source,
1152 casenumber row, size_t column,
1153 const union value values[], size_t value_cnt)
1155 size_t column_cnt = sparse_cases_get_value_cnt (source->data);
1158 if (source->backing == NULL
1159 || (column == 0 && value_cnt == column_cnt)
1160 || sparse_cases_contains_row (source->data, row))
1161 ok = sparse_cases_write (source->data, row, column, values, value_cnt);
1165 if (row < source->backing_rows)
1166 ok = casereader_peek (source->backing, row, &c);
1169 /* It's not one of the backed rows. Ideally, this
1170 should never happen: we'd always be writing the full
1171 contents of new, unbacked rows in a single call to
1172 this function, so that the first case above would
1173 trigger. But that's a little difficult at higher
1174 levels, so that we in fact usually write the full
1175 contents of new, unbacked rows in multiple calls to
1176 this function. Make this work. */
1177 case_create (&c, column_cnt);
1182 case_copy_in (&c, column, values, value_cnt);
1183 ok = sparse_cases_write (source->data, row, 0,
1184 case_data_all (&c), column_cnt);
1191 /* Within SOURCE, which must not have a backing casereader,
1192 writes the VALUE_CNT values in VALUES_CNT to the VALUE_CNT
1193 columns starting from START_COLUMN, in every row, even in rows
1194 not yet otherwise initialized. Returns true if successful,
1195 false if an I/O error occurs.
1197 We don't support backing != NULL because (1) it's harder and
1198 (2) source_write_columns is only called by
1199 datasheet_insert_columns, which doesn't reuse columns from
1200 sources that are backed by casereaders. */
1202 source_write_columns (struct source *source, size_t start_column,
1203 const union value values[], size_t value_cnt)
1205 assert (source->backing == NULL);
1207 return sparse_cases_write_columns (source->data, start_column,
1211 /* Returns true if SOURCE has a backing casereader, false
1214 source_has_backing (const struct source *source)
1216 return source->backing != NULL;
1219 /* Datasheet model checker test driver. */
1221 /* Maximum size of datasheet supported for model checking
1226 /* Hashes the structure of datasheet DS and returns the hash.
1227 We use MD4 because it is much faster than MD5 or SHA-1 but its
1228 collision resistance is just as good. */
1230 hash_datasheet (const struct datasheet *ds)
1232 unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
1234 struct range_map_node *r;
1236 md4_init_ctx (&ctx);
1237 axis_hash (ds->columns, &ctx);
1238 axis_hash (ds->rows, &ctx);
1239 for (r = range_map_first (&ds->sources); r != NULL;
1240 r = range_map_next (&ds->sources, r))
1242 unsigned long int start = range_map_node_get_start (r);
1243 unsigned long int end = range_map_node_get_end (r);
1244 md4_process_bytes (&start, sizeof start, &ctx);
1245 md4_process_bytes (&end, sizeof end, &ctx);
1247 md4_process_bytes (&ds->column_min_alloc, sizeof ds->column_min_alloc,
1249 md4_finish_ctx (&ctx, hash);
1253 /* Checks that datasheet DS contains has ROW_CNT rows, COLUMN_CNT
1254 columns, and the same contents as ARRAY, reporting any
1255 mismatches via mc_error. Then, adds DS to MC as a new state. */
1257 check_datasheet (struct mc *mc, struct datasheet *ds,
1258 double array[MAX_ROWS][MAX_COLS],
1259 size_t row_cnt, size_t column_cnt)
1261 assert (row_cnt < MAX_ROWS);
1262 assert (column_cnt < MAX_COLS);
1264 /* If it is a duplicate hash, discard the state before checking
1265 its consistency, to save time. */
1266 if (mc_discard_dup_state (mc, hash_datasheet (ds)))
1268 datasheet_destroy (ds);
1272 if (row_cnt != datasheet_get_row_cnt (ds))
1273 mc_error (mc, "row count (%lu) does not match expected (%zu)",
1274 (unsigned long int) datasheet_get_row_cnt (ds), row_cnt);
1275 else if (column_cnt != datasheet_get_column_cnt (ds))
1276 mc_error (mc, "column count (%lu) does not match expected (%zu)",
1277 (unsigned long int) datasheet_get_column_cnt (ds), column_cnt);
1282 for (row = 0; row < row_cnt; row++)
1283 for (col = 0; col < column_cnt; col++)
1286 if (!datasheet_get_value (ds, row, col, &v, 1))
1288 if (v.f != array[row][col])
1289 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: %g != %g",
1290 row, col, row_cnt, column_cnt, v.f, array[row][col]);
1294 mc_add_state (mc, ds);
1297 /* Extracts the contents of DS into DATA. */
1299 extract_data (const struct datasheet *ds, double data[MAX_ROWS][MAX_COLS])
1301 size_t column_cnt = datasheet_get_column_cnt (ds);
1302 size_t row_cnt = datasheet_get_row_cnt (ds);
1305 assert (row_cnt < MAX_ROWS);
1306 assert (column_cnt < MAX_COLS);
1307 for (row = 0; row < row_cnt; row++)
1308 for (col = 0; col < column_cnt; col++)
1311 if (!datasheet_get_value (ds, row, col, &v, 1))
1313 data[row][col] = v.f;
1317 /* Clones the structure and contents of ODS into *DS,
1318 and the contents of ODATA into DATA. */
1320 clone_model (const struct datasheet *ods, double odata[MAX_ROWS][MAX_COLS],
1321 struct datasheet **ds_, double data[MAX_ROWS][MAX_COLS])
1323 struct datasheet *ds;
1324 struct range_map_node *r;
1326 /* Clone ODS into DS. */
1327 ds = *ds_ = xmalloc (sizeof *ds);
1328 ds->columns = axis_clone (ods->columns);
1329 ds->rows = axis_clone (ods->rows);
1330 range_map_init (&ds->sources);
1331 for (r = range_map_first (&ods->sources); r != NULL;
1332 r = range_map_next (&ods->sources, r))
1334 const struct source_info *osi = source_info_from_range_map (r);
1335 struct source_info *si = xmalloc (sizeof *si);
1336 si->source = source_clone (osi->source);
1337 range_map_insert (&ds->sources, range_map_node_get_start (r),
1338 range_map_node_get_width (r), &si->column_range);
1340 ds->column_min_alloc = ods->column_min_alloc;
1341 ds->taint = taint_create ();
1343 /* Clone ODATA into DATA. */
1344 memcpy (data, odata, MAX_ROWS * MAX_COLS * sizeof **data);
1347 /* "init" function for struct mc_class. */
1349 datasheet_mc_init (struct mc *mc)
1351 struct datasheet_test_params *params = mc_get_aux (mc);
1352 struct datasheet *ds;
1354 if (params->backing_rows == 0 && params->backing_cols == 0)
1356 /* Create unbacked datasheet. */
1357 ds = datasheet_create (NULL);
1358 mc_name_operation (mc, "empty datasheet");
1359 check_datasheet (mc, ds, NULL, 0, 0);
1363 /* Create datasheet with backing. */
1364 struct casewriter *writer;
1365 struct casereader *reader;
1366 double data[MAX_ROWS][MAX_COLS];
1369 assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
1370 assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
1372 writer = mem_writer_create (params->backing_cols);
1373 for (row = 0; row < params->backing_rows; row++)
1378 case_create (&c, params->backing_cols);
1379 for (col = 0; col < params->backing_cols; col++)
1381 double value = params->next_value++;
1382 data[row][col] = value;
1383 case_data_rw_idx (&c, col)->f = value;
1385 casewriter_write (writer, &c);
1387 reader = casewriter_make_reader (writer);
1388 assert (reader != NULL);
1390 ds = datasheet_create (reader);
1391 mc_name_operation (mc, "datasheet with (%d,%d) backing",
1392 params->backing_rows, params->backing_cols);
1393 check_datasheet (mc, ds, data,
1394 params->backing_rows, params->backing_cols);
1398 /* "mutate" function for struct mc_class. */
1400 datasheet_mc_mutate (struct mc *mc, const void *ods_)
1402 struct datasheet_test_params *params = mc_get_aux (mc);
1404 const struct datasheet *ods = ods_;
1405 double odata[MAX_ROWS][MAX_COLS];
1406 double data[MAX_ROWS][MAX_COLS];
1407 size_t column_cnt = datasheet_get_column_cnt (ods);
1408 size_t row_cnt = datasheet_get_row_cnt (ods);
1409 size_t pos, new_pos, cnt;
1411 extract_data (ods, odata);
1413 /* Insert all possible numbers of columns in all possible
1415 for (pos = 0; pos <= column_cnt; pos++)
1416 for (cnt = 0; cnt <= params->max_cols - column_cnt; cnt++)
1417 if (mc_include_state (mc))
1419 struct datasheet *ds;
1420 union value new[MAX_COLS];
1423 mc_name_operation (mc, "insert %zu columns at %zu", cnt, pos);
1424 clone_model (ods, odata, &ds, data);
1426 for (i = 0; i < cnt; i++)
1427 new[i].f = params->next_value++;
1429 if (!datasheet_insert_columns (ds, new, cnt, pos))
1430 mc_error (mc, "datasheet_insert_columns failed");
1432 for (i = 0; i < row_cnt; i++)
1434 insert_range (&data[i][0], column_cnt, sizeof data[i][0],
1436 for (j = 0; j < cnt; j++)
1437 data[i][pos + j] = new[j].f;
1440 check_datasheet (mc, ds, data, row_cnt, column_cnt + cnt);
1443 /* Delete all possible numbers of columns from all possible
1445 for (pos = 0; pos < column_cnt; pos++)
1446 for (cnt = 0; cnt < column_cnt - pos; cnt++)
1447 if (mc_include_state (mc))
1449 struct datasheet *ds;
1452 mc_name_operation (mc, "delete %zu columns at %zu", cnt, pos);
1453 clone_model (ods, odata, &ds, data);
1455 datasheet_delete_columns (ds, pos, cnt);
1457 for (i = 0; i < row_cnt; i++)
1458 remove_range (&data[i], column_cnt, sizeof *data[i], pos, cnt);
1460 check_datasheet (mc, ds, data, row_cnt, column_cnt - cnt);
1463 /* Move all possible numbers of columns from all possible
1464 existing positions to all possible new positions. */
1465 for (pos = 0; pos < column_cnt; pos++)
1466 for (cnt = 0; cnt < column_cnt - pos; cnt++)
1467 for (new_pos = 0; new_pos < column_cnt - cnt; new_pos++)
1468 if (mc_include_state (mc))
1470 struct datasheet *ds;
1473 clone_model (ods, odata, &ds, data);
1474 mc_name_operation (mc, "move %zu columns from %zu to %zu",
1477 datasheet_move_columns (ds, pos, new_pos, cnt);
1479 for (i = 0; i < row_cnt; i++)
1480 move_range (&data[i], column_cnt, sizeof data[i][0],
1483 check_datasheet (mc, ds, data, row_cnt, column_cnt);
1486 /* Insert all possible numbers of rows in all possible
1488 for (pos = 0; pos <= row_cnt; pos++)
1489 for (cnt = 0; cnt <= params->max_rows - row_cnt; cnt++)
1490 if (mc_include_state (mc))
1492 struct datasheet *ds;
1493 struct ccase c[MAX_ROWS];
1496 clone_model (ods, odata, &ds, data);
1497 mc_name_operation (mc, "insert %zu rows at %zu", cnt, pos);
1499 for (i = 0; i < cnt; i++)
1501 case_create (&c[i], column_cnt);
1502 for (j = 0; j < column_cnt; j++)
1503 case_data_rw_idx (&c[i], j)->f = params->next_value++;
1506 insert_range (data, row_cnt, sizeof data[pos], pos, cnt);
1507 for (i = 0; i < cnt; i++)
1508 for (j = 0; j < column_cnt; j++)
1509 data[i + pos][j] = case_num_idx (&c[i], j);
1511 if (!datasheet_insert_rows (ds, pos, c, cnt))
1512 mc_error (mc, "datasheet_insert_rows failed");
1514 check_datasheet (mc, ds, data, row_cnt + cnt, column_cnt);
1517 /* Delete all possible numbers of rows from all possible
1519 for (pos = 0; pos < row_cnt; pos++)
1520 for (cnt = 0; cnt < row_cnt - pos; cnt++)
1521 if (mc_include_state (mc))
1523 struct datasheet *ds;
1525 clone_model (ods, odata, &ds, data);
1526 mc_name_operation (mc, "delete %zu rows at %zu", cnt, pos);
1528 datasheet_delete_rows (ds, pos, cnt);
1530 remove_range (&data[0], row_cnt, sizeof data[0], pos, cnt);
1532 check_datasheet (mc, ds, data, row_cnt - cnt, column_cnt);
1535 /* Move all possible numbers of rows from all possible existing
1536 positions to all possible new positions. */
1537 for (pos = 0; pos < row_cnt; pos++)
1538 for (cnt = 0; cnt < row_cnt - pos; cnt++)
1539 for (new_pos = 0; new_pos < row_cnt - cnt; new_pos++)
1540 if (mc_include_state (mc))
1542 struct datasheet *ds;
1544 clone_model (ods, odata, &ds, data);
1545 mc_name_operation (mc, "move %zu rows from %zu to %zu",
1548 datasheet_move_rows (ds, pos, new_pos, cnt);
1550 move_range (&data[0], row_cnt, sizeof data[0],
1553 check_datasheet (mc, ds, data, row_cnt, column_cnt);
1557 /* "destroy" function for struct mc_class. */
1559 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
1561 struct datasheet *ds = ds_;
1562 datasheet_destroy (ds);
1565 /* Executes the model checker on the datasheet test driver with
1566 the given OPTIONS and passing in the given PARAMS, which must
1567 point to a modifiable "struct datasheet_test_params". If any
1568 value in PARAMS is out of range, it will be adjusted into the
1569 valid range before running the test.
1571 Returns the results of the model checking run. */
1573 datasheet_test (struct mc_options *options, void *params_)
1575 struct datasheet_test_params *params = params_;
1576 static const struct mc_class datasheet_mc_class =
1579 datasheet_mc_mutate,
1580 datasheet_mc_destroy,
1583 params->next_value = 1;
1584 params->max_rows = MIN (params->max_rows, MAX_ROWS);
1585 params->max_cols = MIN (params->max_cols, MAX_COLS);
1586 params->backing_rows = MIN (params->backing_rows, params->max_rows);
1587 params->backing_cols = MIN (params->backing_cols, params->max_cols);
1589 mc_options_set_aux (options, params);
1590 return mc_run (&datasheet_mc_class, options);