1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/datasheet.h>
20 #include "datasheet-check.h"
25 #include <data/casereader-provider.h>
26 #include <data/casereader.h>
27 #include <data/casewriter.h>
28 #include <data/lazy-casereader.h>
29 #include <libpspp/array.h>
30 #include <libpspp/assertion.h>
31 #include <libpspp/hash-functions.h>
32 #include <libpspp/model-checker.h>
33 #include <libpspp/range-map.h>
34 #include <libpspp/range-set.h>
35 #include <libpspp/str.h>
36 #include <libpspp/taint.h>
37 #include <libpspp/tower.h>
43 /* lazy_casereader callback function to instantiate a casereader
44 from the datasheet. */
45 static struct casereader *
46 lazy_callback (void *ds_)
48 struct datasheet *ds = ds_;
49 return datasheet_make_reader (ds);
53 /* Maximum size of datasheet supported for model checking
60 check_caseproto (struct mc *mc, const struct caseproto *benchmark,
61 const struct caseproto *test, const char *test_name)
63 size_t n_columns = caseproto_get_n_widths (benchmark);
67 if (n_columns != caseproto_get_n_widths (test))
69 mc_error (mc, "%s column count (%zu) does not match expected (%zu)",
70 test_name, caseproto_get_n_widths (test), n_columns);
75 for (col = 0; col < n_columns; col++)
77 int benchmark_width = caseproto_get_width (benchmark, col);
78 int test_width = caseproto_get_width (test, col);
79 if (benchmark_width != test_width)
81 mc_error (mc, "%s column %zu width (%d) differs from expected (%d)",
82 test_name, col, test_width, benchmark_width);
89 /* Checks that READER contains the N_ROWS rows and N_COLUMNS
90 columns of data in ARRAY, reporting any errors via MC. */
92 check_datasheet_casereader (struct mc *mc, struct casereader *reader,
93 union value array[MAX_ROWS][MAX_COLS],
94 size_t n_rows, const struct caseproto *proto)
96 size_t n_columns = caseproto_get_n_widths (proto);
98 if (!check_caseproto (mc, proto, casereader_get_proto (reader),
101 else if (casereader_get_case_cnt (reader) != n_rows)
103 if (casereader_get_case_cnt (reader) == CASENUMBER_MAX
104 && casereader_count_cases (reader) == n_rows)
105 mc_error (mc, "datasheet casereader has unknown case count");
107 mc_error (mc, "casereader row count (%lu) does not match "
109 (unsigned long int) casereader_get_case_cnt (reader),
117 for (row = 0; row < n_rows; row++)
121 c = casereader_read (reader);
124 mc_error (mc, "casereader_read failed reading row %zu of %zu "
125 "(%zu columns)", row, n_rows, n_columns);
129 for (col = 0; col < n_columns; col++)
131 int width = caseproto_get_width (proto, col);
132 if (!value_equal (case_data_idx (c, col), &array[row][col],
136 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
138 row, col, n_rows, n_columns,
139 case_num_idx (c, col), array[row][col].f);
141 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
143 row, col, n_rows, n_columns,
144 width, case_str_idx (c, col),
145 width, value_str (&array[row][col], width));
152 c = casereader_read (reader);
154 mc_error (mc, "casereader has extra cases (expected %zu)", n_rows);
158 /* Checks that datasheet DS contains has N_ROWS rows, N_COLUMNS
159 columns, and the same contents as ARRAY, reporting any
160 mismatches via mc_error. Then, adds DS to MC as a new state. */
162 check_datasheet (struct mc *mc, struct datasheet *ds,
163 union value array[MAX_ROWS][MAX_COLS],
164 size_t n_rows, const struct caseproto *proto)
166 size_t n_columns = caseproto_get_n_widths (proto);
167 struct datasheet *ds2;
168 struct casereader *reader;
169 unsigned long int serial = 0;
171 assert (n_rows < MAX_ROWS);
172 assert (n_columns < MAX_COLS);
174 /* If it is a duplicate hash, discard the state before checking
175 its consistency, to save time. */
176 if (mc_discard_dup_state (mc, hash_datasheet (ds)))
178 datasheet_destroy (ds);
182 /* Check contents of datasheet via datasheet functions. */
183 if (!check_caseproto (mc, proto, datasheet_get_proto (ds), "datasheet"))
185 /* check_caseproto emitted errors already. */
187 else if (n_rows != datasheet_get_n_rows (ds))
188 mc_error (mc, "row count (%lu) does not match expected (%zu)",
189 (unsigned long int) datasheet_get_n_rows (ds), n_rows);
193 bool difference = false;
195 for (row = 0; row < n_rows; row++)
196 for (col = 0; col < n_columns; col++)
198 int width = caseproto_get_width (proto, col);
199 union value *av = &array[row][col];
202 value_init (&v, width);
203 if (!datasheet_get_value (ds, row, col, &v))
205 if (!value_equal (&v, av, width))
208 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
209 "%g != %g", row, col, n_rows, n_columns,
212 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
214 row, col, n_rows, n_columns,
215 width, value_str (&v, width),
216 width, value_str (av, width));
219 value_destroy (&v, width);
226 mc_error (mc, "expected:");
228 for (row = 0; row < n_rows; row++)
231 ds_put_format (&s, "row %zu:", row);
232 for (col = 0; col < n_columns; col++)
234 const union value *v = &array[row][col];
235 int width = caseproto_get_width (proto, col);
237 ds_put_format (&s, " %g", v->f);
239 ds_put_format (&s, " '%.*s'", width, value_str (v, width));
241 mc_error (mc, "%s", ds_cstr (&s));
244 mc_error (mc, "actual:");
246 for (row = 0; row < n_rows; row++)
249 ds_put_format (&s, "row %zu:", row);
250 for (col = 0; col < n_columns; col++)
254 if (!datasheet_get_value (ds, row, col, &v))
256 ds_put_format (&s, " %g", v.f);
258 mc_error (mc, "%s", ds_cstr (&s));
265 /* Check that datasheet contents are correct when read through
267 ds2 = clone_datasheet (ds);
268 reader = datasheet_make_reader (ds2);
269 check_datasheet_casereader (mc, reader, array, n_rows, proto);
270 casereader_destroy (reader);
272 /* Check that datasheet contents are correct when read through
273 casereader with lazy_casereader wrapped around it. This is
274 valuable because otherwise there is no non-GUI code that
275 uses the lazy_casereader. */
276 ds2 = clone_datasheet (ds);
277 reader = lazy_casereader_create (datasheet_get_proto (ds2), n_rows,
278 lazy_callback, ds2, &serial);
279 check_datasheet_casereader (mc, reader, array, n_rows, proto);
280 if (lazy_casereader_destroy (reader, serial))
282 /* Lazy casereader was never instantiated. This will
283 only happen if there are no rows (because in that case
284 casereader_read never gets called). */
285 datasheet_destroy (ds2);
287 mc_error (mc, "lazy casereader not instantiated, but should "
288 "have been (size %zu,%zu)", n_rows, n_columns);
292 /* Lazy casereader was instantiated. This is the common
293 case, in which some casereader operation
294 (casereader_read in this case) was performed on the
296 casereader_destroy (reader);
298 mc_error (mc, "lazy casereader instantiated, but should not "
299 "have been (size %zu,%zu)", n_rows, n_columns);
302 mc_add_state (mc, ds);
305 /* Extracts the contents of DS into DATA. */
307 extract_data (const struct datasheet *ds, union value data[MAX_ROWS][MAX_COLS])
309 const struct caseproto *proto = datasheet_get_proto (ds);
310 size_t n_columns = datasheet_get_n_columns (ds);
311 size_t n_rows = datasheet_get_n_rows (ds);
314 assert (n_rows < MAX_ROWS);
315 assert (n_columns < MAX_COLS);
316 for (row = 0; row < n_rows; row++)
317 for (col = 0; col < n_columns; col++)
319 int width = caseproto_get_width (proto, col);
320 union value *v = &data[row][col];
321 value_init (v, width);
322 if (!datasheet_get_value (ds, row, col, v))
327 /* Copies the contents of ODATA into DATA. Each of the N_ROWS
328 rows of ODATA and DATA must have prototype PROTO. */
330 clone_data (size_t n_rows, const struct caseproto *proto,
331 union value odata[MAX_ROWS][MAX_COLS],
332 union value data[MAX_ROWS][MAX_COLS])
334 size_t n_columns = caseproto_get_n_widths (proto);
337 assert (n_rows < MAX_ROWS);
338 assert (n_columns < MAX_COLS);
339 for (row = 0; row < n_rows; row++)
340 for (col = 0; col < n_columns; col++)
342 int width = caseproto_get_width (proto, col);
343 const union value *ov = &odata[row][col];
344 union value *v = &data[row][col];
345 value_init (v, width);
346 value_copy (v, ov, width);
351 release_data (size_t n_rows, const struct caseproto *proto,
352 union value data[MAX_ROWS][MAX_COLS])
354 size_t n_columns = caseproto_get_n_widths (proto);
357 assert (n_rows < MAX_ROWS);
358 assert (n_columns < MAX_COLS);
359 for (col = 0; col < n_columns; col++)
361 int width = caseproto_get_width (proto, col);
362 if (value_needs_init (width))
363 for (row = 0; row < n_rows; row++)
364 value_destroy (&data[row][col], width);
368 /* Clones the structure and contents of ODS into *DS,
369 and the contents of ODATA into DATA. */
371 clone_model (const struct datasheet *ods,
372 union value odata[MAX_ROWS][MAX_COLS],
373 struct datasheet **ds,
374 union value data[MAX_ROWS][MAX_COLS])
376 *ds = clone_datasheet (ods);
377 clone_data (datasheet_get_n_rows (ods), datasheet_get_proto (ods),
381 /* "init" function for struct mc_class. */
383 datasheet_mc_init (struct mc *mc)
385 struct datasheet_test_params *params = mc_get_aux (mc);
386 struct datasheet *ds;
388 if (params->backing_rows == 0 && params->backing_cols == 0)
390 /* Create unbacked datasheet. */
391 ds = datasheet_create (NULL);
392 mc_name_operation (mc, "empty datasheet");
393 check_datasheet (mc, ds, NULL, 0, caseproto_create ());
397 /* Create datasheet with backing. */
398 struct casewriter *writer;
399 struct casereader *reader;
400 union value data[MAX_ROWS][MAX_COLS];
401 struct caseproto *proto;
404 assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
405 assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
407 /* XXX support different backing column widths */
408 proto = caseproto_create ();
409 for (col = 0; col < params->backing_cols; col++)
410 proto = caseproto_add_width (proto, 0);
412 writer = mem_writer_create (proto);
413 for (row = 0; row < params->backing_rows; row++)
417 c = case_create (proto);
418 for (col = 0; col < params->backing_cols; col++)
420 double value = params->next_value++;
421 data[row][col].f = value;
422 case_data_rw_idx (c, col)->f = value;
424 casewriter_write (writer, c);
426 caseproto_unref (proto);
428 reader = casewriter_make_reader (writer);
429 assert (reader != NULL);
431 ds = datasheet_create (reader);
432 mc_name_operation (mc, "datasheet with (%d,%d) backing",
433 params->backing_rows, params->backing_cols);
434 check_datasheet (mc, ds, data,
435 params->backing_rows, proto);
440 value_from_param (union value *value, int width, int idx)
446 unsigned int hash = hash_int (idx, 0);
447 char *string = value_str_rw (value, width);
451 for (offset = 0; offset < width; offset++)
452 string[offset] = "ABCDEFGHIJ"[(hash >> offset) % 10];
456 /* "mutate" function for struct mc_class. */
458 datasheet_mc_mutate (struct mc *mc, const void *ods_)
460 struct datasheet_test_params *params = mc_get_aux (mc);
462 static const int widths[] = {0, 1, 11};
463 const size_t n_widths = sizeof widths / sizeof *widths;
465 const struct datasheet *ods = ods_;
466 union value odata[MAX_ROWS][MAX_COLS];
467 union value data[MAX_ROWS][MAX_COLS];
468 const struct caseproto *oproto = datasheet_get_proto (ods);
469 size_t n_columns = datasheet_get_n_columns (ods);
470 size_t n_rows = datasheet_get_n_rows (ods);
471 size_t pos, new_pos, cnt, width_idx;
473 extract_data (ods, odata);
475 /* Insert a column in each possible position. */
476 if (n_columns < params->max_cols)
477 for (pos = 0; pos <= n_columns; pos++)
478 for (width_idx = 0; width_idx < n_widths; width_idx++)
479 if (mc_include_state (mc))
481 int width = widths[width_idx];
482 struct caseproto *proto;
483 struct datasheet *ds;
487 mc_name_operation (mc, "insert column at %zu "
488 "(from %zu to %zu columns)",
489 pos, n_columns, n_columns + 1);
490 clone_model (ods, odata, &ds, data);
492 value_init (&new, width);
493 value_from_param (&new, width, params->next_value++);
494 if (!datasheet_insert_column (ds, &new, width, pos))
495 mc_error (mc, "datasheet_insert_column failed");
496 proto = caseproto_insert_width (caseproto_ref (oproto),
499 for (i = 0; i < n_rows; i++)
501 insert_element (&data[i][0], n_columns, sizeof data[i][0],
503 value_init (&data[i][pos], width);
504 value_copy (&data[i][pos], &new, width);
506 value_destroy (&new, width);
508 check_datasheet (mc, ds, data, n_rows, proto);
509 release_data (n_rows, proto, data);
510 caseproto_unref (proto);
513 /* Delete all possible numbers of columns from all possible
515 for (pos = 0; pos < n_columns; pos++)
516 for (cnt = 0; cnt < n_columns - pos; cnt++)
517 if (mc_include_state (mc))
519 struct caseproto *proto;
520 struct datasheet *ds;
523 mc_name_operation (mc, "delete %zu columns at %zu "
524 "(from %zu to %zu columns)",
525 cnt, pos, n_columns, n_columns - cnt);
526 clone_model (ods, odata, &ds, data);
528 datasheet_delete_columns (ds, pos, cnt);
529 proto = caseproto_remove_widths (caseproto_ref (oproto), pos, cnt);
531 for (i = 0; i < n_rows; i++)
533 for (j = pos; j < pos + cnt; j++)
534 value_destroy (&data[i][j], caseproto_get_width (oproto, j));
535 remove_range (&data[i], n_columns, sizeof *data[i], pos, cnt);
538 check_datasheet (mc, ds, data, n_rows, proto);
539 release_data (n_rows, proto, data);
540 caseproto_unref (proto);
543 /* Move all possible numbers of columns from all possible
544 existing positions to all possible new positions. */
545 for (pos = 0; pos < n_columns; pos++)
546 for (cnt = 0; cnt < n_columns - pos; cnt++)
547 for (new_pos = 0; new_pos < n_columns - cnt; new_pos++)
548 if (mc_include_state (mc))
550 struct caseproto *proto;
551 struct datasheet *ds;
554 clone_model (ods, odata, &ds, data);
555 mc_name_operation (mc, "move %zu columns (of %zu) from %zu to %zu",
556 cnt, n_columns, pos, new_pos);
558 datasheet_move_columns (ds, pos, new_pos, cnt);
560 for (i = 0; i < n_rows; i++)
561 move_range (&data[i], n_columns, sizeof data[i][0],
563 proto = caseproto_move_widths (caseproto_ref (oproto),
566 check_datasheet (mc, ds, data, n_rows, proto);
567 release_data (n_rows, proto, data);
568 caseproto_unref (proto);
571 /* Insert all possible numbers of rows in all possible
573 for (pos = 0; pos <= n_rows; pos++)
574 for (cnt = 0; cnt <= params->max_rows - n_rows; cnt++)
575 if (mc_include_state (mc))
577 struct datasheet *ds;
578 struct ccase *c[MAX_ROWS];
581 clone_model (ods, odata, &ds, data);
582 mc_name_operation (mc, "insert %zu rows at %zu "
583 "(from %zu to %zu rows)",
584 cnt, pos, n_rows, n_rows + cnt);
586 for (i = 0; i < cnt; i++)
588 c[i] = case_create (oproto);
589 for (j = 0; j < n_columns; j++)
590 value_from_param (case_data_rw_idx (c[i], j),
591 caseproto_get_width (oproto, j),
592 params->next_value++);
595 insert_range (data, n_rows, sizeof data[pos], pos, cnt);
596 for (i = 0; i < cnt; i++)
597 for (j = 0; j < n_columns; j++)
599 int width = caseproto_get_width (oproto, j);
600 value_init (&data[i + pos][j], width);
601 value_copy (&data[i + pos][j], case_data_idx (c[i], j), width);
604 if (!datasheet_insert_rows (ds, pos, c, cnt))
605 mc_error (mc, "datasheet_insert_rows failed");
607 check_datasheet (mc, ds, data, n_rows + cnt, oproto);
608 release_data (n_rows + cnt, oproto, data);
611 /* Delete all possible numbers of rows from all possible
613 for (pos = 0; pos < n_rows; pos++)
614 for (cnt = 0; cnt < n_rows - pos; cnt++)
615 if (mc_include_state (mc))
617 struct datasheet *ds;
619 clone_model (ods, odata, &ds, data);
620 mc_name_operation (mc, "delete %zu rows at %zu "
621 "(from %zu to %zu rows)",
622 cnt, pos, n_rows, n_rows - cnt);
624 datasheet_delete_rows (ds, pos, cnt);
626 release_data (cnt, oproto, &data[pos]);
627 remove_range (&data[0], n_rows, sizeof data[0], pos, cnt);
629 check_datasheet (mc, ds, data, n_rows - cnt, oproto);
630 release_data (n_rows - cnt, oproto, data);
633 /* Move all possible numbers of rows from all possible existing
634 positions to all possible new positions. */
635 for (pos = 0; pos < n_rows; pos++)
636 for (cnt = 0; cnt < n_rows - pos; cnt++)
637 for (new_pos = 0; new_pos < n_rows - cnt; new_pos++)
638 if (mc_include_state (mc))
640 struct datasheet *ds;
642 clone_model (ods, odata, &ds, data);
643 mc_name_operation (mc, "move %zu rows (of %zu) from %zu to %zu",
644 cnt, n_rows, pos, new_pos);
646 datasheet_move_rows (ds, pos, new_pos, cnt);
648 move_range (&data[0], n_rows, sizeof data[0],
651 check_datasheet (mc, ds, data, n_rows, oproto);
652 release_data (n_rows, oproto, data);
655 release_data (n_rows, oproto, odata);
658 /* "destroy" function for struct mc_class. */
660 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
662 struct datasheet *ds = ds_;
663 datasheet_destroy (ds);
666 /* Executes the model checker on the datasheet test driver with
667 the given OPTIONS and passing in the given PARAMS, which must
668 point to a modifiable "struct datasheet_test_params". If any
669 value in PARAMS is out of range, it will be adjusted into the
670 valid range before running the test.
672 Returns the results of the model checking run. */
674 datasheet_test (struct mc_options *options UNUSED, void *params_ UNUSED)
676 struct datasheet_test_params *params = params_;
677 static const struct mc_class datasheet_mc_class =
681 datasheet_mc_destroy,
684 params->next_value = 1;
685 params->max_rows = MIN (params->max_rows, MAX_ROWS);
686 params->max_cols = MIN (params->max_cols, MAX_COLS);
687 params->backing_rows = MIN (params->backing_rows, params->max_rows);
688 params->backing_cols = MIN (params->backing_cols, params->max_cols);
690 mc_options_set_aux (options, params);
691 return mc_run (&datasheet_mc_class, options);