1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/datasheet.h>
24 #include <data/casereader-provider.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/lazy-casereader.h>
28 #include <libpspp/argv-parser.h>
29 #include <libpspp/array.h>
30 #include <libpspp/assertion.h>
31 #include <libpspp/hash-functions.h>
32 #include <libpspp/model-checker.h>
33 #include <libpspp/range-map.h>
34 #include <libpspp/range-set.h>
35 #include <libpspp/str.h>
36 #include <libpspp/taint.h>
37 #include <libpspp/tower.h>
43 /* lazy_casereader callback function to instantiate a casereader
44 from the datasheet. */
/* DS_ arrives as an untyped pointer and is treated here as the
   struct datasheet whose reader is wanted.  The result is
   whatever datasheet_make_reader returns for that datasheet. */
45 static struct casereader *
46 lazy_callback (void *ds_)
48 struct datasheet *ds = ds_;
49 return datasheet_make_reader (ds);
53 /* Maximum size of datasheet supported for model checking. */
/* Parameters that bound the state space explored by the model
   checker.  NOTE(review): code elsewhere in this file also reads
   and updates a `next_value' member through this structure; its
   declaration is not among the members visible here. */
59 struct datasheet_test_params
62 int max_rows; /* Maximum number of rows. */
63 int max_cols; /* Maximum number of columns. */
64 int backing_rows; /* Number of rows of backing store. */
65 int backing_cols; /* Number of columns of backing store. */
/* Compares prototype TEST against the reference prototype
   BENCHMARK, reporting each discrepancy through mc_error with
   TEST_NAME as the label in the message.  Callers in this file
   test the result with `!', treating a false result as "errors
   were already reported"; the return statements themselves are
   not part of the lines visible here. */
72 check_caseproto (struct mc *mc, const struct caseproto *benchmark,
73 const struct caseproto *test, const char *test_name)
75 size_t n_columns = caseproto_get_n_widths (benchmark);
/* The number of columns is compared before any individual width. */
79 if (n_columns != caseproto_get_n_widths (test))
81 mc_error (mc, "%s column count (%zu) does not match expected (%zu)",
82 test_name, caseproto_get_n_widths (test), n_columns);
/* Widths are compared column by column; TEST's width is printed
   first and BENCHMARK's width as the expected value. */
87 for (col = 0; col < n_columns; col++)
89 int benchmark_width = caseproto_get_width (benchmark, col);
90 int test_width = caseproto_get_width (test, col);
91 if (benchmark_width != test_width)
93 mc_error (mc, "%s column %zu width (%d) differs from expected (%d)",
94 test_name, col, test_width, benchmark_width);
101 /* Checks that READER contains the N_ROWS rows of data in ARRAY,
102 with the columns described by PROTO, reporting any errors via MC. */
/* Reads cases from READER and compares them, element by element,
   with the first N_ROWS rows of ARRAY.  The number of columns and
   the width used for each comparison come from PROTO.  Every
   problem found is reported through mc_error; READER is not
   destroyed in the lines visible here. */
104 check_datasheet_casereader (struct mc *mc, struct casereader *reader,
105 union value array[MAX_ROWS][MAX_COLS],
106 size_t n_rows, const struct caseproto *proto)
108 size_t n_columns = caseproto_get_n_widths (proto);
/* The reader's own prototype must agree with PROTO before any
   case count or data is examined. */
110 if (!check_caseproto (mc, proto, casereader_get_proto (reader),
113 else if (casereader_get_case_cnt (reader) != n_rows)
/* A reader that reports CASENUMBER_MAX while counting out exactly
   N_ROWS cases gets the "unknown case count" message rather than
   the row-count mismatch message. */
115 if (casereader_get_case_cnt (reader) == CASENUMBER_MAX
116 && casereader_count_cases (reader) == n_rows)
117 mc_error (mc, "datasheet casereader has unknown case count");
119 mc_error (mc, "casereader row count (%lu) does not match "
121 (unsigned long int) casereader_get_case_cnt (reader),
129 for (row = 0; row < n_rows; row++)
133 c = casereader_read (reader);
136 mc_error (mc, "casereader_read failed reading row %zu of %zu "
137 "(%zu columns)", row, n_rows, n_columns);
141 for (col = 0; col < n_columns; col++)
143 int width = caseproto_get_width (proto, col);
144 if (!value_equal (case_data_idx (c, col), &array[row][col],
/* Two message forms follow: one prints the elements as numbers,
   the other prints WIDTH bytes of each as a string. */
148 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
150 row, col, n_rows, n_columns,
151 case_num_idx (c, col), array[row][col].f);
153 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
155 row, col, n_rows, n_columns,
156 width, case_str_idx (c, col),
157 width, value_str (&array[row][col], width));
/* A further read is attempted after the N_ROWS expected rows; the
   message below reports surplus cases. */
164 c = casereader_read (reader);
166 mc_error (mc, "casereader has extra cases (expected %zu)", n_rows);
170 /* Checks that datasheet DS has N_ROWS rows, the columns
171 described by PROTO, and the same contents as ARRAY, reporting any
172 mismatches via mc_error. Then, adds DS to MC as a new state. */
/* Verifies DS against the model (ARRAY holds the expected element
   values, PROTO the expected column widths, N_ROWS the expected
   row count) and then passes DS to the model checker with
   mc_add_state.  A state whose hash has been seen before is
   discarded, and DS destroyed, before any checking is done.

   The contents are checked three ways: directly through
   datasheet_get_value, through a casereader made from a clone of
   DS, and through a lazy casereader wrapped around another clone.
   All discrepancies are reported with mc_error. */
174 check_datasheet (struct mc *mc, struct datasheet *ds,
175 union value array[MAX_ROWS][MAX_COLS],
176 size_t n_rows, const struct caseproto *proto)
178 size_t n_columns = caseproto_get_n_widths (proto);
179 struct datasheet *ds2;
180 struct casereader *reader;
181 unsigned long int serial = 0;
/* ARRAY has exactly MAX_ROWS by MAX_COLS elements, so a model
   that fills it completely is still within bounds. */
183 assert (n_rows <= MAX_ROWS);
184 assert (n_columns <= MAX_COLS);
186 /* If it is a duplicate hash, discard the state before checking
187 its consistency, to save time. */
188 if (mc_discard_dup_state (mc, hash_datasheet (ds)))
190 datasheet_destroy (ds);
194 /* Check contents of datasheet via datasheet functions. */
195 if (!check_caseproto (mc, proto, datasheet_get_proto (ds), "datasheet"))
197 /* check_caseproto emitted errors already. */
199 else if (n_rows != datasheet_get_n_rows (ds))
200 mc_error (mc, "row count (%lu) does not match expected (%zu)",
201 (unsigned long int) datasheet_get_n_rows (ds), n_rows);
205 bool difference = false;
207 for (row = 0; row < n_rows; row++)
208 for (col = 0; col < n_columns; col++)
210 int width = caseproto_get_width (proto, col);
211 union value *av = &array[row][col];
214 value_init (&v, width);
215 if (!datasheet_get_value (ds, row, col, &v))
217 if (!value_equal (&v, av, width))
220 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
221 "%g != %g", row, col, n_rows, n_columns,
224 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
226 row, col, n_rows, n_columns,
227 width, value_str (&v, width),
228 width, value_str (av, width));
231 value_destroy (&v, width);
238 mc_error (mc, "expected:");
240 for (row = 0; row < n_rows; row++)
243 ds_put_format (&s, "row %zu:", row);
244 for (col = 0; col < n_columns; col++)
246 const union value *v = &array[row][col];
247 int width = caseproto_get_width (proto, col);
249 ds_put_format (&s, " %g", v->f);
251 ds_put_format (&s, " '%.*s'", width, value_str (v, width));
253 mc_error (mc, "%s", ds_cstr (&s));
256 mc_error (mc, "actual:");
258 for (row = 0; row < n_rows; row++)
261 ds_put_format (&s, "row %zu:", row);
262 for (col = 0; col < n_columns; col++)
266 if (!datasheet_get_value (ds, row, col, &v))
268 ds_put_format (&s, " %g", v.f);
270 mc_error (mc, "%s", ds_cstr (&s));
277 /* Check that datasheet contents are correct when read through
a casereader. */
279 ds2 = clone_datasheet (ds);
280 reader = datasheet_make_reader (ds2);
281 check_datasheet_casereader (mc, reader, array, n_rows, proto);
282 casereader_destroy (reader);
284 /* Check that datasheet contents are correct when read through
285 casereader with lazy_casereader wrapped around it. This is
286 valuable because otherwise there is no non-GUI code that
287 uses the lazy_casereader. */
288 ds2 = clone_datasheet (ds);
289 reader = lazy_casereader_create (datasheet_get_proto (ds2), n_rows,
290 lazy_callback, ds2, &serial);
291 check_datasheet_casereader (mc, reader, array, n_rows, proto);
292 if (lazy_casereader_destroy (reader, serial))
294 /* Lazy casereader was never instantiated. This will
295 only happen if there are no rows (because in that case
296 casereader_read never gets called). */
297 datasheet_destroy (ds2);
299 mc_error (mc, "lazy casereader not instantiated, but should "
300 "have been (size %zu,%zu)", n_rows, n_columns);
304 /* Lazy casereader was instantiated. This is the common
305 case, in which some casereader operation
306 (casereader_read in this case) was performed on the
lazy casereader. */
308 casereader_destroy (reader);
310 mc_error (mc, "lazy casereader instantiated, but should not "
311 "have been (size %zu,%zu)", n_rows, n_columns);
314 mc_add_state (mc, ds);
317 /* Extracts the contents of DS into DATA. */
/* Reads every element of DS into DATA.  Each destination value is
   first initialized with value_init for its column's width, taken
   from DS's prototype, and then filled by datasheet_get_value.
   Values initialized here are destroyed elsewhere in this file
   with release_data. */
319 extract_data (const struct datasheet *ds, union value data[MAX_ROWS][MAX_COLS])
321 const struct caseproto *proto = datasheet_get_proto (ds);
322 size_t n_columns = datasheet_get_n_columns (ds);
323 size_t n_rows = datasheet_get_n_rows (ds);
/* DATA holds MAX_ROWS by MAX_COLS elements, so a datasheet of
   exactly that size still fits. */
326 assert (n_rows <= MAX_ROWS);
327 assert (n_columns <= MAX_COLS);
328 for (row = 0; row < n_rows; row++)
329 for (col = 0; col < n_columns; col++)
331 int width = caseproto_get_width (proto, col);
332 union value *v = &data[row][col];
333 value_init (v, width);
334 if (!datasheet_get_value (ds, row, col, v))
339 /* Copies the contents of ODATA into DATA. Each of the N_ROWS
340 rows of ODATA and DATA must have prototype PROTO. */
/* Duplicates the first N_ROWS rows of ODATA into DATA.  PROTO
   supplies the number of columns and the width of each one; every
   destination value is initialized with value_init before
   value_copy fills it, so DATA is expected to be uninitialized on
   entry. */
342 clone_data (size_t n_rows, const struct caseproto *proto,
343 union value odata[MAX_ROWS][MAX_COLS],
344 union value data[MAX_ROWS][MAX_COLS])
346 size_t n_columns = caseproto_get_n_widths (proto);
/* Both arrays hold MAX_ROWS by MAX_COLS elements, so completely
   full arrays are still within bounds. */
349 assert (n_rows <= MAX_ROWS);
350 assert (n_columns <= MAX_COLS);
351 for (row = 0; row < n_rows; row++)
352 for (col = 0; col < n_columns; col++)
354 int width = caseproto_get_width (proto, col);
355 const union value *ov = &odata[row][col];
356 union value *v = &data[row][col];
357 value_init (v, width);
358 value_copy (v, ov, width);
/* Destroys the values in the first N_ROWS rows of DATA, whose
   columns are described by PROTO.  The work is done column by
   column so that columns whose width does not need initialization
   (as reported by value_needs_init) are skipped entirely. */
363 release_data (size_t n_rows, const struct caseproto *proto,
364 union value data[MAX_ROWS][MAX_COLS])
366 size_t n_columns = caseproto_get_n_widths (proto);
/* DATA holds MAX_ROWS by MAX_COLS elements, so a completely full
   array is still within bounds. */
369 assert (n_rows <= MAX_ROWS);
370 assert (n_columns <= MAX_COLS);
371 for (col = 0; col < n_columns; col++)
373 int width = caseproto_get_width (proto, col);
374 if (value_needs_init (width))
375 for (row = 0; row < n_rows; row++)
376 value_destroy (&data[row][col], width);
380 /* Clones the structure and contents of ODS into *DS,
381 and the contents of ODATA into DATA. */
/* Makes an independent copy of a model.  *DS receives the result
   of clone_datasheet (ODS); the element values are duplicated by
   clone_data, using the row count and prototype obtained from
   ODS. */
383 clone_model (const struct datasheet *ods,
384 union value odata[MAX_ROWS][MAX_COLS],
385 struct datasheet **ds,
386 union value data[MAX_ROWS][MAX_COLS])
388 *ds = clone_datasheet (ods);
389 clone_data (datasheet_get_n_rows (ods), datasheet_get_proto (ods),
393 /* "init" function for struct mc_class. */
/* Builds the initial state for the model checker from the
   parameters stored as MC's aux data.  When both backing
   dimensions are zero an unbacked datasheet is created; otherwise
   a backing_rows by backing_cols numeric datasheet is created on
   top of a casereader, filled with consecutive values drawn from
   params->next_value.  Either way the new datasheet is handed to
   check_datasheet together with the expected contents.

   NOTE(review): in the unbacked branch the prototype returned by
   caseproto_create () is passed straight to check_datasheet and no
   release of it is visible here.  In the backed branch PROTO is
   passed to check_datasheet after caseproto_unref (proto) has
   already been called on it; presumably the writer, reader or
   datasheet keeps it alive -- confirm. */
395 datasheet_mc_init (struct mc *mc)
397 struct datasheet_test_params *params = mc_get_aux (mc);
398 struct datasheet *ds;
400 if (params->backing_rows == 0 && params->backing_cols == 0)
402 /* Create unbacked datasheet. */
403 ds = datasheet_create (NULL);
404 mc_name_operation (mc, "empty datasheet");
405 check_datasheet (mc, ds, NULL, 0, caseproto_create ());
409 /* Create datasheet with backing. */
410 struct casewriter *writer;
411 struct casereader *reader;
412 union value data[MAX_ROWS][MAX_COLS];
413 struct caseproto *proto;
/* Both backing dimensions must be positive here, and small enough
   for DATA to hold the expected contents. */
416 assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
417 assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
419 /* XXX support different backing column widths */
420 proto = caseproto_create ();
421 for (col = 0; col < params->backing_cols; col++)
422 proto = caseproto_add_width (proto, 0);
424 writer = mem_writer_create (proto);
425 for (row = 0; row < params->backing_rows; row++)
429 c = case_create (proto);
430 for (col = 0; col < params->backing_cols; col++)
/* The same number is stored in the case written to the backing
   store and in DATA, the model of the expected contents. */
432 double value = params->next_value++;
433 data[row][col].f = value;
434 case_data_rw_idx (c, col)->f = value;
436 casewriter_write (writer, c);
438 caseproto_unref (proto);
440 reader = casewriter_make_reader (writer);
441 assert (reader != NULL);
443 ds = datasheet_create (reader);
444 mc_name_operation (mc, "datasheet with (%d,%d) backing",
445 params->backing_rows, params->backing_cols);
446 check_datasheet (mc, ds, data,
447 params->backing_rows, proto);
/* Fills VALUE, which has the given WIDTH, with content derived
   from IDX.  The lines visible here cover string data: byte
   OFFSET of the string is the letter of "ABCDEFGHIJ" selected by
   (hash >> OFFSET) % 10, where hash is hash_int (IDX, 0), so the
   text depends only on IDX and WIDTH (assuming hash_int is a pure
   function).  Handling of other widths is not visible here. */
452 value_from_param (union value *value, int width, int idx)
458 unsigned int hash = hash_int (idx, 0);
459 char *string = value_str_rw (value, width);
463 for (offset = 0; offset < width; offset++)
464 string[offset] = "ABCDEFGHIJ"[(hash >> offset) % 10];
468 /* "mutate" function for struct mc_class. */
/* Generates the successor states of the datasheet ODS_.  The
   contents of ODS_ are first extracted into ODATA.  Then, for each
   kind of operation (insert, delete and move columns; insert,
   delete and move rows) and each combination of arguments tried,
   and only when mc_include_state says the state should be
   explored, the model is cloned, the operation is applied both to
   the cloned datasheet and to the DATA array (and to a prototype
   derived from ODS_'s where columns change), and the result is
   passed to check_datasheet.  The array copies are released after
   each check, and ODATA at the very end.

   Fresh element values come from value_from_param, driven by
   params->next_value, which is advanced on every use. */
470 datasheet_mc_mutate (struct mc *mc, const void *ods_)
472 struct datasheet_test_params *params = mc_get_aux (mc);
474 static const int widths[] = {0, 1, 11};
475 const size_t n_widths = sizeof widths / sizeof *widths;
477 const struct datasheet *ods = ods_;
478 union value odata[MAX_ROWS][MAX_COLS];
479 union value data[MAX_ROWS][MAX_COLS];
480 const struct caseproto *oproto = datasheet_get_proto (ods);
481 size_t n_columns = datasheet_get_n_columns (ods);
482 size_t n_rows = datasheet_get_n_rows (ods);
483 size_t pos, new_pos, cnt, width_idx;
485 extract_data (ods, odata);
487 /* Insert a column in each possible position. */
488 if (n_columns < params->max_cols)
489 for (pos = 0; pos <= n_columns; pos++)
490 for (width_idx = 0; width_idx < n_widths; width_idx++)
491 if (mc_include_state (mc))
493 int width = widths[width_idx];
494 struct caseproto *proto;
495 struct datasheet *ds;
499 mc_name_operation (mc, "insert column at %zu "
500 "(from %zu to %zu columns)",
501 pos, n_columns, n_columns + 1);
502 clone_model (ods, odata, &ds, data);
504 value_init (&new, width);
505 value_from_param (&new, width, params->next_value++);
506 if (!datasheet_insert_column (ds, &new, width, pos))
507 mc_error (mc, "datasheet_insert_column failed");
508 proto = caseproto_insert_width (caseproto_ref (oproto),
511 for (i = 0; i < n_rows; i++)
513 insert_element (&data[i][0], n_columns, sizeof data[i][0],
515 value_init (&data[i][pos], width);
516 value_copy (&data[i][pos], &new, width);
518 value_destroy (&new, width);
520 check_datasheet (mc, ds, data, n_rows, proto);
521 release_data (n_rows, proto, data);
522 caseproto_unref (proto);
525 /* Delete all possible numbers of columns from all possible
positions. */
527 for (pos = 0; pos < n_columns; pos++)
528 for (cnt = 1; cnt < n_columns - pos; cnt++)
529 if (mc_include_state (mc))
531 struct caseproto *proto;
532 struct datasheet *ds;
535 mc_name_operation (mc, "delete %zu columns at %zu "
536 "(from %zu to %zu columns)",
537 cnt, pos, n_columns, n_columns - cnt);
538 clone_model (ods, odata, &ds, data);
540 datasheet_delete_columns (ds, pos, cnt);
541 proto = caseproto_remove_widths (caseproto_ref (oproto), pos, cnt);
543 for (i = 0; i < n_rows; i++)
545 for (j = pos; j < pos + cnt; j++)
546 value_destroy (&data[i][j], caseproto_get_width (oproto, j));
547 remove_range (&data[i], n_columns, sizeof *data[i], pos, cnt);
550 check_datasheet (mc, ds, data, n_rows, proto);
551 release_data (n_rows, proto, data);
552 caseproto_unref (proto);
555 /* Move all possible numbers of columns from all possible
556 existing positions to all possible new positions. */
557 for (pos = 0; pos < n_columns; pos++)
558 for (cnt = 1; cnt < n_columns - pos; cnt++)
559 for (new_pos = 0; new_pos < n_columns - cnt; new_pos++)
560 if (mc_include_state (mc))
562 struct caseproto *proto;
563 struct datasheet *ds;
566 clone_model (ods, odata, &ds, data);
567 mc_name_operation (mc, "move %zu columns (of %zu) from %zu to %zu",
568 cnt, n_columns, pos, new_pos);
570 datasheet_move_columns (ds, pos, new_pos, cnt);
572 for (i = 0; i < n_rows; i++)
573 move_range (&data[i], n_columns, sizeof data[i][0],
575 proto = caseproto_move_widths (caseproto_ref (oproto),
578 check_datasheet (mc, ds, data, n_rows, proto);
579 release_data (n_rows, proto, data);
580 caseproto_unref (proto);
583 /* Insert all possible numbers of rows in all possible
positions. */
585 for (pos = 0; pos <= n_rows; pos++)
586 for (cnt = 1; cnt <= params->max_rows - n_rows; cnt++)
587 if (mc_include_state (mc))
589 struct datasheet *ds;
590 struct ccase *c[MAX_ROWS];
593 clone_model (ods, odata, &ds, data);
594 mc_name_operation (mc, "insert %zu rows at %zu "
595 "(from %zu to %zu rows)",
596 cnt, pos, n_rows, n_rows + cnt);
598 for (i = 0; i < cnt; i++)
600 c[i] = case_create (oproto);
601 for (j = 0; j < n_columns; j++)
602 value_from_param (case_data_rw_idx (c[i], j),
603 caseproto_get_width (oproto, j),
604 params->next_value++);
607 insert_range (data, n_rows, sizeof data[pos], pos, cnt);
608 for (i = 0; i < cnt; i++)
609 for (j = 0; j < n_columns; j++)
611 int width = caseproto_get_width (oproto, j);
612 value_init (&data[i + pos][j], width);
613 value_copy (&data[i + pos][j], case_data_idx (c[i], j), width);
616 if (!datasheet_insert_rows (ds, pos, c, cnt))
617 mc_error (mc, "datasheet_insert_rows failed");
619 check_datasheet (mc, ds, data, n_rows + cnt, oproto);
620 release_data (n_rows + cnt, oproto, data);
623 /* Delete all possible numbers of rows from all possible
positions. */
625 for (pos = 0; pos < n_rows; pos++)
626 for (cnt = 1; cnt < n_rows - pos; cnt++)
627 if (mc_include_state (mc))
629 struct datasheet *ds;
631 clone_model (ods, odata, &ds, data);
632 mc_name_operation (mc, "delete %zu rows at %zu "
633 "(from %zu to %zu rows)",
634 cnt, pos, n_rows, n_rows - cnt);
636 datasheet_delete_rows (ds, pos, cnt);
638 release_data (cnt, oproto, &data[pos]);
639 remove_range (&data[0], n_rows, sizeof data[0], pos, cnt);
641 check_datasheet (mc, ds, data, n_rows - cnt, oproto);
642 release_data (n_rows - cnt, oproto, data);
645 /* Move all possible numbers of rows from all possible existing
646 positions to all possible new positions. */
647 for (pos = 0; pos < n_rows; pos++)
648 for (cnt = 1; cnt < n_rows - pos; cnt++)
649 for (new_pos = 0; new_pos < n_rows - cnt; new_pos++)
650 if (mc_include_state (mc))
652 struct datasheet *ds;
654 clone_model (ods, odata, &ds, data);
655 mc_name_operation (mc, "move %zu rows (of %zu) from %zu to %zu",
656 cnt, n_rows, pos, new_pos);
658 datasheet_move_rows (ds, pos, new_pos, cnt);
660 move_range (&data[0], n_rows, sizeof data[0],
663 check_datasheet (mc, ds, data, n_rows, oproto);
664 release_data (n_rows, oproto, data);
667 release_data (n_rows, oproto, odata);
670 /* "destroy" function for struct mc_class. */
/* Disposes of a model-checker state.  DS_ is treated as the
   struct datasheet that was added as a state and is passed to
   datasheet_destroy; MC itself is not used. */
672 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
674 struct datasheet *ds = ds_;
675 datasheet_destroy (ds);
/* Command-line options specific to this test program.  The four
   state-space options each require an argument; "help" takes none
   and is the only entry with a single-character form ('h').  The
   last member of each entry is the OPT_* identifier for the
   option (presumably the ID passed to datasheet_option_callback
   -- confirm). */
688 static struct argv_option datasheet_argv_options[N_DATASHEET_OPTIONS] =
690 {"max-rows", 0, required_argument, OPT_MAX_ROWS},
691 {"max-columns", 0, required_argument, OPT_MAX_COLUMNS},
692 {"backing-rows", 0, required_argument, OPT_BACKING_ROWS},
693 {"backing-columns", 0, required_argument, OPT_BACKING_COLUMNS},
694 {"help", 'h', no_argument, OPT_HELP},
697 static void usage (void);
/* Handles one parsed command-line option.  ID identifies the
   option and PARAMS_ is the struct datasheet_test_params to
   update; the option's argument is taken from optarg.

   NOTE(review): the arguments are converted with atoi, which has
   no way to report a malformed or out-of-range number.  Only an
   upper clamp (in main) is visible afterwards, so negative values
   are presumably not rejected -- confirm. */
700 datasheet_option_callback (int id, void *params_)
702 struct datasheet_test_params *params = params_;
706 params->max_rows = atoi (optarg);
709 case OPT_MAX_COLUMNS:
710 params->max_cols = atoi (optarg);
713 case OPT_BACKING_ROWS:
714 params->backing_rows = atoi (optarg);
717 case OPT_BACKING_COLUMNS:
718 params->backing_cols = atoi (optarg);
/* Prints the help message.  The option names listed here must
   match the long option names in datasheet_argv_options; the
   column limit is set with --max-columns, not a second
   --max-rows. */
733 printf ("%s, for testing the datasheet implementation.\n"
734 "Usage: %s [OPTION]...\n"
735 "\nTest state space parameters (min...max, default):\n"
736 " --max-rows=N Maximum number of rows (0...5, 3)\n"
737 " --max-columns=N Maximum number of columns (0...5, 3)\n"
738 " --backing-rows=N Rows of backing store (0...max_rows, 0)\n"
739 " --backing-columns=N Columns of backing store (0...max_cols, 0)\n",
740 program_name, program_name);
742 fputs ("\nOther options:\n"
743 " --help Display this help message\n"
744 "\nReport bugs to <bug-gnu-pspp@gnu.org>\n",
/* Entry point of the datasheet model-checking test.  Sets the
   default parameters, lets the command line override them (both
   the model checker's own options and the datasheet-specific ones
   are registered with a single parser), clamps the parameters to
   the supported ranges, and runs the model checker with the
   parameters attached as its aux data.

   The run counts as a success unless the checker stopped because
   of the maximum error count or an interruption.  Results are
   printed when verbosity is positive or the run failed.  Returns 0
   on success, EXIT_FAILURE otherwise. */
750 main (int argc, char *argv[])
752 static const struct mc_class datasheet_mc_class =
756 datasheet_mc_destroy,
759 struct datasheet_test_params params;
760 struct mc_options *options;
761 struct mc_results *results;
762 struct argv_parser *parser;
766 set_program_name (argv[0]);
768 /* Default parameters. */
771 params.backing_rows = 0;
772 params.backing_cols = 0;
773 params.next_value = 1;
775 /* Parse command line. */
776 parser = argv_parser_create ();
777 options = mc_options_create ();
778 mc_options_register_argv_parser (options, parser);
779 argv_parser_add_options (parser, datasheet_argv_options, N_DATASHEET_OPTIONS,
780 datasheet_option_callback, &params);
781 if (!argv_parser_run (parser, argc, argv))
783 argv_parser_destroy (parser);
784 verbosity = mc_options_get_verbosity (options);
786 /* Force parameters into allowed ranges. */
787 params.max_rows = MIN (params.max_rows, MAX_ROWS);
788 params.max_cols = MIN (params.max_cols, MAX_COLS);
789 params.backing_rows = MIN (params.backing_rows, params.max_rows);
790 params.backing_cols = MIN (params.backing_cols, params.max_cols);
791 mc_options_set_aux (options, &params);
792 results = mc_run (&datasheet_mc_class, options);
794 /* Output results. */
795 success = (mc_results_get_stop_reason (results) != MC_MAX_ERROR_COUNT
796 && mc_results_get_stop_reason (results) != MC_INTERRUPTED);
797 if (verbosity > 0 || !success)
799 printf ("Parameters: "
800 "--max-rows=%d --max-columns=%d "
801 "--backing-rows=%d --backing-columns=%d\n\n",
802 params.max_rows, params.max_cols,
803 params.backing_rows, params.backing_cols);
804 mc_results_print (results, stdout);
806 mc_results_destroy (results);
808 return success ? 0 : EXIT_FAILURE;