1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/datasheet.h>
25 #include <data/casereader-provider.h>
26 #include <data/casereader.h>
27 #include <data/casewriter.h>
28 #include <data/lazy-casereader.h>
29 #include <libpspp/argv-parser.h>
30 #include <libpspp/array.h>
31 #include <libpspp/assertion.h>
32 #include <libpspp/hash-functions.h>
33 #include <libpspp/model-checker.h>
34 #include <libpspp/range-map.h>
35 #include <libpspp/range-set.h>
36 #include <libpspp/str.h>
37 #include <libpspp/taint.h>
38 #include <libpspp/tower.h>
45 /* lazy_casereader callback function to instantiate a casereader
46 from the datasheet. */
/* DS_ is the untyped auxiliary pointer supplied when the lazy
   casereader was created; it is treated as a struct datasheet and
   handed to datasheet_make_reader, whose result is returned.
   check_datasheet registers this function through
   lazy_casereader_create with a cloned datasheet as the auxiliary
   pointer. */
47 static struct casereader *
48 lazy_callback (void *ds_)
50 struct datasheet *ds = ds_;
51 return datasheet_make_reader (ds);
55 /* Maximum size of datasheet supported for model checking. */
/* Parameters that describe one run of the datasheet model checker.
   A pointer to this structure is installed as the checker's
   auxiliary data in main and fetched with mc_get_aux in
   datasheet_mc_init and datasheet_mc_mutate.
   NOTE(review): code elsewhere in this file also reads
   params->n_widths and params->next_value; those members are not
   among the lines visible in this listing. */
62 struct datasheet_test_params
65 int max_rows; /* Maximum number of rows. */
66 int max_cols; /* Maximum number of columns. */
67 int backing_rows; /* Number of rows of backing store. */
68 int backing_cols; /* Number of columns of backing store. */
69 int widths[MAX_WIDTHS]; /* Allowed column widths. */
/* Compares prototype TEST against prototype BENCHMARK: the number
   of columns, and the width of each column.  Every mismatch is
   reported through mc_error on MC; TEST_NAME identifies the
   object under test in the message text.
   NOTE(review): callers negate the result in an `if', so this
   presumably returns false on a mismatch and true otherwise; the
   return statements are not visible in this listing. */
77 check_caseproto (struct mc *mc, const struct caseproto *benchmark,
78 const struct caseproto *test, const char *test_name)
80 size_t n_columns = caseproto_get_n_widths (benchmark);
/* Column counts must agree before widths are worth comparing. */
84 if (n_columns != caseproto_get_n_widths (test))
86 mc_error (mc, "%s column count (%zu) does not match expected (%zu)",
87 test_name, caseproto_get_n_widths (test), n_columns);
/* Compare the width of each column in turn. */
92 for (col = 0; col < n_columns; col++)
94 int benchmark_width = caseproto_get_width (benchmark, col);
95 int test_width = caseproto_get_width (test, col);
96 if (benchmark_width != test_width)
98 mc_error (mc, "%s column %zu width (%d) differs from expected (%d)",
99 test_name, col, test_width, benchmark_width);
106 /* Checks that READER contains the N_ROWS rows and N_COLUMNS
107 columns of data in ARRAY, reporting any errors via MC. */
/* The column count is not a parameter: it is taken from PROTO,
   the prototype that READER is expected to have.  The checks
   cover READER's prototype (through check_caseproto), its
   reported case count, and every element of every case obtained
   with casereader_read. */
109 check_datasheet_casereader (struct mc *mc, struct casereader *reader,
110 union value array[MAX_ROWS][MAX_COLS],
111 size_t n_rows, const struct caseproto *proto)
113 size_t n_columns = caseproto_get_n_widths (proto);
115 if (!check_caseproto (mc, proto, casereader_get_proto (reader),
118 else if (casereader_get_case_cnt (reader) != n_rows)
/* A reader that reports CASENUMBER_MAX but actually yields N_ROWS
   cases gets its own message, distinct from a plain row count
   mismatch. */
120 if (casereader_get_case_cnt (reader) == CASENUMBER_MAX
121 && casereader_count_cases (reader) == n_rows)
122 mc_error (mc, "datasheet casereader has unknown case count");
124 mc_error (mc, "casereader row count (%lu) does not match "
126 (unsigned long int) casereader_get_case_cnt (reader),
/* Read the cases one by one and compare each element with ARRAY. */
134 for (row = 0; row < n_rows; row++)
138 c = casereader_read (reader);
141 mc_error (mc, "casereader_read failed reading row %zu of %zu "
142 "(%zu columns)", row, n_rows, n_columns);
146 for (col = 0; col < n_columns; col++)
148 int width = caseproto_get_width (proto, col);
149 if (!value_equal (case_data_idx (c, col), &array[row][col],
/* Two message forms follow: one prints the values as numbers, the
   other as strings limited to WIDTH bytes. */
153 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
155 row, col, n_rows, n_columns,
156 case_num_idx (c, col), array[row][col].f);
158 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
160 row, col, n_rows, n_columns,
161 width, case_str_idx (c, col),
162 width, value_str (&array[row][col], width));
/* One further read after the expected rows detects surplus cases. */
169 c = casereader_read (reader);
171 mc_error (mc, "casereader has extra cases (expected %zu)", n_rows);
175 /* Checks that datasheet DS has N_ROWS rows, N_COLUMNS
176 columns, and the same contents as ARRAY, reporting any
177 mismatches via mc_error. Then, adds DS to MC as a new state. */
/* The column count is taken from PROTO, the prototype DS is
   expected to have.  The contents are verified three ways:
   directly through datasheet_get_value, through a casereader made
   from a clone of DS, and through a lazy casereader wrapped
   around another clone.  Finally DS is either destroyed, when
   mc_discard_dup_state reports its hash as a duplicate, or handed
   to mc_add_state.
   NOTE(review): the assertions below use `<', so N_ROWS equal to
   MAX_ROWS (or a column count equal to MAX_COLS) fails them even
   though ARRAY has room for that many; main clamps max_rows and
   max_cols to exactly those limits -- confirm this is intended. */
179 check_datasheet (struct mc *mc, struct datasheet *ds,
180 union value array[MAX_ROWS][MAX_COLS],
181 size_t n_rows, const struct caseproto *proto)
183 size_t n_columns = caseproto_get_n_widths (proto);
184 struct datasheet *ds2;
185 struct casereader *reader;
186 unsigned long int serial = 0;
188 assert (n_rows < MAX_ROWS);
189 assert (n_columns < MAX_COLS);
191 /* Check contents of datasheet via datasheet functions. */
192 if (!check_caseproto (mc, proto, datasheet_get_proto (ds), "datasheet"))
194 /* check_caseproto emitted errors already. */
196 else if (n_rows != datasheet_get_n_rows (ds))
197 mc_error (mc, "row count (%lu) does not match expected (%zu)",
198 (unsigned long int) datasheet_get_n_rows (ds), n_rows);
202 bool difference = false;
204 for (row = 0; row < n_rows; row++)
205 for (col = 0; col < n_columns; col++)
207 int width = caseproto_get_width (proto, col);
208 union value *av = &array[row][col];
211 value_init (&v, width);
212 if (!datasheet_get_value (ds, row, col, &v))
214 if (!value_equal (&v, av, width))
217 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
218 "%g != %g", row, col, n_rows, n_columns,
221 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
223 row, col, n_rows, n_columns,
224 width, value_str (&v, width),
225 width, value_str (av, width));
228 value_destroy (&v, width);
/* Dump the expected contents, one mc_error line per row. */
235 mc_error (mc, "expected:");
237 for (row = 0; row < n_rows; row++)
240 ds_put_format (&s, "row %zu:", row);
241 for (col = 0; col < n_columns; col++)
243 const union value *v = &array[row][col];
244 int width = caseproto_get_width (proto, col);
246 ds_put_format (&s, " %g", v->f);
248 ds_put_format (&s, " '%.*s'", width, value_str (v, width));
250 mc_error (mc, "%s", ds_cstr (&s));
/* Dump the actual datasheet contents in the same layout. */
253 mc_error (mc, "actual:");
255 for (row = 0; row < n_rows; row++)
258 ds_put_format (&s, "row %zu:", row);
259 for (col = 0; col < n_columns; col++)
261 int width = caseproto_get_width (proto, col);
263 value_init (&v, width);
264 if (!datasheet_get_value (ds, row, col, &v))
267 ds_put_format (&s, " %g", v.f);
269 ds_put_format (&s, " '%.*s'",
270 width, value_str (&v, width));
272 mc_error (mc, "%s", ds_cstr (&s));
279 /* Check that datasheet contents are correct when read through
a casereader. */
281 ds2 = clone_datasheet (ds);
282 reader = datasheet_make_reader (ds2);
283 check_datasheet_casereader (mc, reader, array, n_rows, proto);
284 casereader_destroy (reader);
286 /* Check that datasheet contents are correct when read through
287 casereader with lazy_casereader wrapped around it. This is
288 valuable because otherwise there is no non-GUI code that
289 uses the lazy_casereader. */
290 ds2 = clone_datasheet (ds);
291 reader = lazy_casereader_create (datasheet_get_proto (ds2), n_rows,
292 lazy_callback, ds2, &serial);
293 check_datasheet_casereader (mc, reader, array, n_rows, proto);
294 if (lazy_casereader_destroy (reader, serial))
296 /* Lazy casereader was never instantiated. This will
297 only happen if there are no rows (because in that case
298 casereader_read never gets called). */
299 datasheet_destroy (ds2);
301 mc_error (mc, "lazy casereader not instantiated, but should "
302 "have been (size %zu,%zu)", n_rows, n_columns);
306 /* Lazy casereader was instantiated. This is the common
307 case, in which some casereader operation
308 (casereader_read in this case) was performed on the
lazy casereader. */
310 casereader_destroy (reader);
312 mc_error (mc, "lazy casereader instantiated, but should not "
313 "have been (size %zu,%zu)", n_rows, n_columns);
/* Drop DS if an equivalent state was already seen; otherwise
   register it as a new state. */
316 if (mc_discard_dup_state (mc, hash_datasheet (ds)))
317 datasheet_destroy (ds);
319 mc_add_state (mc, ds);
322 /* Extracts the contents of DS into DATA. */
/* Every element DATA[row][col] within DS's row and column counts
   is initialized with value_init, using the column's width from
   DS's prototype, and then filled with datasheet_get_value.
   NOTE(review): the statement executed when datasheet_get_value
   fails is not visible in this listing. */
324 extract_data (const struct datasheet *ds, union value data[MAX_ROWS][MAX_COLS])
326 const struct caseproto *proto = datasheet_get_proto (ds);
327 size_t n_columns = datasheet_get_n_columns (ds);
328 size_t n_rows = datasheet_get_n_rows (ds);
331 assert (n_rows < MAX_ROWS);
332 assert (n_columns < MAX_COLS);
333 for (row = 0; row < n_rows; row++)
334 for (col = 0; col < n_columns; col++)
336 int width = caseproto_get_width (proto, col);
337 union value *v = &data[row][col];
338 value_init (v, width);
339 if (!datasheet_get_value (ds, row, col, v))
344 /* Copies the contents of ODATA into DATA. Each of the N_ROWS
345 rows of ODATA and DATA must have prototype PROTO. */
/* Each destination element is initialized with value_init before
   value_copy fills it, so DATA's elements need not be initialized
   on entry.  Column widths come from PROTO. */
347 clone_data (size_t n_rows, const struct caseproto *proto,
348 union value odata[MAX_ROWS][MAX_COLS],
349 union value data[MAX_ROWS][MAX_COLS])
351 size_t n_columns = caseproto_get_n_widths (proto);
354 assert (n_rows < MAX_ROWS);
355 assert (n_columns < MAX_COLS);
356 for (row = 0; row < n_rows; row++)
357 for (col = 0; col < n_columns; col++)
359 int width = caseproto_get_width (proto, col);
360 const union value *ov = &odata[row][col];
361 union value *v = &data[row][col];
362 value_init (v, width);
363 value_copy (v, ov, width);
/* Destroys the values held in the first N_ROWS rows of DATA,
   whose columns are described by PROTO.  Only columns for which
   value_needs_init reports true for their width are visited; the
   values in every other column are left untouched. */
368 release_data (size_t n_rows, const struct caseproto *proto,
369 union value data[MAX_ROWS][MAX_COLS])
371 size_t n_columns = caseproto_get_n_widths (proto);
374 assert (n_rows < MAX_ROWS);
375 assert (n_columns < MAX_COLS);
/* Column-major traversal: the width test is made once per column. */
376 for (col = 0; col < n_columns; col++)
378 int width = caseproto_get_width (proto, col);
379 if (value_needs_init (width))
380 for (row = 0; row < n_rows; row++)
381 value_destroy (&data[row][col], width);
385 /* Clones the structure and contents of ODS into *DS,
386 and the contents of ODATA into DATA. */
/* *DS receives the result of clone_datasheet; the array copy is
   made by clone_data using ODS's row count and prototype.
   NOTE(review): the remaining arguments of the clone_data call
   are not visible in this listing. */
388 clone_model (const struct datasheet *ods,
389 union value odata[MAX_ROWS][MAX_COLS],
390 struct datasheet **ds,
391 union value data[MAX_ROWS][MAX_COLS])
393 *ds = clone_datasheet (ods);
394 clone_data (datasheet_get_n_rows (ods), datasheet_get_proto (ods),
398 /* "init" function for struct mc_class. */
/* Builds the initial datasheet according to the test parameters
   obtained with mc_get_aux and passes it to check_datasheet.
   With zero backing rows and zero backing columns the datasheet
   is created from a null reader; otherwise it is created from a
   reader over backing_rows cases of backing_cols width-0 columns
   whose values are consecutive values of params->next_value. */
400 datasheet_mc_init (struct mc *mc)
402 struct datasheet_test_params *params = mc_get_aux (mc);
403 struct datasheet *ds;
405 if (params->backing_rows == 0 && params->backing_cols == 0)
407 /* Create unbacked datasheet. */
408 ds = datasheet_create (NULL);
409 mc_name_operation (mc, "empty datasheet");
/* NOTE(review): the prototype created inline below is not kept in
   any variable, so nothing visible here releases it -- confirm
   whether that reference is intentionally left behind. */
410 check_datasheet (mc, ds, NULL, 0, caseproto_create ());
414 /* Create datasheet with backing. */
415 struct casewriter *writer;
416 struct casereader *reader;
417 union value data[MAX_ROWS][MAX_COLS];
418 struct caseproto *proto;
421 assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
422 assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
424 /* XXX support different backing column widths */
425 proto = caseproto_create ();
426 for (col = 0; col < params->backing_cols; col++)
427 proto = caseproto_add_width (proto, 0);
/* Write the backing cases, mirroring every value into DATA so
   that the datasheet can be compared against it afterwards. */
429 writer = mem_writer_create (proto);
430 for (row = 0; row < params->backing_rows; row++)
434 c = case_create (proto);
435 for (col = 0; col < params->backing_cols; col++)
437 double value = params->next_value++;
438 data[row][col].f = value;
439 case_data_rw_idx (c, col)->f = value;
441 casewriter_write (writer, c);
/* NOTE(review): PROTO is still passed to check_datasheet after
   this unref, which presumably relies on the writer, reader or
   datasheet holding its own reference -- confirm. */
443 caseproto_unref (proto);
445 reader = casewriter_make_reader (writer);
446 assert (reader != NULL);
448 ds = datasheet_create (reader);
449 mc_name_operation (mc, "datasheet with (%d,%d) backing",
450 params->backing_rows, params->backing_cols);
451 check_datasheet (mc, ds, data,
452 params->backing_rows, proto);
/* Fills VALUE, which has the given WIDTH, with data derived from
   IDX.  The lines visible here handle string values: byte OFFSET
   of the string is one of the letters A through J, chosen by
   shifting hash_int (IDX, 0) right by OFFSET bits and reducing
   modulo 10.
   NOTE(review): the handling of numeric values is not visible in
   this listing. */
457 value_from_param (union value *value, int width, int idx)
463 unsigned int hash = hash_int (idx, 0);
464 char *string = value_str_rw (value, width);
468 for (offset = 0; offset < width; offset++)
469 string[offset] = "ABCDEFGHIJ"[(hash >> offset) % 10];
473 /* "mutate" function for struct mc_class. */
475 datasheet_mc_mutate (struct mc *mc, const void *ods_)
477 struct datasheet_test_params *params = mc_get_aux (mc);
479 const struct datasheet *ods = ods_;
480 union value odata[MAX_ROWS][MAX_COLS];
481 union value data[MAX_ROWS][MAX_COLS];
482 const struct caseproto *oproto = datasheet_get_proto (ods);
483 size_t n_columns = datasheet_get_n_columns (ods);
484 size_t n_rows = datasheet_get_n_rows (ods);
485 size_t pos, new_pos, cnt, width_idx;
487 extract_data (ods, odata);
489 /* Insert a column in each possible position. */
490 if (n_columns < params->max_cols)
491 for (pos = 0; pos <= n_columns; pos++)
492 for (width_idx = 0; width_idx < params->n_widths; width_idx++)
493 if (mc_include_state (mc))
495 int width = params->widths[width_idx];
496 struct caseproto *proto;
497 struct datasheet *ds;
501 mc_name_operation (mc, "insert column at %zu "
502 "(from %zu to %zu columns)",
503 pos, n_columns, n_columns + 1);
504 clone_model (ods, odata, &ds, data);
506 value_init (&new, width);
507 value_from_param (&new, width, params->next_value++);
508 if (!datasheet_insert_column (ds, &new, width, pos))
509 mc_error (mc, "datasheet_insert_column failed");
510 proto = caseproto_insert_width (caseproto_ref (oproto),
513 for (i = 0; i < n_rows; i++)
515 insert_element (&data[i][0], n_columns, sizeof data[i][0],
517 value_init (&data[i][pos], width);
518 value_copy (&data[i][pos], &new, width);
520 value_destroy (&new, width);
522 check_datasheet (mc, ds, data, n_rows, proto);
523 release_data (n_rows, proto, data);
524 caseproto_unref (proto);
527 /* Delete all possible numbers of columns from all possible
529 for (pos = 0; pos < n_columns; pos++)
530 for (cnt = 1; cnt < n_columns - pos; cnt++)
531 if (mc_include_state (mc))
533 struct caseproto *proto;
534 struct datasheet *ds;
537 mc_name_operation (mc, "delete %zu columns at %zu "
538 "(from %zu to %zu columns)",
539 cnt, pos, n_columns, n_columns - cnt);
540 clone_model (ods, odata, &ds, data);
542 datasheet_delete_columns (ds, pos, cnt);
543 proto = caseproto_remove_widths (caseproto_ref (oproto), pos, cnt);
545 for (i = 0; i < n_rows; i++)
547 for (j = pos; j < pos + cnt; j++)
548 value_destroy (&data[i][j], caseproto_get_width (oproto, j));
549 remove_range (&data[i], n_columns, sizeof *data[i], pos, cnt);
552 check_datasheet (mc, ds, data, n_rows, proto);
553 release_data (n_rows, proto, data);
554 caseproto_unref (proto);
557 /* Move all possible numbers of columns from all possible
558 existing positions to all possible new positions. */
559 for (pos = 0; pos < n_columns; pos++)
560 for (cnt = 1; cnt < n_columns - pos; cnt++)
561 for (new_pos = 0; new_pos < n_columns - cnt; new_pos++)
562 if (mc_include_state (mc))
564 struct caseproto *proto;
565 struct datasheet *ds;
568 clone_model (ods, odata, &ds, data);
569 mc_name_operation (mc, "move %zu columns (of %zu) from %zu to %zu",
570 cnt, n_columns, pos, new_pos);
572 datasheet_move_columns (ds, pos, new_pos, cnt);
574 for (i = 0; i < n_rows; i++)
575 move_range (&data[i], n_columns, sizeof data[i][0],
577 proto = caseproto_move_widths (caseproto_ref (oproto),
580 check_datasheet (mc, ds, data, n_rows, proto);
581 release_data (n_rows, proto, data);
582 caseproto_unref (proto);
585 /* Insert all possible numbers of rows in all possible
587 for (pos = 0; pos <= n_rows; pos++)
588 for (cnt = 1; cnt <= params->max_rows - n_rows; cnt++)
589 if (mc_include_state (mc))
591 struct datasheet *ds;
592 struct ccase *c[MAX_ROWS];
595 clone_model (ods, odata, &ds, data);
596 mc_name_operation (mc, "insert %zu rows at %zu "
597 "(from %zu to %zu rows)",
598 cnt, pos, n_rows, n_rows + cnt);
600 for (i = 0; i < cnt; i++)
602 c[i] = case_create (oproto);
603 for (j = 0; j < n_columns; j++)
604 value_from_param (case_data_rw_idx (c[i], j),
605 caseproto_get_width (oproto, j),
606 params->next_value++);
609 insert_range (data, n_rows, sizeof data[pos], pos, cnt);
610 for (i = 0; i < cnt; i++)
611 for (j = 0; j < n_columns; j++)
613 int width = caseproto_get_width (oproto, j);
614 value_init (&data[i + pos][j], width);
615 value_copy (&data[i + pos][j], case_data_idx (c[i], j), width);
618 if (!datasheet_insert_rows (ds, pos, c, cnt))
619 mc_error (mc, "datasheet_insert_rows failed");
621 check_datasheet (mc, ds, data, n_rows + cnt, oproto);
622 release_data (n_rows + cnt, oproto, data);
625 /* Delete all possible numbers of rows from all possible
627 for (pos = 0; pos < n_rows; pos++)
628 for (cnt = 1; cnt < n_rows - pos; cnt++)
629 if (mc_include_state (mc))
631 struct datasheet *ds;
633 clone_model (ods, odata, &ds, data);
634 mc_name_operation (mc, "delete %zu rows at %zu "
635 "(from %zu to %zu rows)",
636 cnt, pos, n_rows, n_rows - cnt);
638 datasheet_delete_rows (ds, pos, cnt);
640 release_data (cnt, oproto, &data[pos]);
641 remove_range (&data[0], n_rows, sizeof data[0], pos, cnt);
643 check_datasheet (mc, ds, data, n_rows - cnt, oproto);
644 release_data (n_rows - cnt, oproto, data);
647 /* Move all possible numbers of rows from all possible existing
648 positions to all possible new positions. */
649 for (pos = 0; pos < n_rows; pos++)
650 for (cnt = 1; cnt < n_rows - pos; cnt++)
651 for (new_pos = 0; new_pos < n_rows - cnt; new_pos++)
652 if (mc_include_state (mc))
654 struct datasheet *ds;
656 clone_model (ods, odata, &ds, data);
657 mc_name_operation (mc, "move %zu rows (of %zu) from %zu to %zu",
658 cnt, n_rows, pos, new_pos);
660 datasheet_move_rows (ds, pos, new_pos, cnt);
662 move_range (&data[0], n_rows, sizeof data[0],
665 check_datasheet (mc, ds, data, n_rows, oproto);
666 release_data (n_rows, oproto, data);
669 release_data (n_rows, oproto, odata);
672 /* "destroy" function for struct mc_class. */
/* DS_ is a state pointer; it is treated as a struct datasheet and
   released with datasheet_destroy.  MC is not used. */
674 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
676 struct datasheet *ds = ds_;
677 datasheet_destroy (ds);
/* Command-line options specific to this test program.  Each entry
   gives a long option name, a short option character (0 for
   none), whether an argument is taken, and the OPT_* identifier
   passed to datasheet_option_callback.  The table is registered
   with argv_parser_add_options in main. */
691 static struct argv_option datasheet_argv_options[N_DATASHEET_OPTIONS] =
693 {"max-rows", 0, required_argument, OPT_MAX_ROWS},
694 {"max-columns", 0, required_argument, OPT_MAX_COLUMNS},
695 {"backing-rows", 0, required_argument, OPT_BACKING_ROWS},
696 {"backing-columns", 0, required_argument, OPT_BACKING_COLUMNS},
697 {"widths", 0, required_argument, OPT_WIDTHS},
698 {"help", 'h', no_argument, OPT_HELP},
/* Forward declaration of the help-text printer. */
701 static void usage (void);
/* Option callback registered with argv_parser_add_options in
   main.  ID is one of the OPT_* identifiers from
   datasheet_argv_options and PARAMS_ points to the struct
   datasheet_test_params to update.  The numeric options store
   atoi (optarg) in the matching member.  The --widths argument is
   split on commas and spaces; each item must start with a digit
   and lie between 0 and 31, at most MAX_WIDTHS items are
   accepted, and at least one is required.  Violations are
   reported with error (1, 0, ...). */
704 datasheet_option_callback (int id, void *params_)
706 struct datasheet_test_params *params = params_;
710 params->max_rows = atoi (optarg);
713 case OPT_MAX_COLUMNS:
714 params->max_cols = atoi (optarg);
717 case OPT_BACKING_ROWS:
718 params->backing_rows = atoi (optarg);
721 case OPT_BACKING_COLUMNS:
722 params->backing_cols = atoi (optarg);
730 params->n_widths = 0;
731 for (w = strtok (optarg, ", "); w != NULL; w = strtok (NULL, ", "))
733 int value = atoi (w);
735 if (params->n_widths >= MAX_WIDTHS)
736 error (1, 0, "Too many widths on --widths (only %d are allowed)",
/* The cast keeps the argument of isdigit within the range of
   unsigned char, as the <ctype.h> functions require. */
738 if (!isdigit ((unsigned char) w[0]) || value < 0 || value > 31)
739 error (1, 0, "--widths argument must be a list of 1 to %d "
740 "integers between 0 and 31 in increasing order",
743 /* This is an artificial requirement merely to ensure
744 that there are no duplicates. Duplicates aren't a
745 real problem but they would waste time. */
747 error (1, 0, "--widths arguments must be in increasing order");
749 params->widths[params->n_widths++] = value;
751 if (params->n_widths == 0)
752 error (1, 0, "at least one value must be specified on --widths");
/* Help text: the program name and synopsis, the state space
   parameters specific to this test, and the remaining options.
   The option names shown here match the long names in
   datasheet_argv_options.
   NOTE(review): the function header and the end of the function
   are not among the lines visible in this listing. */
768 printf ("%s, for testing the datasheet implementation.\n"
769 "Usage: %s [OPTION]...\n"
770 "\nTest state space parameters (min...max, default):\n"
771 " --max-rows=N Maximum number of rows (0...5, 3)\n"
772 " --max-columns=N Maximum number of columns (0...5, 3)\n"
773 " --backing-rows=N Rows of backing store (0...max_rows, 0)\n"
774 " --backing-columns=N Columns of backing store (0...max_cols, 0)\n"
775 " --widths=W[,W]... Column widths to test, where 0=numeric,\n"
776 " other values are string widths (0,1,11)\n",
777 program_name, program_name);
779 fputs ("\nOther options:\n"
780 " --help Display this help message\n"
781 "\nReport bugs to <bug-gnu-pspp@gnu.org>\n",
/* Entry point.  Sets default test parameters, parses the command
   line (the model checker's own options plus those in
   datasheet_argv_options), clamps the parameters to the supported
   ranges, runs the model checker over datasheet_mc_class with the
   parameters as auxiliary data, and prints the results.  The exit
   status is 0 unless the run stopped because of the maximum error
   count or an interruption, in which case it is EXIT_FAILURE. */
787 main (int argc, char *argv[])
789 static const struct mc_class datasheet_mc_class =
793 datasheet_mc_destroy,
796 struct datasheet_test_params params;
797 struct mc_options *options;
798 struct mc_results *results;
799 struct argv_parser *parser;
803 set_program_name (argv[0]);
805 /* Default parameters. */
808 params.backing_rows = 0;
809 params.backing_cols = 0;
810 params.widths[0] = 0;
811 params.widths[1] = 1;
812 params.widths[2] = 11;
814 params.next_value = 1;
816 /* Parse command line. */
817 parser = argv_parser_create ();
818 options = mc_options_create ();
819 mc_options_register_argv_parser (options, parser);
820 argv_parser_add_options (parser, datasheet_argv_options, N_DATASHEET_OPTIONS,
821 datasheet_option_callback, &params);
822 if (!argv_parser_run (parser, argc, argv))
824 argv_parser_destroy (parser);
825 verbosity = mc_options_get_verbosity (options);
827 /* Force parameters into allowed ranges. */
828 params.max_rows = MIN (params.max_rows, MAX_ROWS);
829 params.max_cols = MIN (params.max_cols, MAX_COLS);
830 params.backing_rows = MIN (params.backing_rows, params.max_rows);
831 params.backing_cols = MIN (params.backing_cols, params.max_cols);
832 mc_options_set_aux (options, &params);
833 results = mc_run (&datasheet_mc_class, options);
835 /* Output results. */
836 success = (mc_results_get_stop_reason (results) != MC_MAX_ERROR_COUNT
837 && mc_results_get_stop_reason (results) != MC_INTERRUPTED);
/* The effective parameters are echoed when the run is verbose or
   unsuccessful, in a form that can be pasted back onto the
   command line. */
838 if (verbosity > 0 || !success)
842 printf ("Parameters: "
843 "--max-rows=%d --max-columns=%d "
844 "--backing-rows=%d --backing-columns=%d ",
845 params.max_rows, params.max_cols,
846 params.backing_rows, params.backing_cols);
847 printf ("--widths=");
848 for (i = 0; i < params.n_widths; i++)
852 printf ("%d", params.widths[i]);
855 mc_results_print (results, stdout);
857 mc_results_destroy (results);
859 return success ? 0 : EXIT_FAILURE;