1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/datasheet.h>
25 #include <data/casereader-provider.h>
26 #include <data/casereader.h>
27 #include <data/casewriter.h>
28 #include <data/lazy-casereader.h>
29 #include <libpspp/argv-parser.h>
30 #include <libpspp/array.h>
31 #include <libpspp/assertion.h>
32 #include <libpspp/hash-functions.h>
33 #include <libpspp/model-checker.h>
34 #include <libpspp/range-map.h>
35 #include <libpspp/range-set.h>
36 #include <libpspp/str.h>
37 #include <libpspp/taint.h>
38 #include <libpspp/tower.h>
/* Callback handed to lazy_casereader_create.  DS_ is the
   datasheet passed as auxiliary data; the callback builds a
   casereader over it with datasheet_make_reader and returns
   that reader. */
static struct casereader *
lazy_callback (void *ds_)
{
  return datasheet_make_reader (ds_);
}
/* Maximum size of datasheet supported for model checking
   purposes. */
62 struct datasheet_test_params
65 int max_rows; /* Maximum number of rows. */
66 int max_cols; /* Maximum number of columns. */
67 int backing_rows; /* Number of rows of backing store. */
68 int backing_cols; /* Number of columns of backing store. */
69 int widths[MAX_WIDTHS]; /* Allowed column widths. */
/* Compares prototype TEST against the expected prototype
   BENCHMARK.  Every mismatch (a differing column count, or a
   differing width in any column) is reported through mc_error on
   MC, with TEST_NAME used as a label in the message.

   Returns true if the prototypes agree, false otherwise.  When
   the column counts differ, the individual widths are not
   compared. */
static bool
check_caseproto (struct mc *mc, const struct caseproto *benchmark,
                 const struct caseproto *test, const char *test_name)
{
  size_t n_columns = caseproto_get_n_widths (benchmark);
  bool widths_match = true;

  if (n_columns != caseproto_get_n_widths (test))
    {
      mc_error (mc, "%s column count (%zu) does not match expected (%zu)",
                test_name, caseproto_get_n_widths (test), n_columns);
      return false;
    }

  for (size_t col = 0; col < n_columns; col++)
    {
      int benchmark_width = caseproto_get_width (benchmark, col);
      int test_width = caseproto_get_width (test, col);

      if (benchmark_width == test_width)
        continue;

      mc_error (mc, "%s column %zu width (%d) differs from expected (%d)",
                test_name, col, test_width, benchmark_width);
      widths_match = false;
    }

  return widths_match;
}
106 /* Checks that READER contains the N_ROWS rows and N_COLUMNS
107 columns of data in ARRAY, reporting any errors via MC. */
109 check_datasheet_casereader (struct mc *mc, struct casereader *reader,
110 union value array[MAX_ROWS][MAX_COLS],
111 size_t n_rows, const struct caseproto *proto)
113 size_t n_columns = caseproto_get_n_widths (proto);
115 if (!check_caseproto (mc, proto, casereader_get_proto (reader),
118 else if (casereader_get_case_cnt (reader) != n_rows)
120 if (casereader_get_case_cnt (reader) == CASENUMBER_MAX
121 && casereader_count_cases (reader) == n_rows)
122 mc_error (mc, "datasheet casereader has unknown case count");
124 mc_error (mc, "casereader row count (%lu) does not match "
126 (unsigned long int) casereader_get_case_cnt (reader),
134 for (row = 0; row < n_rows; row++)
138 c = casereader_read (reader);
141 mc_error (mc, "casereader_read failed reading row %zu of %zu "
142 "(%zu columns)", row, n_rows, n_columns);
146 for (col = 0; col < n_columns; col++)
148 int width = caseproto_get_width (proto, col);
149 if (!value_equal (case_data_idx (c, col), &array[row][col],
153 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
155 row, col, n_rows, n_columns,
156 case_num_idx (c, col), array[row][col].f);
158 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
160 row, col, n_rows, n_columns,
161 width, case_str_idx (c, col),
162 width, value_str (&array[row][col], width));
169 c = casereader_read (reader);
171 mc_error (mc, "casereader has extra cases (expected %zu)", n_rows);
175 /* Checks that datasheet DS contains has N_ROWS rows, N_COLUMNS
176 columns, and the same contents as ARRAY, reporting any
177 mismatches via mc_error. Then, adds DS to MC as a new state. */
179 check_datasheet (struct mc *mc, struct datasheet *ds,
180 union value array[MAX_ROWS][MAX_COLS],
181 size_t n_rows, const struct caseproto *proto)
183 size_t n_columns = caseproto_get_n_widths (proto);
184 struct datasheet *ds2;
185 struct casereader *reader;
186 unsigned long int serial = 0;
188 assert (n_rows < MAX_ROWS);
189 assert (n_columns < MAX_COLS);
191 /* Check contents of datasheet via datasheet functions. */
192 if (!check_caseproto (mc, proto, datasheet_get_proto (ds), "datasheet"))
194 /* check_caseproto emitted errors already. */
196 else if (n_rows != datasheet_get_n_rows (ds))
197 mc_error (mc, "row count (%lu) does not match expected (%zu)",
198 (unsigned long int) datasheet_get_n_rows (ds), n_rows);
202 bool difference = false;
204 for (row = 0; row < n_rows; row++)
205 for (col = 0; col < n_columns; col++)
207 int width = caseproto_get_width (proto, col);
208 union value *av = &array[row][col];
211 value_init (&v, width);
212 if (!datasheet_get_value (ds, row, col, &v))
214 if (!value_equal (&v, av, width))
217 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
218 "%g != %g", row, col, n_rows, n_columns,
221 mc_error (mc, "element %zu,%zu (of %zu,%zu) differs: "
223 row, col, n_rows, n_columns,
224 width, value_str (&v, width),
225 width, value_str (av, width));
228 value_destroy (&v, width);
235 mc_error (mc, "expected:");
237 for (row = 0; row < n_rows; row++)
240 ds_put_format (&s, "row %zu:", row);
241 for (col = 0; col < n_columns; col++)
243 const union value *v = &array[row][col];
244 int width = caseproto_get_width (proto, col);
246 ds_put_format (&s, " %g", v->f);
248 ds_put_format (&s, " '%.*s'", width, value_str (v, width));
250 mc_error (mc, "%s", ds_cstr (&s));
253 mc_error (mc, "actual:");
255 for (row = 0; row < n_rows; row++)
258 ds_put_format (&s, "row %zu:", row);
259 for (col = 0; col < n_columns; col++)
263 if (!datasheet_get_value (ds, row, col, &v))
265 ds_put_format (&s, " %g", v.f);
267 mc_error (mc, "%s", ds_cstr (&s));
274 /* Check that datasheet contents are correct when read through
276 ds2 = clone_datasheet (ds);
277 reader = datasheet_make_reader (ds2);
278 check_datasheet_casereader (mc, reader, array, n_rows, proto);
279 casereader_destroy (reader);
281 /* Check that datasheet contents are correct when read through
282 casereader with lazy_casereader wrapped around it. This is
283 valuable because otherwise there is no non-GUI code that
284 uses the lazy_casereader. */
285 ds2 = clone_datasheet (ds);
286 reader = lazy_casereader_create (datasheet_get_proto (ds2), n_rows,
287 lazy_callback, ds2, &serial);
288 check_datasheet_casereader (mc, reader, array, n_rows, proto);
289 if (lazy_casereader_destroy (reader, serial))
291 /* Lazy casereader was never instantiated. This will
292 only happen if there are no rows (because in that case
293 casereader_read never gets called). */
294 datasheet_destroy (ds2);
296 mc_error (mc, "lazy casereader not instantiated, but should "
297 "have been (size %zu,%zu)", n_rows, n_columns);
301 /* Lazy casereader was instantiated. This is the common
302 case, in which some casereader operation
303 (casereader_read in this case) was performed on the
305 casereader_destroy (reader);
307 mc_error (mc, "lazy casereader instantiated, but should not "
308 "have been (size %zu,%zu)", n_rows, n_columns);
311 if (mc_discard_dup_state (mc, hash_datasheet (ds)))
312 datasheet_destroy (ds);
314 mc_add_state (mc, ds);
317 /* Extracts the contents of DS into DATA. */
319 extract_data (const struct datasheet *ds, union value data[MAX_ROWS][MAX_COLS])
321 const struct caseproto *proto = datasheet_get_proto (ds);
322 size_t n_columns = datasheet_get_n_columns (ds);
323 size_t n_rows = datasheet_get_n_rows (ds);
326 assert (n_rows < MAX_ROWS);
327 assert (n_columns < MAX_COLS);
328 for (row = 0; row < n_rows; row++)
329 for (col = 0; col < n_columns; col++)
331 int width = caseproto_get_width (proto, col);
332 union value *v = &data[row][col];
333 value_init (v, width);
334 if (!datasheet_get_value (ds, row, col, v))
339 /* Copies the contents of ODATA into DATA. Each of the N_ROWS
340 rows of ODATA and DATA must have prototype PROTO. */
342 clone_data (size_t n_rows, const struct caseproto *proto,
343 union value odata[MAX_ROWS][MAX_COLS],
344 union value data[MAX_ROWS][MAX_COLS])
346 size_t n_columns = caseproto_get_n_widths (proto);
349 assert (n_rows < MAX_ROWS);
350 assert (n_columns < MAX_COLS);
351 for (row = 0; row < n_rows; row++)
352 for (col = 0; col < n_columns; col++)
354 int width = caseproto_get_width (proto, col);
355 const union value *ov = &odata[row][col];
356 union value *v = &data[row][col];
357 value_init (v, width);
358 value_copy (v, ov, width);
363 release_data (size_t n_rows, const struct caseproto *proto,
364 union value data[MAX_ROWS][MAX_COLS])
366 size_t n_columns = caseproto_get_n_widths (proto);
369 assert (n_rows < MAX_ROWS);
370 assert (n_columns < MAX_COLS);
371 for (col = 0; col < n_columns; col++)
373 int width = caseproto_get_width (proto, col);
374 if (value_needs_init (width))
375 for (row = 0; row < n_rows; row++)
376 value_destroy (&data[row][col], width);
380 /* Clones the structure and contents of ODS into *DS,
381 and the contents of ODATA into DATA. */
383 clone_model (const struct datasheet *ods,
384 union value odata[MAX_ROWS][MAX_COLS],
385 struct datasheet **ds,
386 union value data[MAX_ROWS][MAX_COLS])
388 *ds = clone_datasheet (ods);
389 clone_data (datasheet_get_n_rows (ods), datasheet_get_proto (ods),
393 /* "init" function for struct mc_class. */
395 datasheet_mc_init (struct mc *mc)
397 struct datasheet_test_params *params = mc_get_aux (mc);
398 struct datasheet *ds;
400 if (params->backing_rows == 0 && params->backing_cols == 0)
402 /* Create unbacked datasheet. */
403 ds = datasheet_create (NULL);
404 mc_name_operation (mc, "empty datasheet");
405 check_datasheet (mc, ds, NULL, 0, caseproto_create ());
409 /* Create datasheet with backing. */
410 struct casewriter *writer;
411 struct casereader *reader;
412 union value data[MAX_ROWS][MAX_COLS];
413 struct caseproto *proto;
416 assert (params->backing_rows > 0 && params->backing_rows <= MAX_ROWS);
417 assert (params->backing_cols > 0 && params->backing_cols <= MAX_COLS);
419 /* XXX support different backing column widths */
420 proto = caseproto_create ();
421 for (col = 0; col < params->backing_cols; col++)
422 proto = caseproto_add_width (proto, 0);
424 writer = mem_writer_create (proto);
425 for (row = 0; row < params->backing_rows; row++)
429 c = case_create (proto);
430 for (col = 0; col < params->backing_cols; col++)
432 double value = params->next_value++;
433 data[row][col].f = value;
434 case_data_rw_idx (c, col)->f = value;
436 casewriter_write (writer, c);
438 caseproto_unref (proto);
440 reader = casewriter_make_reader (writer);
441 assert (reader != NULL);
443 ds = datasheet_create (reader);
444 mc_name_operation (mc, "datasheet with (%d,%d) backing",
445 params->backing_rows, params->backing_cols);
446 check_datasheet (mc, ds, data,
447 params->backing_rows, proto);
452 value_from_param (union value *value, int width, int idx)
458 unsigned int hash = hash_int (idx, 0);
459 char *string = value_str_rw (value, width);
463 for (offset = 0; offset < width; offset++)
464 string[offset] = "ABCDEFGHIJ"[(hash >> offset) % 10];
468 /* "mutate" function for struct mc_class. */
470 datasheet_mc_mutate (struct mc *mc, const void *ods_)
472 struct datasheet_test_params *params = mc_get_aux (mc);
474 const struct datasheet *ods = ods_;
475 union value odata[MAX_ROWS][MAX_COLS];
476 union value data[MAX_ROWS][MAX_COLS];
477 const struct caseproto *oproto = datasheet_get_proto (ods);
478 size_t n_columns = datasheet_get_n_columns (ods);
479 size_t n_rows = datasheet_get_n_rows (ods);
480 size_t pos, new_pos, cnt, width_idx;
482 extract_data (ods, odata);
484 /* Insert a column in each possible position. */
485 if (n_columns < params->max_cols)
486 for (pos = 0; pos <= n_columns; pos++)
487 for (width_idx = 0; width_idx < params->n_widths; width_idx++)
488 if (mc_include_state (mc))
490 int width = params->widths[width_idx];
491 struct caseproto *proto;
492 struct datasheet *ds;
496 mc_name_operation (mc, "insert column at %zu "
497 "(from %zu to %zu columns)",
498 pos, n_columns, n_columns + 1);
499 clone_model (ods, odata, &ds, data);
501 value_init (&new, width);
502 value_from_param (&new, width, params->next_value++);
503 if (!datasheet_insert_column (ds, &new, width, pos))
504 mc_error (mc, "datasheet_insert_column failed");
505 proto = caseproto_insert_width (caseproto_ref (oproto),
508 for (i = 0; i < n_rows; i++)
510 insert_element (&data[i][0], n_columns, sizeof data[i][0],
512 value_init (&data[i][pos], width);
513 value_copy (&data[i][pos], &new, width);
515 value_destroy (&new, width);
517 check_datasheet (mc, ds, data, n_rows, proto);
518 release_data (n_rows, proto, data);
519 caseproto_unref (proto);
522 /* Delete all possible numbers of columns from all possible
524 for (pos = 0; pos < n_columns; pos++)
525 for (cnt = 1; cnt < n_columns - pos; cnt++)
526 if (mc_include_state (mc))
528 struct caseproto *proto;
529 struct datasheet *ds;
532 mc_name_operation (mc, "delete %zu columns at %zu "
533 "(from %zu to %zu columns)",
534 cnt, pos, n_columns, n_columns - cnt);
535 clone_model (ods, odata, &ds, data);
537 datasheet_delete_columns (ds, pos, cnt);
538 proto = caseproto_remove_widths (caseproto_ref (oproto), pos, cnt);
540 for (i = 0; i < n_rows; i++)
542 for (j = pos; j < pos + cnt; j++)
543 value_destroy (&data[i][j], caseproto_get_width (oproto, j));
544 remove_range (&data[i], n_columns, sizeof *data[i], pos, cnt);
547 check_datasheet (mc, ds, data, n_rows, proto);
548 release_data (n_rows, proto, data);
549 caseproto_unref (proto);
552 /* Move all possible numbers of columns from all possible
553 existing positions to all possible new positions. */
554 for (pos = 0; pos < n_columns; pos++)
555 for (cnt = 1; cnt < n_columns - pos; cnt++)
556 for (new_pos = 0; new_pos < n_columns - cnt; new_pos++)
557 if (mc_include_state (mc))
559 struct caseproto *proto;
560 struct datasheet *ds;
563 clone_model (ods, odata, &ds, data);
564 mc_name_operation (mc, "move %zu columns (of %zu) from %zu to %zu",
565 cnt, n_columns, pos, new_pos);
567 datasheet_move_columns (ds, pos, new_pos, cnt);
569 for (i = 0; i < n_rows; i++)
570 move_range (&data[i], n_columns, sizeof data[i][0],
572 proto = caseproto_move_widths (caseproto_ref (oproto),
575 check_datasheet (mc, ds, data, n_rows, proto);
576 release_data (n_rows, proto, data);
577 caseproto_unref (proto);
580 /* Insert all possible numbers of rows in all possible
582 for (pos = 0; pos <= n_rows; pos++)
583 for (cnt = 1; cnt <= params->max_rows - n_rows; cnt++)
584 if (mc_include_state (mc))
586 struct datasheet *ds;
587 struct ccase *c[MAX_ROWS];
590 clone_model (ods, odata, &ds, data);
591 mc_name_operation (mc, "insert %zu rows at %zu "
592 "(from %zu to %zu rows)",
593 cnt, pos, n_rows, n_rows + cnt);
595 for (i = 0; i < cnt; i++)
597 c[i] = case_create (oproto);
598 for (j = 0; j < n_columns; j++)
599 value_from_param (case_data_rw_idx (c[i], j),
600 caseproto_get_width (oproto, j),
601 params->next_value++);
604 insert_range (data, n_rows, sizeof data[pos], pos, cnt);
605 for (i = 0; i < cnt; i++)
606 for (j = 0; j < n_columns; j++)
608 int width = caseproto_get_width (oproto, j);
609 value_init (&data[i + pos][j], width);
610 value_copy (&data[i + pos][j], case_data_idx (c[i], j), width);
613 if (!datasheet_insert_rows (ds, pos, c, cnt))
614 mc_error (mc, "datasheet_insert_rows failed");
616 check_datasheet (mc, ds, data, n_rows + cnt, oproto);
617 release_data (n_rows + cnt, oproto, data);
620 /* Delete all possible numbers of rows from all possible
622 for (pos = 0; pos < n_rows; pos++)
623 for (cnt = 1; cnt < n_rows - pos; cnt++)
624 if (mc_include_state (mc))
626 struct datasheet *ds;
628 clone_model (ods, odata, &ds, data);
629 mc_name_operation (mc, "delete %zu rows at %zu "
630 "(from %zu to %zu rows)",
631 cnt, pos, n_rows, n_rows - cnt);
633 datasheet_delete_rows (ds, pos, cnt);
635 release_data (cnt, oproto, &data[pos]);
636 remove_range (&data[0], n_rows, sizeof data[0], pos, cnt);
638 check_datasheet (mc, ds, data, n_rows - cnt, oproto);
639 release_data (n_rows - cnt, oproto, data);
642 /* Move all possible numbers of rows from all possible existing
643 positions to all possible new positions. */
644 for (pos = 0; pos < n_rows; pos++)
645 for (cnt = 1; cnt < n_rows - pos; cnt++)
646 for (new_pos = 0; new_pos < n_rows - cnt; new_pos++)
647 if (mc_include_state (mc))
649 struct datasheet *ds;
651 clone_model (ods, odata, &ds, data);
652 mc_name_operation (mc, "move %zu rows (of %zu) from %zu to %zu",
653 cnt, n_rows, pos, new_pos);
655 datasheet_move_rows (ds, pos, new_pos, cnt);
657 move_range (&data[0], n_rows, sizeof data[0],
660 check_datasheet (mc, ds, data, n_rows, oproto);
661 release_data (n_rows, oproto, data);
664 release_data (n_rows, oproto, odata);
667 /* "destroy" function for struct mc_class. */
669 datasheet_mc_destroy (const struct mc *mc UNUSED, void *ds_)
671 struct datasheet *ds = ds_;
672 datasheet_destroy (ds);
686 static struct argv_option datasheet_argv_options[N_DATASHEET_OPTIONS] =
688 {"max-rows", 0, required_argument, OPT_MAX_ROWS},
689 {"max-columns", 0, required_argument, OPT_MAX_COLUMNS},
690 {"backing-rows", 0, required_argument, OPT_BACKING_ROWS},
691 {"backing-columns", 0, required_argument, OPT_BACKING_COLUMNS},
692 {"widths", 0, required_argument, OPT_WIDTHS},
693 {"help", 'h', no_argument, OPT_HELP},
696 static void usage (void);
699 datasheet_option_callback (int id, void *params_)
701 struct datasheet_test_params *params = params_;
705 params->max_rows = atoi (optarg);
708 case OPT_MAX_COLUMNS:
709 params->max_cols = atoi (optarg);
712 case OPT_BACKING_ROWS:
713 params->backing_rows = atoi (optarg);
716 case OPT_BACKING_COLUMNS:
717 params->backing_cols = atoi (optarg);
725 params->n_widths = 0;
726 for (w = strtok (optarg, ", "); w != NULL; w = strtok (NULL, ", "))
728 int value = atoi (w);
730 if (params->n_widths >= MAX_WIDTHS)
731 error (1, 0, "Too many widths on --widths (only %d are allowed)",
733 if (!isdigit (w[0]) || value < 0 || value > 31)
734 error (1, 0, "--widths argument must be a list of 1 to %d "
735 "integers between 0 and 31 in increasing order",
738 /* This is an artificial requirement merely to ensure
739 that there are no duplicates. Duplicates aren't a
740 real problem but they would waste time. */
742 error (1, 0, "--widths arguments must be in increasing order");
744 params->widths[params->n_widths++] = value;
746 if (params->n_widths == 0)
747 error (1, 0, "at least one value must be specified on --widths");
763 printf ("%s, for testing the datasheet implementation.\n"
764 "Usage: %s [OPTION]...\n"
765 "\nTest state space parameters (min...max, default):\n"
766 " --max-rows=N Maximum number of rows (0...5, 3)\n"
767 " --max-rows=N Maximum number of columns (0...5, 3)\n"
768 " --backing-rows=N Rows of backing store (0...max_rows, 0)\n"
769 " --backing-columns=N Columns of backing store (0...max_cols, 0)\n"
770 " --widths=W[,W]... Column widths to test, where 0=numeric,\n"
771 " other values are string widths (0,1,11)\n",
772 program_name, program_name);
774 fputs ("\nOther options:\n"
775 " --help Display this help message\n"
776 "\nReport bugs to <bug-gnu-pspp@gnu.org>\n",
782 main (int argc, char *argv[])
784 static const struct mc_class datasheet_mc_class =
788 datasheet_mc_destroy,
791 struct datasheet_test_params params;
792 struct mc_options *options;
793 struct mc_results *results;
794 struct argv_parser *parser;
798 set_program_name (argv[0]);
800 /* Default parameters. */
803 params.backing_rows = 0;
804 params.backing_cols = 0;
805 params.widths[0] = 0;
806 params.widths[1] = 1;
807 params.widths[2] = 11;
809 params.next_value = 1;
811 /* Parse comand line. */
812 parser = argv_parser_create ();
813 options = mc_options_create ();
814 mc_options_register_argv_parser (options, parser);
815 argv_parser_add_options (parser, datasheet_argv_options, N_DATASHEET_OPTIONS,
816 datasheet_option_callback, ¶ms);
817 if (!argv_parser_run (parser, argc, argv))
819 argv_parser_destroy (parser);
820 verbosity = mc_options_get_verbosity (options);
822 /* Force parameters into allowed ranges. */
823 params.max_rows = MIN (params.max_rows, MAX_ROWS);
824 params.max_cols = MIN (params.max_cols, MAX_COLS);
825 params.backing_rows = MIN (params.backing_rows, params.max_rows);
826 params.backing_cols = MIN (params.backing_cols, params.max_cols);
827 mc_options_set_aux (options, ¶ms);
828 results = mc_run (&datasheet_mc_class, options);
830 /* Output results. */
831 success = (mc_results_get_stop_reason (results) != MC_MAX_ERROR_COUNT
832 && mc_results_get_stop_reason (results) != MC_INTERRUPTED);
833 if (verbosity > 0 || !success)
837 printf ("Parameters: "
838 "--max-rows=%d --max-columns=%d "
839 "--backing-rows=%d --backing-columns=%d ",
840 params.max_rows, params.max_cols,
841 params.backing_rows, params.backing_cols);
842 printf ("--widths=");
843 for (i = 0; i < params.n_widths; i++)
847 printf ("%d", params.widths[i]);
850 mc_results_print (results, stdout);
852 mc_results_destroy (results);
854 return success ? 0 : EXIT_FAILURE;