1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017, 2018 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/file-handle-def.h"
25 #include "data/settings.h"
26 #include "libpspp/i18n.h"
27 #include "libpspp/message.h"
28 #include "libpspp/string-map.h"
29 #include "libpspp/string-set.h"
30 #include "output/driver.h"
31 #include "output/group-item.h"
32 #include "output/page-setup-item.h"
33 #include "output/pivot-table.h"
34 #include "output/spv/light-binary-parser.h"
35 #include "output/spv/spv-legacy-data.h"
36 #include "output/spv/spv-output.h"
37 #include "output/spv/spv-select.h"
38 #include "output/spv/spv.h"
39 #include "output/table-item.h"
40 #include "output/text-item.h"
42 #include "gl/c-ctype.h"
44 #include "gl/progname.h"
45 #include "gl/version-etc.h"
46 #include "gl/xalloc.h"
48 #include <libxml/tree.h>
49 #include <libxml/xpath.h>
50 #include <libxml/xpathInternals.h>
53 #define _(msgid) gettext (msgid)
55 /* -O key=value: Output driver options. */
56 static struct string_map output_options
57 = STRING_MAP_INITIALIZER (output_options);
59 /* --member-name: Include .zip member name in "dir" output. */
60 static bool show_member_names;
62 /* --show-hidden, --select, --commands, ...: Selection criteria. */
63 static struct spv_criteria *criteria;
64 static size_t n_criteria, allocated_criteria;
66 /* --or: Add new element to 'criteria' array. */
67 static bool new_criteria;
69 /* --sort: Sort members under dump-light-table, to make comparisons easier. */
72 /* --raw: Dump raw binary data in dump-light-table. */
75 /* -f, --force: Keep output file even on error. */
78 /* Number of warnings issued. */
79 static size_t n_warnings;
81 static void usage (void);
82 static void parse_options (int argc, char **argv);
85 dump_item (const struct spv_item *item)
87 if (show_member_names && (item->xml_member || item->bin_member))
89 const char *x = item->xml_member;
90 const char *b = item->bin_member;
92 ? xasprintf (_("%s and %s:"), x, b)
93 : xasprintf ("%s:", x ? x : b));
94 text_item_submit (text_item_create_nocopy (TEXT_ITEM_TITLE, s));
97 switch (spv_item_get_type (item))
99 case SPV_ITEM_HEADING:
103 spv_text_submit (item);
107 pivot_table_submit (pivot_table_ref (spv_item_get_table (item)));
116 case SPV_ITEM_OBJECT:
128 print_item_directory (const struct spv_item *item)
130 for (int i = 1; i < spv_item_get_level (item); i++)
133 enum spv_item_type type = spv_item_get_type (item);
134 printf ("- %s", spv_item_type_to_string (type));
136 const char *label = spv_item_get_label (item);
138 printf (" \"%s\"", label);
140 if (type == SPV_ITEM_TABLE)
142 const struct pivot_table *table = spv_item_get_table (item);
143 char *title = pivot_value_to_string (table->title,
144 SETTINGS_VALUE_SHOW_DEFAULT,
145 SETTINGS_VALUE_SHOW_DEFAULT);
146 if (!label || strcmp (title, label))
147 printf (" title \"%s\"", title);
151 const char *command_id = spv_item_get_command_id (item);
153 printf (" command \"%s\"", command_id);
155 const char *subtype = spv_item_get_subtype (item);
156 if (subtype && (!label || strcmp (label, subtype)))
157 printf (" subtype \"%s\"", subtype);
159 if (!spv_item_is_visible (item))
160 printf (" (hidden)");
161 if (show_member_names && (item->xml_member || item->bin_member))
163 if (item->xml_member && item->bin_member)
164 printf (" in %s and %s", item->xml_member, item->bin_member);
165 else if (item->xml_member)
166 printf (" in %s", item->xml_member);
167 else if (item->bin_member)
168 printf (" in %s", item->bin_member);
174 run_detect (int argc UNUSED, char **argv)
176 char *err = spv_detect (argv[1]);
178 error (1, 0, "%s", err);
182 run_directory (int argc UNUSED, char **argv)
184 struct spv_reader *spv;
185 char *err = spv_open (argv[1], &spv);
187 error (1, 0, "%s", err);
189 struct spv_item **items;
191 spv_select (spv, criteria, n_criteria, &items, &n_items);
192 for (size_t i = 0; i < n_items; i++)
193 print_item_directory (items[i]);
/* A sequence of items, as filled in by get_path().

   Short paths live in the inline 'stub' array; longer ones are moved to
   heap memory.  free_path() releases 'nodes' only when it does not point
   at 'stub'. */
struct item_path
  {
    /* The path's elements: either 'stub' or separately allocated memory. */
    const struct spv_item **nodes;

    /* Number of elements of 'nodes' in use. */
    size_t n;

    /* Inline storage for paths of up to N_STUB elements. */
#define N_STUB 10
    const struct spv_item *stub[N_STUB];
  };
/* Exchanges the item pointers stored at A and B. */
static void
swap_nodes (const struct spv_item **a, const struct spv_item **b)
{
  const struct spv_item *saved_b = *b;
  *b = *a;
  *a = saved_b;
}
217 get_path (const struct spv_item *item, struct item_path *path)
219 size_t allocated = 10;
220 path->nodes = path->stub;
225 if (path->n >= allocated)
227 if (path->nodes == path->stub)
228 path->nodes = xmemdup (path->stub, sizeof path->stub);
229 path->nodes = x2nrealloc (path->nodes, &allocated,
230 sizeof *path->nodes);
232 path->nodes[path->n++] = item;
236 for (size_t i = 0; i < path->n / 2; i++)
237 swap_nodes (&path->nodes[i], &path->nodes[path->n - i - 1]);
241 free_path (struct item_path *path)
243 if (path && path->nodes != path->stub)
248 dump_heading_transition (const struct spv_item *old,
249 const struct spv_item *new)
254 struct item_path old_path, new_path;
255 get_path (old, &old_path);
256 get_path (new, &new_path);
259 for (; common < old_path.n && common < new_path.n; common++)
260 if (old_path.nodes[common] != new_path.nodes[common])
263 for (size_t i = common; i < old_path.n; i++)
264 group_close_item_submit (group_close_item_create ());
265 for (size_t i = common; i < new_path.n; i++)
266 group_open_item_submit (group_open_item_create (
267 new_path.nodes[i]->command_id));
269 free_path (&old_path);
270 free_path (&new_path);
274 run_convert (int argc UNUSED, char **argv)
276 struct spv_reader *spv;
277 char *err = spv_open (argv[1], &spv);
279 error (1, 0, "%s", err);
281 output_engine_push ();
282 output_set_filename (argv[1]);
283 string_map_replace (&output_options, "output-file", argv[2]);
284 struct output_driver *driver = output_driver_create (&output_options);
287 output_driver_register (driver);
289 const struct page_setup *ps = spv_get_page_setup (spv);
291 page_setup_item_submit (page_setup_item_create (ps));
293 struct spv_item **items;
295 spv_select (spv, criteria, n_criteria, &items, &n_items);
296 struct spv_item *prev_heading = spv_get_root (spv);
297 for (size_t i = 0; i < n_items; i++)
299 struct spv_item *heading
300 = items[i]->type == SPV_ITEM_HEADING ? items[i] : items[i]->parent;
301 dump_heading_transition (prev_heading, heading);
302 dump_item (items[i]);
303 prev_heading = heading;
305 dump_heading_transition (prev_heading, spv_get_root (spv));
310 output_engine_pop ();
313 if (n_warnings && !force)
315 /* XXX There could be other files to unlink, e.g. the ascii driver can
316 produce additional files with the charts. */
322 run_dump (int argc UNUSED, char **argv)
324 struct spv_reader *spv;
325 char *err = spv_open (argv[1], &spv);
327 error (1, 0, "%s", err);
329 struct spv_item **items;
331 spv_select (spv, criteria, n_criteria, &items, &n_items);
332 for (size_t i = 0; i < n_items; i++)
333 if (items[i]->type == SPV_ITEM_TABLE)
335 pivot_table_dump (spv_item_get_table (items[i]), 0);
344 compare_borders (const void *a_, const void *b_)
346 const struct spvlb_border *const *ap = a_;
347 const struct spvlb_border *const *bp = b_;
348 uint32_t a = (*ap)->border_type;
349 uint32_t b = (*bp)->border_type;
351 return a < b ? -1 : a > b;
355 compare_cells (const void *a_, const void *b_)
357 const struct spvlb_cell *const *ap = a_;
358 const struct spvlb_cell *const *bp = b_;
359 uint64_t a = (*ap)->index;
360 uint64_t b = (*bp)->index;
362 return a < b ? -1 : a > b;
366 run_dump_light_table (int argc UNUSED, char **argv)
368 if (raw && isatty (STDOUT_FILENO))
369 error (1, 0, "not writing binary data to tty");
371 struct spv_reader *spv;
372 char *err = spv_open (argv[1], &spv);
374 error (1, 0, "%s", err);
376 struct spv_item **items;
378 spv_select (spv, criteria, n_criteria, &items, &n_items);
379 for (size_t i = 0; i < n_items; i++)
381 if (!spv_item_is_light_table (items[i]))
389 error = spv_item_get_raw_light_table (items[i], &data, &size);
392 fwrite (data, size, 1, stdout);
398 struct spvlb_table *table;
399 error = spv_item_get_light_table (items[i], &table);
404 qsort (table->borders->borders, table->borders->n_borders,
405 sizeof *table->borders->borders, compare_borders);
406 qsort (table->cells->cells, table->cells->n_cells,
407 sizeof *table->cells->cells, compare_cells);
409 spvlb_print_table (items[i]->bin_member, 0, table);
410 spvlb_free_table (table);
415 msg (ME, "%s", error);
426 run_dump_legacy_data (int argc UNUSED, char **argv)
428 struct spv_reader *spv;
429 char *err = spv_open (argv[1], &spv);
431 error (1, 0, "%s", err);
433 struct spv_item **items;
435 spv_select (spv, criteria, n_criteria, &items, &n_items);
436 for (size_t i = 0; i < n_items; i++)
437 if (spv_item_is_legacy_table (items[i]))
439 struct spv_data data;
445 error = spv_item_get_raw_legacy_data (items[i], &data, &size);
448 fwrite (data, size, 1, stdout);
454 error = spv_item_get_legacy_data (items[i], &data);
457 printf ("%s:\n", items[i]->bin_member);
458 spv_data_dump (&data, stdout);
459 spv_data_uninit (&data);
466 msg (ME, "%s", error);
475 /* This is really bogus.
477 XPath doesn't have any notion of a default XML namespace, but all of the
478 elements in the documents we're interested in have a namespace. Thus, we'd
479 need to require the XPath expressions to have a namespace on every single
480 element: vis:sourceVariable, vis:graph, and so on. That's a pain. So,
481 instead, we remove the default namespace from everyplace it occurs. XPath
482 does support the null namespace, so this allows sourceVariable, graph,
485 See http://plasmasturm.org/log/259/ and
486 https://mail.gnome.org/archives/xml/2003-April/msg00144.html for more
489 remove_default_xml_namespace (xmlNode *node)
491 if (node->ns && !node->ns->prefix)
494 for (xmlNode *child = node->children; child; child = child->next)
495 remove_default_xml_namespace (child);
499 register_ns (xmlXPathContext *ctx, const char *prefix, const char *uri)
501 xmlXPathRegisterNs (ctx, CHAR_CAST (xmlChar *, prefix),
502 CHAR_CAST (xmlChar *, uri));
505 static xmlXPathContext *
506 create_xpath_context (xmlDoc *doc)
508 xmlXPathContext *ctx = xmlXPathNewContext (doc);
509 register_ns (ctx, "vgr", "http://xml.spss.com/spss/viewer/viewer-graph");
510 register_ns (ctx, "vizml", "http://xml.spss.com/visualization");
511 register_ns (ctx, "vmd", "http://xml.spss.com/spss/viewer/viewer-model");
512 register_ns (ctx, "vps", "http://xml.spss.com/spss/viewer/viewer-pagesetup");
513 register_ns (ctx, "vst", "http://xml.spss.com/spss/viewer/viewer-style");
514 register_ns (ctx, "vtb", "http://xml.spss.com/spss/viewer/viewer-table");
515 register_ns (ctx, "vtl", "http://xml.spss.com/spss/viewer/table-looks");
516 register_ns (ctx, "vtt", "http://xml.spss.com/spss/viewer/viewer-treemodel");
517 register_ns (ctx, "vtx", "http://xml.spss.com/spss/viewer/viewer-text");
518 register_ns (ctx, "xsi", "http://www.w3.org/2001/XMLSchema-instance");
523 dump_xml (int argc, char **argv, const char *member_name,
524 char *error_s, xmlDoc *doc)
530 printf ("<!-- %s -->\n", member_name);
531 xmlElemDump (stdout, NULL, xmlDocGetRootElement (doc));
536 bool any_results = false;
538 remove_default_xml_namespace (xmlDocGetRootElement (doc));
539 for (int i = 2; i < argc; i++)
541 xmlXPathContext *xpath_ctx = create_xpath_context (doc);
542 xmlXPathSetContextNode (xmlDocGetRootElement (doc),
544 xmlXPathObject *xpath_obj = xmlXPathEvalExpression(
545 CHAR_CAST (xmlChar *, argv[i]), xpath_ctx);
547 error (1, 0, _("%s: invalid XPath expression"), argv[i]);
549 const xmlNodeSet *nodes = xpath_obj->nodesetval;
550 if (nodes && nodes->nodeNr > 0)
554 printf ("<!-- %s -->\n", member_name);
557 for (size_t j = 0; j < nodes->nodeNr; j++)
559 xmlElemDump (stdout, doc, nodes->nodeTab[j]);
564 xmlXPathFreeObject (xpath_obj);
565 xmlXPathFreeContext (xpath_ctx);
574 printf ("<!-- %s -->\n", member_name);
575 msg (ME, "%s", error_s);
581 run_dump_legacy_table (int argc, char **argv)
583 struct spv_reader *spv;
584 char *err = spv_open (argv[1], &spv);
586 error (1, 0, "%s", err);
588 struct spv_item **items;
590 spv_select (spv, criteria, n_criteria, &items, &n_items);
591 for (size_t i = 0; i < n_items; i++)
592 if (spv_item_is_legacy_table (items[i]))
595 char *error_s = spv_item_get_legacy_table (items[i], &doc);
596 dump_xml (argc, argv, items[i]->xml_member, error_s, doc);
604 run_dump_structure (int argc, char **argv)
606 struct spv_reader *spv;
607 char *err = spv_open (argv[1], &spv);
609 error (1, 0, "%s", err);
611 struct spv_item **items;
613 spv_select (spv, criteria, n_criteria, &items, &n_items);
614 const char *last_structure_member = NULL;
615 for (size_t i = 0; i < n_items; i++)
616 if (!last_structure_member || strcmp (items[i]->structure_member,
617 last_structure_member))
619 last_structure_member = items[i]->structure_member;
622 char *error_s = spv_item_get_structure (items[i], &doc);
623 dump_xml (argc, argv, items[i]->structure_member, error_s, doc);
631 run_is_legacy (int argc UNUSED, char **argv)
633 struct spv_reader *spv;
634 char *err = spv_open (argv[1], &spv);
636 error (1, 0, "%s", err);
638 bool is_legacy = false;
640 struct spv_item **items;
642 spv_select (spv, criteria, n_criteria, &items, &n_items);
643 for (size_t i = 0; i < n_items; i++)
644 if (spv_item_is_legacy_table (items[i]))
653 exit (is_legacy ? EXIT_SUCCESS : EXIT_FAILURE);
/* A command that can be named on the command line. */
struct command
  {
    /* Name that the user types. */
    const char *name;

    /* Inclusive bounds on the number of arguments that follow the command
       name.  A 'max_args' of INT_MAX means there is no upper limit. */
    int min_args, max_args;

    /* Implementation.  argv[0] is the command name and argv[1] onward are
       its arguments. */
    void (*run) (int argc, char **argv);
  };
663 static const struct command commands[] =
665 { "detect", 1, 1, run_detect },
666 { "dir", 1, 1, run_directory },
667 { "convert", 2, 2, run_convert },
669 /* Undocumented commands. */
670 { "dump", 1, 1, run_dump },
671 { "dump-light-table", 1, 1, run_dump_light_table },
672 { "dump-legacy-data", 1, 1, run_dump_legacy_data },
673 { "dump-legacy-table", 1, INT_MAX, run_dump_legacy_table },
674 { "dump-structure", 1, INT_MAX, run_dump_structure },
675 { "is-legacy", 1, 1, run_is_legacy },
677 static const int n_commands = sizeof commands / sizeof *commands;
679 static const struct command *
680 find_command (const char *name)
682 for (size_t i = 0; i < n_commands; i++)
684 const struct command *c = &commands[i];
685 if (!strcmp (name, c->name))
692 emit_msg (const struct msg *m, void *aux UNUSED)
694 if (m->severity == MSG_S_ERROR || m->severity == MSG_S_WARNING)
697 char *s = msg_to_string (m);
698 fprintf (stderr, "%s\n", s);
703 main (int argc, char **argv)
705 set_program_name (argv[0]);
706 msg_set_handler (emit_msg, NULL);
710 parse_options (argc, argv);
716 error (1, 0, _("missing command name (use --help for help)"));
718 const struct command *c = find_command (argv[0]);
720 error (1, 0, _("unknown command \"%s\" (use --help for help)"), argv[0]);
722 int n_args = argc - 1;
723 if (n_args < c->min_args || n_args > c->max_args)
725 if (c->min_args == c->max_args)
728 ngettext ("\"%s\" command takes exactly %d argument",
729 "\"%s\" command takes exactly %d arguments",
730 c->min_args), c->name, c->min_args);
732 else if (c->max_args == INT_MAX)
735 ngettext ("\"%s\" command requires at least %d argument",
736 "\"%s\" command requires at least %d arguments",
737 c->min_args), c->name, c->min_args);
742 _("\"%s\" command requires between %d and %d arguments"),
743 c->name, c->min_args, c->max_args);
751 return n_warnings ? EXIT_FAILURE : EXIT_SUCCESS;
754 static struct spv_criteria *
757 if (!n_criteria || new_criteria)
759 new_criteria = false;
760 if (n_criteria >= allocated_criteria)
761 criteria = x2nrealloc (criteria, &allocated_criteria,
763 criteria[n_criteria++] = (struct spv_criteria) SPV_CRITERIA_INITIALIZER;
766 return &criteria[n_criteria - 1];
770 parse_select (char *arg)
772 bool invert = arg[0] == '^';
775 unsigned classes = 0;
776 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
778 if (!strcmp (arg, "all"))
779 classes = SPV_ALL_CLASSES;
780 else if (!strcmp (arg, "help"))
782 puts (_("The following object classes are supported:"));
783 for (int class = 0; class < SPV_N_CLASSES; class++)
784 printf ("- %s\n", spv_item_class_to_string (class));
789 int class = spv_item_class_from_string (token);
790 if (class == SPV_N_CLASSES)
791 error (1, 0, _("%s: unknown object class (use --select=help "
793 classes |= 1u << class;
797 struct spv_criteria *c = get_criteria ();
798 c->classes = invert ? classes ^ SPV_ALL_CLASSES : classes;
801 static struct spv_criteria_match *
802 get_criteria_match (const char **arg)
804 struct spv_criteria *c = get_criteria ();
805 if ((*arg)[0] == '^')
815 parse_commands (const char *arg)
817 struct spv_criteria_match *cm = get_criteria_match (&arg);
818 string_array_parse (&cm->commands, ss_cstr (arg), ss_cstr (","));
822 parse_subtypes (const char *arg)
824 struct spv_criteria_match *cm = get_criteria_match (&arg);
825 string_array_parse (&cm->subtypes, ss_cstr (arg), ss_cstr (","));
829 parse_labels (const char *arg)
831 struct spv_criteria_match *cm = get_criteria_match (&arg);
832 string_array_parse (&cm->labels, ss_cstr (arg), ss_cstr (","));
836 parse_instances (char *arg)
838 struct spv_criteria *c = get_criteria ();
839 size_t allocated_instances = c->n_instances;
841 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
843 if (c->n_instances >= allocated_instances)
844 c->instances = x2nrealloc (c->instances, &allocated_instances,
845 sizeof *c->instances);
847 c->instances[c->n_instances++] = (!strcmp (token, "last") ? -1
853 parse_members (const char *arg)
855 struct spv_criteria *cm = get_criteria ();
856 string_array_parse (&cm->members, ss_cstr (arg), ss_cstr (","));
860 parse_options (int argc, char *argv[])
866 OPT_MEMBER_NAMES = UCHAR_MAX + 1,
879 static const struct option long_options[] =
881 /* Input selection options. */
882 { "show-hidden", no_argument, NULL, OPT_SHOW_HIDDEN },
883 { "select", required_argument, NULL, OPT_SELECT },
884 { "commands", required_argument, NULL, OPT_COMMANDS },
885 { "subtypes", required_argument, NULL, OPT_SUBTYPES },
886 { "labels", required_argument, NULL, OPT_LABELS },
887 { "instances", required_argument, NULL, OPT_INSTANCES },
888 { "members", required_argument, NULL, OPT_MEMBERS },
889 { "errors", no_argument, NULL, OPT_ERRORS },
890 { "or", no_argument, NULL, OPT_OR },
892 /* "dir" command options. */
893 { "member-names", no_argument, NULL, OPT_MEMBER_NAMES },
895 /* "convert" command options. */
896 { "force", no_argument, NULL, 'f' },
898 /* "dump-light-table" command options. */
899 { "sort", no_argument, NULL, OPT_SORT },
900 { "raw", no_argument, NULL, OPT_RAW },
902 { "help", no_argument, NULL, 'h' },
903 { "version", no_argument, NULL, 'v' },
905 { NULL, 0, NULL, 0 },
910 c = getopt_long (argc, argv, "O:hvf", long_options, NULL);
917 output_driver_parse_option (optarg, &output_options);
920 case OPT_MEMBER_NAMES:
921 show_member_names = true;
924 case OPT_SHOW_HIDDEN:
925 get_criteria ()->include_hidden = true;
929 parse_select (optarg);
933 parse_commands (optarg);
937 parse_subtypes (optarg);
941 parse_labels (optarg);
945 parse_instances (optarg);
949 parse_members (optarg);
953 get_criteria ()->error = true;
973 version_etc (stdout, "pspp-output", PACKAGE_NAME, PACKAGE_VERSION,
974 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
990 struct string s = DS_EMPTY_INITIALIZER;
991 struct string_set formats = STRING_SET_INITIALIZER(formats);
992 output_get_supported_formats (&formats);
994 const struct string_set_node *node;
995 STRING_SET_FOR_EACH (format, node, &formats)
997 if (!ds_is_empty (&s))
998 ds_put_byte (&s, ' ');
999 ds_put_cstr (&s, format);
1001 string_set_destroy (&formats);
1004 %s, a utility for working with SPSS viewer (.spv) files.\n\
1005 Usage: %s [OPTION]... COMMAND ARG...\n\
1007 The following commands are available:\n\
1008 detect FILE Detect whether FILE is an SPV file.\n\
1009 dir FILE List tables and other items in FILE.\n\
1010 convert SOURCE DEST Convert .spv SOURCE to DEST.\n\
1012 Input selection options for \"dir\" and \"convert\":\n\
1013 --select=CLASS... include only some kinds of objects\n\
1014 --select=help print known object classes\n\
1015 --commands=COMMAND... include only specified COMMANDs\n\
1016 --subtypes=SUBTYPE... include only specified SUBTYPEs of output\n\
1017 --labels=LABEL... include only output objects with the given LABELs\n\
1018 --instances=INSTANCE... include only the given object INSTANCEs\n\
1019 --show-hidden include hidden output objects\n\
1020 --or separate two sets of selection options\n\
1022 \"convert\" by default infers the destination's format from its extension.\n\
1023 The known extensions are: %s\n\
1024 The following options override \"convert\" behavior:\n\
1025 -O format=FORMAT set destination format to FORMAT\n\
1026 -O OPTION=VALUE set output option\n\
1027 -f, --force keep output file even given errors\n\
1029 --help display this help and exit\n\
1030 --version output version information and exit\n",
1031 program_name, program_name, ds_cstr (&s));