1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017, 2018 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/file-handle-def.h"
25 #include "data/settings.h"
26 #include "libpspp/i18n.h"
27 #include "libpspp/message.h"
28 #include "libpspp/string-map.h"
29 #include "libpspp/string-set.h"
30 #include "output/driver.h"
31 #include "output/group-item.h"
32 #include "output/page-setup-item.h"
33 #include "output/pivot-table.h"
34 #include "output/spv/light-binary-parser.h"
35 #include "output/spv/spv-legacy-data.h"
36 #include "output/spv/spv-output.h"
37 #include "output/spv/spv-select.h"
38 #include "output/spv/spv.h"
39 #include "output/table-item.h"
40 #include "output/text-item.h"
42 #include "gl/c-ctype.h"
44 #include "gl/progname.h"
45 #include "gl/version-etc.h"
46 #include "gl/xalloc.h"
48 #include <libxml/tree.h>
49 #include <libxml/xpath.h>
50 #include <libxml/xpathInternals.h>
53 #define _(msgid) gettext (msgid)
/* Command-line state shared by parse_options() and the command
   implementations below. */
55 /* -O key=value: Output driver options. */
56 static struct string_map output_options
57 = STRING_MAP_INITIALIZER (output_options);
59 /* --member-names: Include .zip member name in "dir" output. */
60 static bool show_member_names;
62 /* --show-hidden, --select, --commands, ...: Selection criteria.
   'criteria' is a growable array: 'n_criteria' elements are in use and
   'allocated_criteria' is the capacity maintained through x2nrealloc() in
   get_criteria(). */
63 static struct spv_criteria *criteria;
64 static size_t n_criteria, allocated_criteria;
66 /* --or: Add new element to 'criteria' array.  get_criteria() checks this
   flag and resets it to false when it appends the new element. */
67 static bool new_criteria;
69 /* --sort: Sort members under dump-light-table, to make comparisons easier. */
72 /* --raw: Dump raw binary data in dump-light-table. */
75 /* -f, --force: Keep output file even on error. */
78 /* Number of warnings issued.  main() returns EXIT_FAILURE when this is
   nonzero. */
79 static size_t n_warnings;
/* Forward declarations for functions defined near the end of the file. */
81 static void usage (void);
82 static void parse_options (int argc, char **argv);
/* Submits ITEM to the output engine.

   If 'show_member_names' is set and ITEM has an XML or binary member name,
   a TEXT_ITEM_TITLE text item naming the member(s) is submitted first.  The
   item is then handled according to spv_item_get_type(). */
85 dump_item (const struct spv_item *item)
87 if (show_member_names && (item->xml_member || item->bin_member))
89 const char *x = item->xml_member;
90 const char *b = item->bin_member;
/* Two formats: "X and B:" (translatable) or just the single available
   member name followed by a colon. */
92 ? xasprintf (_("%s and %s:"), x, b)
93 : xasprintf ("%s:", x ? x : b));
/* The "_nocopy" constructor is handed the freshly allocated string S
   directly. */
94 text_item_submit (text_item_create_nocopy (TEXT_ITEM_TITLE, s));
97 switch (spv_item_get_type (item))
99 case SPV_ITEM_HEADING:
103 spv_text_submit (item);
107 pivot_table_submit (pivot_table_ref (spv_item_get_table (item)));
116 case SPV_ITEM_OBJECT:
/* Prints a description of ITEM on stdout for the "dir" command.

   The output starts with "- TYPE" and is followed, when available, by the
   quoted label, the table title (only if it differs from the label), the
   command ID, the subtype (only if it differs from the label), a "(hidden)"
   marker for invisible items, and, when 'show_member_names' is set, the
   member name(s) that hold the item. */
128 print_item_directory (const struct spv_item *item)
/* One iteration per nesting level beyond the first; presumably this emits
   the indentation (the loop body is not visible here). */
130 for (int i = 1; i < spv_item_get_level (item); i++)
133 enum spv_item_type type = spv_item_get_type (item);
134 printf ("- %s", spv_item_type_to_string (type));
136 const char *label = spv_item_get_label (item);
138 printf (" \"%s\"", label);
140 if (type == SPV_ITEM_TABLE)
142 const struct pivot_table *table = spv_item_get_table (item);
143 char *title = pivot_value_to_string (table->title,
144 SETTINGS_VALUE_SHOW_DEFAULT,
145 SETTINGS_VALUE_SHOW_DEFAULT);
/* Avoid repeating the title when it is identical to the label. */
146 if (!label || strcmp (title, label))
147 printf (" title \"%s\"", title);
151 const char *command_id = spv_item_get_command_id (item);
153 printf (" command \"%s\"", command_id);
155 const char *subtype = spv_item_get_subtype (item);
/* Likewise, the subtype is printed only if it adds information beyond the
   label. */
156 if (subtype && (!label || strcmp (label, subtype)))
157 printf (" subtype \"%s\"", subtype);
159 if (!spv_item_is_visible (item))
160 printf (" (hidden)");
161 if (show_member_names && (item->xml_member || item->bin_member))
163 if (item->xml_member && item->bin_member)
164 printf (" in %s and %s", item->xml_member, item->bin_member);
165 else if (item->xml_member)
166 printf (" in %s", item->xml_member);
167 else if (item->bin_member)
168 printf (" in %s", item->bin_member);
/* Implements the "detect" command: runs spv_detect() on the file named by
   argv[1].  An error string from spv_detect() is reported through error()
   with exit status 1 (presumably only when the string is non-null; the
   guarding test is not visible here). */
174 run_detect (int argc UNUSED, char **argv)
176 char *err = spv_detect (argv[1]);
178 error (1, 0, "%s", err);
/* Implements the "dir" command: opens the SPV file named by argv[1],
   selects the items that match the command-line 'criteria', and prints one
   description per selected item with print_item_directory(). */
182 run_directory (int argc UNUSED, char **argv)
184 struct spv_reader *spv;
185 char *err = spv_open (argv[1], &spv);
187 error (1, 0, "%s", err);
189 struct spv_item **items;
/* spv_select() stores the matching items in ITEMS and their count in
   N_ITEMS. */
191 spv_select (spv, criteria, n_criteria, &items, &n_items);
192 for (size_t i = 0; i < n_items; i++)
193 print_item_directory (items[i]);
/* Element array of the path.  get_path() initially points this at 'stub'
   and switches to heap storage once more room is needed. */
201 const struct spv_item **nodes;
/* Inline storage for N_STUB elements, used while the path is short. */
205 const struct spv_item *stub[N_STUB];
/* Helper used by get_path() to reverse a path in place.  It saves *A in a
   temporary; presumably the remaining statements complete the exchange of
   *A and *B (they are not visible here). */
209 swap_nodes (const struct spv_item **a, const struct spv_item **b)
211 const struct spv_item *tmp = *a;
/* Fills PATH with a sequence of items starting from ITEM, then reverses the
   sequence in place.

   PATH starts out using its embedded 'stub' array and moves to heap storage
   only when the stub fills up.  The statements that advance ITEM are not
   visible here; dump_heading_transition() compares paths from index 0, so
   presumably ITEM's ancestors are appended and the reversed result runs
   from the root down to ITEM. */
217 get_path (const struct spv_item *item, struct item_path *path)
/* NOTE(review): this capacity presumably has to equal N_STUB, the size of
   'stub' -- TODO confirm; using the constant itself would be safer. */
219 size_t allocated = 10;
220 path->nodes = path->stub;
225 if (path->n >= allocated)
/* First overflow: copy the stub contents to the heap so that the array can
   be grown with x2nrealloc(). */
227 if (path->nodes == path->stub)
228 path->nodes = xmemdup (path->stub, sizeof path->stub);
229 path->nodes = x2nrealloc (path->nodes, &allocated,
230 sizeof *path->nodes);
232 path->nodes[path->n++] = item;
/* Reverse the array by swapping elements from both ends toward the
   middle. */
236 for (size_t i = 0; i < path->n / 2; i++)
237 swap_nodes (&path->nodes[i], &path->nodes[path->n - i - 1]);
/* Releases the storage that get_path() set up for PATH.  Nothing is done
   for a null PATH or for a path that still uses its embedded 'stub' array,
   since only a heap-allocated 'nodes' array needs to be released. */
241 free_path (struct item_path *path)
243 if (path && path->nodes != path->stub)
/* Submits the group-close and group-open items needed to move from heading
   OLD to heading NEW.

   Both paths are computed with get_path() and the length of their common
   prefix is found.  One group is closed for each node of OLD's path beyond
   that prefix, and one group, named after the node's command ID, is opened
   for each node of NEW's path beyond it. */
248 dump_heading_transition (const struct spv_item *old,
249 const struct spv_item *new)
254 struct item_path old_path, new_path;
255 get_path (old, &old_path);
256 get_path (new, &new_path);
/* Count the leading nodes that the two paths share. */
259 for (; common < old_path.n && common < new_path.n; common++)
260 if (old_path.nodes[common] != new_path.nodes[common])
263 for (size_t i = common; i < old_path.n; i++)
264 group_close_item_submit (group_close_item_create ());
265 for (size_t i = common; i < new_path.n; i++)
266 group_open_item_submit (group_open_item_create (
267 new_path.nodes[i]->command_id));
269 free_path (&old_path);
270 free_path (&new_path);
/* Implements the "convert" command: renders the selected items of the SPV
   file named by argv[1] through an output driver that writes to argv[2].

   The driver is created from 'output_options' (the -O settings) with
   "output-file" forced to argv[2].  Items are submitted in selection order,
   with heading transitions submitted between them so that group nesting
   follows the headings in the SPV file. */
274 run_convert (int argc UNUSED, char **argv)
276 struct spv_reader *spv;
277 char *err = spv_open (argv[1], &spv);
279 error (1, 0, "%s", err);
281 output_engine_push ();
282 output_set_filename (argv[1]);
283 string_map_replace (&output_options, "output-file", argv[2]);
284 struct output_driver *driver = output_driver_create (&output_options);
287 output_driver_register (driver);
289 const struct page_setup *ps = spv_get_page_setup (spv);
291 page_setup_item_submit (page_setup_item_create (ps));
293 struct spv_item **items;
295 spv_select (spv, criteria, n_criteria, &items, &n_items);
/* Start from the root so that the first item opens all of its enclosing
   groups. */
296 struct spv_item *prev_heading = spv_get_root (spv);
297 for (size_t i = 0; i < n_items; i++)
/* A heading item is its own heading; any other item belongs to its parent. */
299 struct spv_item *heading
300 = items[i]->type == SPV_ITEM_HEADING ? items[i] : items[i]->parent;
301 dump_heading_transition (prev_heading, heading);
302 dump_item (items[i]);
303 prev_heading = heading;
/* Return to the root, which closes every group that is still open. */
305 dump_heading_transition (prev_heading, spv_get_root (spv));
310 output_engine_pop ();
/* Warnings were issued and --force was not given; judging by the comment
   below, this branch removes the output file (the statement itself is not
   visible here). */
313 if (n_warnings && !force)
315 /* XXX There could be other files to unlink, e.g. the ascii driver can
316 produce additional files with the charts. */
/* Implements the undocumented "dump" command: opens the SPV file named by
   argv[1] and calls pivot_table_dump(), with 0 as its second argument, on
   the pivot table of every selected item of type SPV_ITEM_TABLE. */
322 run_dump (int argc UNUSED, char **argv)
324 struct spv_reader *spv;
325 char *err = spv_open (argv[1], &spv);
327 error (1, 0, "%s", err);
329 struct spv_item **items;
331 spv_select (spv, criteria, n_criteria, &items, &n_items);
332 for (size_t i = 0; i < n_items; i++)
333 if (items[i]->type == SPV_ITEM_TABLE)
335 pivot_table_dump (spv_item_get_table (items[i]), 0);
/* qsort() comparison function for an array of pointers to struct
   spvlb_border.  Orders the elements by ascending 'border_type' and returns
   -1, 0, or 1. */
344 compare_borders (const void *a_, const void *b_)
/* A_ and B_ point to array elements, which are themselves pointers. */
346 const struct spvlb_border *const *ap = a_;
347 const struct spvlb_border *const *bp = b_;
348 uint32_t a = (*ap)->border_type;
349 uint32_t b = (*bp)->border_type;
/* Three-way comparison that avoids the overflow a subtraction could cause. */
351 return a < b ? -1 : a > b;
/* qsort() comparison function for an array of pointers to struct
   spvlb_cell.  Orders the elements by ascending 'index' and returns -1, 0,
   or 1. */
355 compare_cells (const void *a_, const void *b_)
/* A_ and B_ point to array elements, which are themselves pointers. */
357 const struct spvlb_cell *const *ap = a_;
358 const struct spvlb_cell *const *bp = b_;
359 uint64_t a = (*ap)->index;
360 uint64_t b = (*bp)->index;
/* Three-way comparison that avoids the overflow a subtraction could cause. */
362 return a < b ? -1 : a > b;
/* Implements the undocumented "dump-light-table" command for the SPV file
   named by argv[1].

   Two output forms are visible below: the raw bytes of a light table
   written to stdout, and the parsed table printed with spvlb_print_table().
   Presumably --raw chooses between them and --sort enables the qsort()
   calls (the tests themselves are not visible here).  Errors from the
   spv_item_get_*() calls are reported with msg(). */
366 run_dump_light_table (int argc UNUSED, char **argv)
/* Refuse to send binary data to a terminal. */
368 if (raw && isatty (STDOUT_FILENO))
369 error (1, 0, "not writing binary data to tty");
371 struct spv_reader *spv;
372 char *err = spv_open (argv[1], &spv);
374 error (1, 0, "%s", err);
376 struct spv_item **items;
378 spv_select (spv, criteria, n_criteria, &items, &n_items);
379 for (size_t i = 0; i < n_items; i++)
381 if (!spv_item_is_light_table (items[i]))
389 error = spv_item_get_raw_light_table (items[i], &data, &size);
392 fwrite (data, size, 1, stdout);
398 struct spvlb_table *table;
399 error = spv_item_get_light_table (items[i], &table);
/* Put borders and cells into a fixed order (by border type and by cell
   index) so that dumps are easier to compare. */
404 qsort (table->borders->borders, table->borders->n_borders,
405 sizeof *table->borders->borders, compare_borders);
406 qsort (table->cells->cells, table->cells->n_cells,
407 sizeof *table->cells->cells, compare_cells);
409 spvlb_print_table (items[i]->bin_member, 0, table);
410 spvlb_free_table (table);
415 msg (ME, "%s", error);
/* Implements the undocumented "dump-legacy-data" command for the SPV file
   named by argv[1].

   For each selected legacy table, two output forms are visible below: the
   raw legacy data written to stdout, and the parsed data printed with
   spv_data_dump() under a "MEMBER:" heading.  Presumably --raw chooses
   between them (the test is not visible here).  Errors are reported with
   msg(). */
426 run_dump_legacy_data (int argc UNUSED, char **argv)
428 struct spv_reader *spv;
429 char *err = spv_open (argv[1], &spv);
431 error (1, 0, "%s", err);
433 struct spv_item **items;
435 spv_select (spv, criteria, n_criteria, &items, &n_items);
436 for (size_t i = 0; i < n_items; i++)
437 if (spv_item_is_legacy_table (items[i]))
439 struct spv_data data;
445 error = spv_item_get_raw_legacy_data (items[i], &data, &size);
448 fwrite (data, size, 1, stdout);
454 error = spv_item_get_legacy_data (items[i], &data);
457 printf ("%s:\n", items[i]->bin_member);
458 spv_data_dump (&data, stdout);
459 spv_data_uninit (&data);
466 msg (ME, "%s", error);
475 /* This is really bogus.
477 XPath doesn't have any notion of a default XML namespace, but all of the
478 elements in the documents we're interested in have a namespace. Thus, we'd
479 need to require the XPath expressions to have a namespace on every single
480 element: vis:sourceVariable, vis:graph, and so on. That's a pain. So,
481 instead, we remove the default namespace from everyplace it occurs. XPath
482 does support the null namespace, so this allows sourceVariable, graph,
483 and so on, to work.
485 See http://plasmasturm.org/log/259/ and
486 https://mail.gnome.org/archives/xml/2003-April/msg00144.html for more
487 information. */
489 remove_default_xml_namespace (xmlNode *node)
/* A namespace without a prefix is the default namespace.  Presumably it is
   detached from NODE here (the statement is not visible). */
491 if (node->ns && !node->ns->prefix)
/* Recurse into every child so that the whole subtree is processed. */
494 for (xmlNode *child = node->children; child; child = child->next)
495 remove_default_xml_namespace (child);
/* Registers namespace PREFIX for URI in XPath context CTX.  This wraps
   xmlXPathRegisterNs() and converts the plain C strings to the xmlChar
   pointers that libxml2 expects. */
499 register_ns (xmlXPathContext *ctx, const char *prefix, const char *uri)
501 xmlXPathRegisterNs (ctx, CHAR_CAST (xmlChar *, prefix),
502 CHAR_CAST (xmlChar *, uri));
/* Creates a new XPath context for DOC and registers the namespace prefixes
   listed below, so that XPath expressions can use those prefixes. */
505 static xmlXPathContext *
506 create_xpath_context (xmlDoc *doc)
508 xmlXPathContext *ctx = xmlXPathNewContext (doc);
509 register_ns (ctx, "vgr", "http://xml.spss.com/spss/viewer/viewer-graph");
510 register_ns (ctx, "vizml", "http://xml.spss.com/visualization");
511 register_ns (ctx, "vmd", "http://xml.spss.com/spss/viewer/viewer-model");
512 register_ns (ctx, "vps", "http://xml.spss.com/spss/viewer/viewer-pagesetup");
513 register_ns (ctx, "vst", "http://xml.spss.com/spss/viewer/viewer-style");
514 register_ns (ctx, "vtb", "http://xml.spss.com/spss/viewer/viewer-table");
515 register_ns (ctx, "vtl", "http://xml.spss.com/spss/viewer/table-looks");
516 register_ns (ctx, "vtt", "http://xml.spss.com/spss/viewer/viewer-treemodel");
517 register_ns (ctx, "vtx", "http://xml.spss.com/spss/viewer/viewer-text");
518 register_ns (ctx, "xsi", "http://www.w3.org/2001/XMLSchema-instance");
/* Shared back end of "dump-legacy-table" and "dump-structure".

   ARGC and ARGV are the command's own arguments; argv[2] and later are
   treated as XPath expressions.  MEMBER_NAME is printed in an XML comment
   ahead of output.  ERROR_S and DOC are the results of reading the member.

   Visible behavior: the whole document can be dumped from its root element,
   or each XPath expression is evaluated against DOC with the default
   namespace removed and every node of a non-empty result set is dumped.
   ERROR_S is reported with msg().  The conditions that choose among these
   paths are not visible here. */
523 dump_xml (int argc, char **argv, const char *member_name,
524 char *error_s, xmlDoc *doc)
530 printf ("<!-- %s -->\n", member_name);
531 xmlElemDump (stdout, NULL, xmlDocGetRootElement (doc));
536 bool any_results = false;
538 remove_default_xml_namespace (xmlDocGetRootElement (doc));
/* argv[0] is the command name and argv[1] the SPV file, so the XPath
   expressions start at index 2. */
539 for (int i = 2; i < argc; i++)
541 xmlXPathContext *xpath_ctx = create_xpath_context (doc);
542 xmlXPathSetContextNode (xmlDocGetRootElement (doc),
544 xmlXPathObject *xpath_obj = xmlXPathEvalExpression(
545 CHAR_CAST (xmlChar *, argv[i]), xpath_ctx);
547 error (1, 0, _("%s: invalid XPath expression"), argv[i]);
549 const xmlNodeSet *nodes = xpath_obj->nodesetval;
550 if (nodes && nodes->nodeNr > 0)
554 printf ("<!-- %s -->\n", member_name);
557 for (size_t j = 0; j < nodes->nodeNr; j++)
559 xmlElemDump (stdout, doc, nodes->nodeTab[j]);
/* The XPath object and context are created and freed once per expression. */
564 xmlXPathFreeObject (xpath_obj);
565 xmlXPathFreeContext (xpath_ctx);
574 printf ("<!-- %s -->\n", member_name);
575 msg (ME, "%s", error_s);
/* Implements the undocumented "dump-legacy-table" command: for every
   selected legacy table in the SPV file named by argv[1], reads the table's
   XML document and passes it, with any read error, to dump_xml().  The
   arguments after the file name are forwarded to dump_xml() as XPath
   expressions. */
581 run_dump_legacy_table (int argc, char **argv)
583 struct spv_reader *spv;
584 char *err = spv_open (argv[1], &spv);
586 error (1, 0, "%s", err);
588 struct spv_item **items;
590 spv_select (spv, criteria, n_criteria, &items, &n_items);
591 for (size_t i = 0; i < n_items; i++)
592 if (spv_item_is_legacy_table (items[i]))
595 char *error_s = spv_item_get_legacy_table (items[i], &doc);
596 dump_xml (argc, argv, items[i]->xml_member, error_s, doc);
/* Implements the undocumented "dump-structure" command: dumps the structure
   member behind the selected items of the SPV file named by argv[1] through
   dump_xml().  The arguments after the file name are forwarded to
   dump_xml() as XPath expressions. */
604 run_dump_structure (int argc, char **argv)
606 struct spv_reader *spv;
607 char *err = spv_open (argv[1], &spv);
609 error (1, 0, "%s", err);
611 struct spv_item **items;
613 spv_select (spv, criteria, n_criteria, &items, &n_items);
/* Consecutive items with the same structure member are dumped only once:
   an item is processed only if its structure member differs from that of
   the most recently dumped item. */
614 const char *last_structure_member = NULL;
615 for (size_t i = 0; i < n_items; i++)
616 if (!last_structure_member || strcmp (items[i]->structure_member,
617 last_structure_member))
619 last_structure_member = items[i]->structure_member;
622 char *error_s = spv_item_get_structure (items[i], &doc);
623 dump_xml (argc, argv, items[i]->structure_member, error_s, doc);
/* Implements the undocumented "is-legacy" command: scans the selected items
   of the SPV file named by argv[1] for legacy tables and terminates the
   process with EXIT_SUCCESS if 'is_legacy' ended up true, EXIT_FAILURE
   otherwise.  Presumably 'is_legacy' is set when a legacy table is found
   (the assignment is not visible here). */
631 run_is_legacy (int argc UNUSED, char **argv)
633 struct spv_reader *spv;
634 char *err = spv_open (argv[1], &spv);
636 error (1, 0, "%s", err);
638 bool is_legacy = false;
640 struct spv_item **items;
642 spv_select (spv, criteria, n_criteria, &items, &n_items);
643 for (size_t i = 0; i < n_items; i++)
644 if (spv_item_is_legacy_table (items[i]))
/* The result is delivered through the exit status; this function does not
   return. */
653 exit (is_legacy ? EXIT_SUCCESS : EXIT_FAILURE);
/* Minimum and maximum number of arguments after the command name.  main()
   rejects a command line outside this range; a maximum of INT_MAX is
   treated as "no upper limit" in its error message. */
659 int min_args, max_args;
/* Implementation of the command.  The command table and main() show that
   argv[0] is the command name and argv[1] the first argument. */
660 void (*run) (int argc, char **argv);
/* Table of supported commands.  Each row gives the command name, the
   minimum and maximum number of arguments after the name, and the function
   that implements it. */
663 static const struct command commands[] =
665 { "detect", 1, 1, run_detect },
666 { "dir", 1, 1, run_directory },
667 { "convert", 2, 2, run_convert },
669 /* Undocumented commands. */
670 { "dump", 1, 1, run_dump },
671 { "dump-light-table", 1, 1, run_dump_light_table },
672 { "dump-legacy-data", 1, 1, run_dump_legacy_data },
/* These two accept any number of XPath expressions after the file name. */
673 { "dump-legacy-table", 1, INT_MAX, run_dump_legacy_table },
674 { "dump-structure", 1, INT_MAX, run_dump_structure },
675 { "is-legacy", 1, 1, run_is_legacy },
/* Number of rows in 'commands'. */
677 static const int n_commands = sizeof commands / sizeof *commands;
/* Looks up NAME in the 'commands' table with an exact, case-sensitive
   string comparison.  Presumably returns the matching entry, or a null
   pointer if there is none: main() tests the result before use, but the
   return statements are not visible here. */
679 static const struct command *
680 find_command (const char *name)
682 for (size_t i = 0; i < n_commands; i++)
684 const struct command *c = &commands[i];
685 if (!strcmp (name, c->name))
/* Message handler installed by main() through msg_set_handler().  Formats
   message M with msg_to_string() and writes it to stderr on a line of its
   own.  Errors and warnings are singled out by the test below; presumably
   that is where 'n_warnings' is incremented (the statement is not visible
   here).  AUX is unused. */
692 emit_msg (const struct msg *m, void *aux UNUSED)
694 if (m->severity == MSG_S_ERROR || m->severity == MSG_S_WARNING)
697 char *s = msg_to_string (m);
698 fprintf (stderr, "%s\n", s);
/* Program entry point.  Installs the message handler, parses the options,
   looks up the command name, checks the number of arguments against the
   command's limits, and reports usage errors through error() with exit
   status 1.  Returns EXIT_FAILURE if any warnings were counted in
   'n_warnings', EXIT_SUCCESS otherwise. */
703 main (int argc, char **argv)
705 set_program_name (argv[0]);
706 msg_set_handler (emit_msg, NULL);
710 parse_options (argc, argv);
716 error (1, 0, _("missing command name (use --help for help)"));
/* At this point argv[0] is used as the command name, so ARGC and ARGV have
   presumably been advanced past the options (not visible here). */
718 const struct command *c = find_command (argv[0]);
720 error (1, 0, _("unknown command \"%s\" (use --help for help)"), argv[0]);
/* The command name itself does not count as an argument. */
722 int n_args = argc - 1;
723 if (n_args < c->min_args || n_args > c->max_args)
725 if (c->min_args == c->max_args)
/* NOTE(review): the plural suffix test below appends "s" for every nonzero
   count, so a command with min_args == 1 (e.g. "detect") produces
   "exactly 1 arguments".  The test probably should be
   c->min_args == 1 ? "" : "s".  The same applies to the "at least" message
   that follows. */
726 error (1, 0, _("\"%s\" command takes exactly %d argument%s"),
727 c->name, c->min_args, c->min_args ? "s" : "");
728 else if (c->max_args == INT_MAX)
729 error (1, 0, _("\"%s\" command requires at least %d argument%s"),
730 c->name, c->min_args, c->min_args ? "s" : "");
732 error (1, 0, _("\"%s\" command requires between %d and %d arguments"),
733 c->name, c->min_args, c->max_args);
740 return n_warnings ? EXIT_FAILURE : EXIT_SUCCESS;
/* Returns the element of 'criteria' that selection options should currently
   modify, which is always the last one.

   A new element, initialized from SPV_CRITERIA_INITIALIZER, is appended
   first if the array is empty or if --or requested a new set
   ('new_criteria'); the array is grown with x2nrealloc() as needed. */
743 static struct spv_criteria *
746 if (!n_criteria || new_criteria)
/* The --or request is consumed by this call. */
748 new_criteria = false;
749 if (n_criteria >= allocated_criteria)
750 criteria = x2nrealloc (criteria, &allocated_criteria,
752 criteria[n_criteria++] = (struct spv_criteria) SPV_CRITERIA_INITIALIZER;
755 return &criteria[n_criteria - 1];
/* Parses ARG, the argument to --select, as a comma-separated list of object
   class names and stores the resulting class bit mask in the current
   selection criteria.  A leading '^' inverts the selection.  The special
   name "all" selects every class and "help" prints the supported class
   names.  ARG is modified in place by strtok(). */
759 parse_select (char *arg)
761 bool invert = arg[0] == '^';
764 unsigned classes = 0;
765 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
/* NOTE(review): the two comparisons below test ARG rather than TOKEN.
   After the first strtok() call ARG is the first token only, so "all" and
   "help" appear to be recognized only in first position and a later "all"
   would be passed to spv_item_class_from_string().  These probably should
   compare TOKEN -- confirm and fix. */
767 if (!strcmp (arg, "all"))
768 classes = SPV_ALL_CLASSES;
769 else if (!strcmp (arg, "help"))
771 puts (_("The following object classes are supported:"));
772 for (int class = 0; class < SPV_N_CLASSES; class++)
773 printf ("- %s\n", spv_item_class_to_string (class));
778 int class = spv_item_class_from_string (token);
/* SPV_N_CLASSES is the "not found" result of the lookup. */
779 if (class == SPV_N_CLASSES)
780 error (1, 0, _("%s: unknown object class (use --select=help "
782 classes |= 1u << class;
786 struct spv_criteria *c = get_criteria ();
/* Inversion flips exactly the bits that represent valid classes. */
787 c->classes = invert ? classes ^ SPV_ALL_CLASSES : classes;
/* Helper for parse_commands(), parse_subtypes() and parse_labels().  It
   obtains the current selection criteria and inspects *ARG for a leading
   '^'.  ARG is passed by reference, so presumably the '^' is skipped and
   the choice between two match sets in the criteria depends on it (those
   statements are not visible here). */
790 static struct spv_criteria_match *
791 get_criteria_match (const char **arg)
793 struct spv_criteria *c = get_criteria ();
794 if ((*arg)[0] == '^')
/* Parses ARG, the argument to --commands, as a comma-separated list and
   stores the pieces in the 'commands' string array of the match set chosen
   by get_criteria_match(). */
804 parse_commands (const char *arg)
806 struct spv_criteria_match *cm = get_criteria_match (&arg);
807 string_array_parse (&cm->commands, ss_cstr (arg), ss_cstr (","));
/* Parses ARG, the argument to --subtypes, as a comma-separated list and
   stores the pieces in the 'subtypes' string array of the match set chosen
   by get_criteria_match(). */
811 parse_subtypes (const char *arg)
813 struct spv_criteria_match *cm = get_criteria_match (&arg);
814 string_array_parse (&cm->subtypes, ss_cstr (arg), ss_cstr (","));
/* Parses ARG, the argument to --labels, as a comma-separated list and
   stores the pieces in the 'labels' string array of the match set chosen by
   get_criteria_match(). */
818 parse_labels (const char *arg)
820 struct spv_criteria_match *cm = get_criteria_match (&arg);
821 string_array_parse (&cm->labels, ss_cstr (arg), ss_cstr (","));
/* Parses ARG, the argument to --instances, as a comma-separated list and
   appends one entry per token to the 'instances' array of the current
   selection criteria.  The token "last" is stored as -1; the conversion of
   other tokens is not visible here.  ARG is modified in place by
   strtok(). */
825 parse_instances (char *arg)
827 struct spv_criteria *c = get_criteria ();
/* Start from the current element count as the capacity, so the first
   append goes through x2nrealloc(). */
828 size_t allocated_instances = c->n_instances;
830 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
832 if (c->n_instances >= allocated_instances)
833 c->instances = x2nrealloc (c->instances, &allocated_instances,
834 sizeof *c->instances);
836 c->instances[c->n_instances++] = (!strcmp (token, "last") ? -1
/* Parses ARG, the argument to --members, as a comma-separated list and
   stores the pieces in the 'members' string array of the current selection
   criteria.  Unlike --commands, --subtypes and --labels, this does not go
   through get_criteria_match(), so a leading '^' gets no special
   treatment. */
842 parse_members (const char *arg)
844 struct spv_criteria *cm = get_criteria ();
845 string_array_parse (&cm->members, ss_cstr (arg), ss_cstr (","));
/* Parses the command-line options in ARGV with getopt_long() and records
   the results in the file-scope option variables.  Selection options update
   the current selection criteria through get_criteria() and the parse_*()
   helpers, and -O settings are collected in 'output_options'. */
849 parse_options (int argc, char *argv[])
/* Long-only options get codes above UCHAR_MAX, which cannot collide with
   any short option character. */
855 OPT_MEMBER_NAMES = UCHAR_MAX + 1,
868 static const struct option long_options[] =
870 /* Input selection options. */
871 { "show-hidden", no_argument, NULL, OPT_SHOW_HIDDEN },
872 { "select", required_argument, NULL, OPT_SELECT },
873 { "commands", required_argument, NULL, OPT_COMMANDS },
874 { "subtypes", required_argument, NULL, OPT_SUBTYPES },
875 { "labels", required_argument, NULL, OPT_LABELS },
876 { "instances", required_argument, NULL, OPT_INSTANCES },
877 { "members", required_argument, NULL, OPT_MEMBERS },
878 { "errors", no_argument, NULL, OPT_ERRORS },
879 { "or", no_argument, NULL, OPT_OR },
881 /* "dir" command options. */
882 { "member-names", no_argument, NULL, OPT_MEMBER_NAMES },
884 /* "convert" command options. */
885 { "force", no_argument, NULL, 'f' },
887 /* "dump-light-table" command options. */
888 { "sort", no_argument, NULL, OPT_SORT },
889 { "raw", no_argument, NULL, OPT_RAW },
891 { "help", no_argument, NULL, 'h' },
892 { "version", no_argument, NULL, 'v' },
/* All-zero entry that terminates the table for getopt_long(). */
894 { NULL, 0, NULL, 0 },
/* Short options: -O takes an argument; -h, -v and -f do not. */
899 c = getopt_long (argc, argv, "O:hvf", long_options, NULL);
906 output_driver_parse_option (optarg, &output_options);
909 case OPT_MEMBER_NAMES:
910 show_member_names = true;
913 case OPT_SHOW_HIDDEN:
914 get_criteria ()->include_hidden = true;
918 parse_select (optarg);
922 parse_commands (optarg);
926 parse_subtypes (optarg);
930 parse_labels (optarg);
934 parse_instances (optarg);
938 parse_members (optarg);
942 get_criteria ()->error = true;
962 version_etc (stdout, "pspp-output", PACKAGE_NAME, PACKAGE_VERSION,
963 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
/* Prints the help message.  The list of supported output formats is
   obtained from output_get_supported_formats(), joined with single spaces
   into S, and substituted into the message as the known extensions;
   'program_name' fills the first two "%s" placeholders.

   NOTE(review): parse_options() accepts --members, --errors and
   --member-names, but the message below does not describe them.  Consider
   documenting them here if they are meant to be user-visible. */
979 struct string s = DS_EMPTY_INITIALIZER;
980 struct string_set formats = STRING_SET_INITIALIZER(formats);
981 output_get_supported_formats (&formats);
983 const struct string_set_node *node;
984 STRING_SET_FOR_EACH (format, node, &formats)
986 if (!ds_is_empty (&s))
987 ds_put_byte (&s, ' ');
988 ds_put_cstr (&s, format);
990 string_set_destroy (&formats);
993 %s, a utility for working with SPSS viewer (.spv) files.\n\
994 Usage: %s [OPTION]... COMMAND ARG...\n\
996 The following commands are available:\n\
997 detect FILE Detect whether FILE is an SPV file.\n\
998 dir FILE List tables and other items in FILE.\n\
999 convert SOURCE DEST Convert .spv SOURCE to DEST.\n\
1001 Input selection options for \"dir\" and \"convert\":\n\
1002 --select=CLASS... include only some kinds of objects\n\
1003 --select=help print known object classes\n\
1004 --commands=COMMAND... include only specified COMMANDs\n\
1005 --subtypes=SUBTYPE... include only specified SUBTYPEs of output\n\
1006 --labels=LABEL... include only output objects with the given LABELs\n\
1007 --instances=INSTANCE... include only the given object INSTANCEs\n\
1008 --show-hidden include hidden output objects\n\
1009 --or separate two sets of selection options\n\
1011 \"convert\" by default infers the destination's format from its extension.\n\
1012 The known extensions are: %s\n\
1013 The following options override \"convert\" behavior:\n\
1014 -O format=FORMAT set destination format to FORMAT\n\
1015 -O OPTION=VALUE set output option\n\
1016 -f, --force keep output file even given errors\n\
1018 --help display this help and exit\n\
1019 --version output version information and exit\n",
1020 program_name, program_name, ds_cstr (&s));