1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017, 2018 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/file-handle-def.h"
25 #include "data/settings.h"
26 #include "libpspp/i18n.h"
27 #include "libpspp/message.h"
28 #include "libpspp/string-map.h"
29 #include "libpspp/string-set.h"
30 #include "output/driver.h"
31 #include "output/group-item.h"
32 #include "output/page-setup-item.h"
33 #include "output/pivot-table.h"
34 #include "output/spv/light-binary-parser.h"
35 #include "output/spv/spv-legacy-data.h"
36 #include "output/spv/spv-output.h"
37 #include "output/spv/spv-select.h"
38 #include "output/spv/spv.h"
39 #include "output/table-item.h"
40 #include "output/text-item.h"
42 #include "gl/c-ctype.h"
44 #include "gl/progname.h"
45 #include "gl/version-etc.h"
46 #include "gl/xalloc.h"
48 #include <libxml/tree.h>
49 #include <libxml/xpath.h>
50 #include <libxml/xpathInternals.h>
53 #define _(msgid) gettext (msgid)
55 /* -O key=value: Output driver options. */
56 static struct string_map output_options
57 = STRING_MAP_INITIALIZER (output_options);
59 /* --member-name: Include .zip member name in "dir" output. */
60 static bool show_member_name;
62 /* --show-hidden, --select, --commands, ...: Selection criteria. */
63 static struct spv_criteria criteria = SPV_CRITERIA_INITIALIZER(criteria);
65 /* --sort: Sort members under dump-light-table, to make comparisons easier. */
68 /* --raw: Dump raw binary data in dump-light-table. */
71 /* Number of warnings issued. */
72 static size_t n_warnings;
74 static void usage (void);
75 static void parse_options (int argc, char **argv);
/* Submits ITEM for output, dispatching on spv_item_get_type (ITEM).  The
   branches visible here pass ITEM to spv_text_submit() and pass
   pivot_table_ref() of ITEM's pivot table to pivot_table_submit(). */
78 dump_item (const struct spv_item *item)
80 switch (spv_item_get_type (item))
82 case SPV_ITEM_HEADING:
86 spv_text_submit (item);
90 pivot_table_submit (pivot_table_ref (spv_item_get_table (item)));
/* Prints a directory entry for ITEM on stdout.

   The visible code prints ITEM's label, the name of its type, for tables the
   pivot table's title when there is no label or the title differs from it,
   the command ID in double quotes, "(hidden)" for items that are not
   visible, and, when --member-name was given, the .zip member or members
   recorded in ITEM. */
108 print_item_directory (const struct spv_item *item)
/* One iteration per nesting level beyond the first. */
110 for (int i = 1; i < spv_item_get_level (item); i++)
114 const char *label = spv_item_get_label (item);
116 printf (" %s", label);
118 enum spv_item_type type = spv_item_get_type (item);
119 printf (" %s", spv_item_type_to_string (type));
120 if (type == SPV_ITEM_TABLE)
122 const struct pivot_table *table = spv_item_get_table (item);
123 char *title = pivot_value_to_string (table->title,
124 SETTINGS_VALUE_SHOW_DEFAULT,
125 SETTINGS_VALUE_SHOW_DEFAULT);
/* Avoid repeating the title when it is identical to the label. */
126 if (!label || strcmp (title, label))
127 printf (" \"%s\"", title);
131 const char *command_id = spv_item_get_command_id (item);
133 printf (" \"%s\"", command_id);
135 if (!spv_item_is_visible (item))
136 printf (" (hidden)");
137 if (show_member_name && (item->xml_member || item->bin_member))
139 if (item->xml_member && item->bin_member)
140 printf (" in %s and %s", item->xml_member, item->bin_member);
141 else if (item->xml_member)
142 printf (" in %s", item->xml_member);
143 else if (item->bin_member)
144 printf (" in %s", item->bin_member);
150 run_detect (int argc UNUSED, char **argv)
152 char *err = spv_detect (argv[1]);
154 error (1, 0, "%s", err);
/* Implements the "dir" command: opens the SPV file named by ARGV[1],
   reporting an spv_open() error string through error() with exit status 1,
   selects the items that match the global criteria, and prints one directory
   entry per selected item. */
158 run_directory (int argc UNUSED, char **argv)
160 struct spv_reader *spv;
161 char *err = spv_open (argv[1], &spv);
163 error (1, 0, "%s", err);
165 struct spv_item **items;
167 spv_select (spv, &criteria, &items, &n_items);
168 for (size_t i = 0; i < n_items; i++)
169 print_item_directory (items[i]);
177 const struct spv_item **nodes;
181 const struct spv_item *stub[N_STUB];
/* Exchanges the item pointers stored in *A and *B. */
static void
swap_nodes (const struct spv_item **a, const struct spv_item **b)
{
  const struct spv_item *tmp = *a;
  *a = *b;
  *b = tmp;
}
193 get_path (const struct spv_item *item, struct item_path *path)
195 size_t allocated = 10;
196 path->nodes = path->stub;
201 if (path->n >= allocated)
203 if (path->nodes == path->stub)
204 path->nodes = xmemdup (path->stub, sizeof path->stub);
205 path->nodes = x2nrealloc (path->nodes, &allocated,
206 sizeof *path->nodes);
208 path->nodes[path->n++] = item;
212 for (size_t i = 0; i < path->n / 2; i++)
213 swap_nodes (&path->nodes[i], &path->nodes[path->n - i - 1]);
217 free_path (struct item_path *path)
219 if (path && path->nodes != path->stub)
/* Emits the group items needed to move the output from heading OLD to heading
   NEW.

   Builds the path of each heading with get_path(), advances COMMON over the
   leading positions at which the two paths hold the same node, then submits
   one group-close item for every remaining node of OLD's path and one
   group-open item, named by the node's command_id, for every remaining node
   of NEW's path. */
224 dump_heading_transition (const struct spv_item *old,
225 const struct spv_item *new)
230 struct item_path old_path, new_path;
231 get_path (old, &old_path);
232 get_path (new, &new_path);
/* Length of the prefix shared by both paths. */
235 for (; common < old_path.n && common < new_path.n; common++)
236 if (old_path.nodes[common] != new_path.nodes[common])
239 for (size_t i = common; i < old_path.n; i++)
240 group_close_item_submit (group_close_item_create ());
241 for (size_t i = common; i < new_path.n; i++)
242 group_open_item_submit (group_open_item_create (
243 new_path.nodes[i]->command_id));
245 free_path (&old_path);
246 free_path (&new_path);
/* Implements the "convert" command.  ARGV[1] names the input SPV file and
   ARGV[2] the output file.

   Pushes a fresh output engine, creates and registers an output driver from
   the -O options plus "output-file", opens the SPV file, submits its page
   setup, and then submits every selected item, emitting heading transitions
   between items and a final transition back to the root before popping the
   output engine. */
250 run_convert (int argc UNUSED, char **argv)
252 output_engine_push ();
253 output_set_filename (argv[1]);
254 string_map_insert (&output_options, "output-file", argv[2]);
255 struct output_driver *driver = output_driver_create (&output_options);
258 output_driver_register (driver);
260 struct spv_reader *spv;
261 char *err = spv_open (argv[1], &spv);
263 error (1, 0, "%s", err);
265 const struct page_setup *ps = spv_get_page_setup (spv);
267 page_setup_item_submit (page_setup_item_create (ps));
269 struct spv_item **items;
271 spv_select (spv, &criteria, &items, &n_items);
/* Start from the root so that the first item opens all of its headings. */
272 struct spv_item *prev_heading = spv_get_root (spv);
273 for (size_t i = 0; i < n_items; i++)
/* A heading item is its own heading; any other item belongs to its parent. */
275 struct spv_item *heading
276 = items[i]->type == SPV_ITEM_HEADING ? items[i] : items[i]->parent;
277 dump_heading_transition (prev_heading, heading);
278 dump_item (items[i]);
279 prev_heading = heading;
/* Close whatever groups are still open. */
281 dump_heading_transition (prev_heading, spv_get_root (spv));
286 output_engine_pop ();
/* Implements the undocumented "dump" command: opens the SPV file named by
   ARGV[1] and calls pivot_table_dump() on the pivot table of every selected
   item whose type is SPV_ITEM_TABLE. */
291 run_dump (int argc UNUSED, char **argv)
293 struct spv_reader *spv;
294 char *err = spv_open (argv[1], &spv);
296 error (1, 0, "%s", err);
298 struct spv_item **items;
300 spv_select (spv, &criteria, &items, &n_items);
301 for (size_t i = 0; i < n_items; i++)
302 if (items[i]->type == SPV_ITEM_TABLE)
304 pivot_table_dump (spv_item_get_table (items[i]), 0);
313 compare_borders (const void *a_, const void *b_)
315 const struct spvlb_border *const *ap = a_;
316 const struct spvlb_border *const *bp = b_;
317 uint32_t a = (*ap)->border_type;
318 uint32_t b = (*bp)->border_type;
320 return a < b ? -1 : a > b;
324 compare_cells (const void *a_, const void *b_)
326 const struct spvlb_cell *const *ap = a_;
327 const struct spvlb_cell *const *bp = b_;
328 uint64_t a = (*ap)->index;
329 uint64_t b = (*bp)->index;
331 return a < b ? -1 : a > b;
/* Implements the undocumented "dump-light-table" command for the SPV file
   named by ARGV[1].

   Exits with an error if raw output was requested while stdout is a
   terminal.  For the selected items that are light tables, the visible code
   either writes the raw table data to stdout, or parses the table, sorts its
   borders and cells, prints it labelled with the item's bin_member, and frees
   it.  Error strings are reported through msg(). */
335 run_dump_light_table (int argc UNUSED, char **argv)
337 if (raw && isatty (STDOUT_FILENO))
338 error (1, 0, "not writing binary data to tty");
340 struct spv_reader *spv;
341 char *err = spv_open (argv[1], &spv);
343 error (1, 0, "%s", err);
345 struct spv_item **items;
347 spv_select (spv, &criteria, &items, &n_items);
348 for (size_t i = 0; i < n_items; i++)
350 if (!spv_item_is_light_table (items[i]))
358 error = spv_item_get_raw_light_table (items[i], &data, &size);
361 fwrite (data, size, 1, stdout);
367 struct spvlb_table *table;
368 error = spv_item_get_light_table (items[i], &table);
/* NOTE(review): the condition guarding these sorts is not visible here;
   presumably it is the --sort option -- confirm. */
373 qsort (table->borders->borders, table->borders->n_borders,
374 sizeof *table->borders->borders, compare_borders);
375 qsort (table->cells->cells, table->cells->n_cells,
376 sizeof *table->cells->cells, compare_cells);
378 spvlb_print_table (items[i]->bin_member, 0, table);
379 spvlb_free_table (table);
384 msg (ME, "%s", error);
/* Implements the undocumented "dump-legacy-data" command for the SPV file
   named by ARGV[1].

   For each selected item that is a legacy table, the visible code either
   writes the raw legacy data to stdout, or parses the data, prints the item's
   bin_member followed by a dump of the data, and uninitializes it.  Error
   strings are reported through msg(). */
395 run_dump_legacy_data (int argc UNUSED, char **argv)
397 struct spv_reader *spv;
398 char *err = spv_open (argv[1], &spv);
400 error (1, 0, "%s", err);
402 struct spv_item **items;
404 spv_select (spv, &criteria, &items, &n_items);
405 for (size_t i = 0; i < n_items; i++)
406 if (spv_item_is_legacy_table (items[i]))
408 struct spv_data data;
414 error = spv_item_get_raw_legacy_data (items[i], &data, &size);
417 fwrite (data, size, 1, stdout);
423 error = spv_item_get_legacy_data (items[i], &data);
426 printf ("%s:\n", items[i]->bin_member);
427 spv_data_dump (&data, stdout);
428 spv_data_uninit (&data);
435 msg (ME, "%s", error);
444 /* This is really bogus.
446 XPath doesn't have any notion of a default XML namespace, but all of the
447 elements in the documents we're interested in have a namespace. Thus, we'd
448 need to require the XPath expressions to have a namespace on every single
449 element: vis:sourceVariable, vis:graph, and so on. That's a pain. So,
450 instead, we remove the default namespace from everyplace it occurs. XPath
451 does support the null namespace, so this allows sourceVariable, graph,
454 See http://plasmasturm.org/log/259/ and
455 https://mail.gnome.org/archives/xml/2003-April/msg00144.html for more
458 remove_default_xml_namespace (xmlNode *node)
460 if (node->ns && !node->ns->prefix)
463 for (xmlNode *child = node->children; child; child = child->next)
464 remove_default_xml_namespace (child);
468 register_ns (xmlXPathContext *ctx, const char *prefix, const char *uri)
470 xmlXPathRegisterNs (ctx, CHAR_CAST (xmlChar *, prefix),
471 CHAR_CAST (xmlChar *, uri));
474 static xmlXPathContext *
475 create_xpath_context (xmlDoc *doc)
477 xmlXPathContext *ctx = xmlXPathNewContext (doc);
478 register_ns (ctx, "vgr", "http://xml.spss.com/spss/viewer/viewer-graph");
479 register_ns (ctx, "vizml", "http://xml.spss.com/visualization");
480 register_ns (ctx, "vmd", "http://xml.spss.com/spss/viewer/viewer-model");
481 register_ns (ctx, "vps", "http://xml.spss.com/spss/viewer/viewer-pagesetup");
482 register_ns (ctx, "vst", "http://xml.spss.com/spss/viewer/viewer-style");
483 register_ns (ctx, "vtb", "http://xml.spss.com/spss/viewer/viewer-table");
484 register_ns (ctx, "vtl", "http://xml.spss.com/spss/viewer/table-looks");
485 register_ns (ctx, "vtt", "http://xml.spss.com/spss/viewer/viewer-treemodel");
486 register_ns (ctx, "vtx", "http://xml.spss.com/spss/viewer/viewer-text");
487 register_ns (ctx, "xsi", "http://www.w3.org/2001/XMLSchema-instance");
/* Writes XML from DOC, which was read from .zip member MEMBER_NAME, to
   stdout.

   The visible code has one path that prints a comment naming the member and
   dumps the document's root element, and one path that strips the default
   namespace from the document and evaluates each of ARGV[2] through
   ARGV[ARGC - 1] as an XPath expression relative to the root element, dumping
   every node in a nonempty result.  An expression that cannot be evaluated
   is fatal.  ERROR_S is reported through msg(). */
492 dump_xml (int argc, char **argv, const char *member_name,
493 char *error_s, xmlDoc *doc)
499 printf ("<!-- %s -->\n", member_name);
500 xmlElemDump (stdout, NULL, xmlDocGetRootElement (doc));
505 bool any_results = false;
507 remove_default_xml_namespace (xmlDocGetRootElement (doc));
/* ARGV[0] is the command name and ARGV[1] the input file, so the XPath
   expressions start at index 2. */
508 for (int i = 2; i < argc; i++)
510 xmlXPathContext *xpath_ctx = create_xpath_context (doc);
511 xmlXPathSetContextNode (xmlDocGetRootElement (doc),
513 xmlXPathObject *xpath_obj = xmlXPathEvalExpression(
514 CHAR_CAST (xmlChar *, argv[i]), xpath_ctx);
516 error (1, 0, _("%s: invalid XPath expression"), argv[i]);
518 const xmlNodeSet *nodes = xpath_obj->nodesetval;
519 if (nodes && nodes->nodeNr > 0)
523 printf ("<!-- %s -->\n", member_name);
526 for (size_t j = 0; j < nodes->nodeNr; j++)
528 xmlElemDump (stdout, doc, nodes->nodeTab[j]);
533 xmlXPathFreeObject (xpath_obj);
534 xmlXPathFreeContext (xpath_ctx);
543 printf ("<!-- %s -->\n", member_name);
544 msg (ME, "%s", error_s);
/* Implements the undocumented "dump-legacy-table" command: for each selected
   item in the SPV file named by ARGV[1] that is a legacy table, obtains the
   table's XML document and passes it, with any error string, to dump_xml()
   under the name of the item's xml_member.  ARGC and ARGV are forwarded so
   that dump_xml() can apply any XPath expressions given after the file
   name. */
550 run_dump_legacy_table (int argc, char **argv)
552 struct spv_reader *spv;
553 char *err = spv_open (argv[1], &spv);
555 error (1, 0, "%s", err);
557 struct spv_item **items;
559 spv_select (spv, &criteria, &items, &n_items);
560 for (size_t i = 0; i < n_items; i++)
561 if (spv_item_is_legacy_table (items[i]))
564 char *error_s = spv_item_get_legacy_table (items[i], &doc);
565 dump_xml (argc, argv, items[i]->xml_member, error_s, doc);
/* Implements the undocumented "dump-structure" command: walks the selected
   items of the SPV file named by ARGV[1] and, each time an item's
   structure_member differs from that of the previously dumped item, obtains
   the structure XML document and passes it to dump_xml().  ARGC and ARGV are
   forwarded so that dump_xml() can apply any XPath expressions given after
   the file name. */
573 run_dump_structure (int argc, char **argv)
575 struct spv_reader *spv;
576 char *err = spv_open (argv[1], &spv);
578 error (1, 0, "%s", err);
580 struct spv_item **items;
582 spv_select (spv, &criteria, &items, &n_items);
/* Name of the structure member dumped most recently, to avoid dumping the
   same member once per item that it contains. */
583 const char *last_structure_member = NULL;
584 for (size_t i = 0; i < n_items; i++)
585 if (!last_structure_member || strcmp (items[i]->structure_member,
586 last_structure_member))
588 last_structure_member = items[i]->structure_member;
591 char *error_s = spv_item_get_structure (items[i], &doc);
592 dump_xml (argc, argv, items[i]->structure_member, error_s, doc);
/* Implements the undocumented "is-legacy" command: examines the selected
   items of the SPV file named by ARGV[1] for legacy tables and terminates
   the process, with EXIT_SUCCESS if IS_LEGACY was set and EXIT_FAILURE
   otherwise. */
600 run_is_legacy (int argc UNUSED, char **argv)
602 struct spv_reader *spv;
603 char *err = spv_open (argv[1], &spv);
605 error (1, 0, "%s", err);
607 bool is_legacy = false;
609 struct spv_item **items;
611 spv_select (spv, &criteria, &items, &n_items);
612 for (size_t i = 0; i < n_items; i++)
613 if (spv_item_is_legacy_table (items[i]))
/* The result is conveyed only through the exit status. */
622 exit (is_legacy ? EXIT_SUCCESS : EXIT_FAILURE);
628 int min_args, max_args;
629 void (*run) (int argc, char **argv);
/* Table of supported commands.  Each entry gives the command's name, the
   minimum and maximum number of arguments that may follow the name (INT_MAX
   as the maximum means that there is no upper limit), and the function that
   implements the command. */
632 static const struct command commands[] =
634 { "detect", 1, 1, run_detect },
635 { "dir", 1, 1, run_directory },
636 { "convert", 2, 2, run_convert },
638 /* Undocumented commands. */
639 { "dump", 1, 1, run_dump },
640 { "dump-light-table", 1, 1, run_dump_light_table },
641 { "dump-legacy-data", 1, 1, run_dump_legacy_data },
642 { "dump-legacy-table", 1, INT_MAX, run_dump_legacy_table },
643 { "dump-structure", 1, INT_MAX, run_dump_structure },
644 { "is-legacy", 1, 1, run_is_legacy },
/* Number of entries in COMMANDS. */
646 static const int n_commands = sizeof commands / sizeof *commands;
648 static const struct command *
649 find_command (const char *name)
651 for (size_t i = 0; i < n_commands; i++)
653 const struct command *c = &commands[i];
654 if (!strcmp (name, c->name))
661 emit_msg (const struct msg *m, void *aux UNUSED)
663 if (m->severity == MSG_S_ERROR || m->severity == MSG_S_WARNING)
666 char *s = msg_to_string (m);
667 fprintf (stderr, "%s\n", s);
672 main (int argc, char **argv)
674 set_program_name (argv[0]);
675 msg_set_handler (emit_msg, NULL);
679 parse_options (argc, argv);
685 error (1, 0, _("missing command name (use --help for help)"));
687 const struct command *c = find_command (argv[0]);
689 error (1, 0, _("unknown command \"%s\" (use --help for help)"), argv[0]);
691 int n_args = argc - 1;
692 if (n_args < c->min_args || n_args > c->max_args)
694 if (c->min_args == c->max_args)
695 error (1, 0, _("\"%s\" command takes exactly %d argument%s"),
696 c->name, c->min_args, c->min_args ? "s" : "");
697 else if (c->max_args == INT_MAX)
698 error (1, 0, _("\"%s\" command requires at least %d argument%s"),
699 c->name, c->min_args, c->min_args ? "s" : "");
701 error (1, 0, _("\"%s\" command requires between %d and %d arguments"),
702 c->name, c->min_args, c->max_args);
709 return n_warnings ? EXIT_FAILURE : EXIT_SUCCESS;
713 parse_select (char *arg, bool invert)
715 unsigned classes = 0;
716 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
718 if (!strcmp (arg, "all"))
719 classes = SPV_ALL_CLASSES;
720 else if (!strcmp (arg, "help"))
722 puts (_("The following object classes are supported:"));
723 for (int class = 0; class < SPV_N_CLASSES; class++)
724 printf ("- %s\n", spv_item_class_to_string (class));
729 int class = spv_item_class_from_string (token);
730 if (class == SPV_N_CLASSES)
731 error (1, 0, _("%s: unknown object class (use --select=help "
733 classes |= 1u << class;
737 criteria.classes = invert ? classes ^ SPV_ALL_CLASSES : classes;
/* Parses ARG, the comma-separated list given to --commands, appending one
   struct spv_command_match to criteria.commands for each element and growing
   the array as needed.

   Each element is split at parentheses into a name and an optional number.
   The visible code treats the name "last" and names that begin with a digit
   specially, and for other names derives the instance from the number: 0 if
   there is none, -1 if it is "last".

   ARG is modified in place by strtok() and strtok_r(). */
741 parse_commands (char *arg)
/* Assumes that the array has no spare capacity beyond its current
   contents. */
743 size_t allocated_commands = criteria.n_commands;
745 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
/* The re-entrant strtok_r() is used here so that splitting the element does
   not disturb the state of the enclosing strtok() loop. */
747 char *save_ptr = NULL;
748 char *name = strtok_r (token, "()", &save_ptr);
749 char *number = strtok_r (NULL, "()", &save_ptr);
751 if (criteria.n_commands >= allocated_commands)
752 criteria.commands = x2nrealloc (criteria.commands, &allocated_commands,
753 sizeof *criteria.commands);
755 struct spv_command_match *cm = &criteria.commands[criteria.n_commands++];
756 if (!strcmp (name, "last"))
761 else if (c_isdigit (name[0]))
764 cm->instance = atoi (name);
769 cm->instance = (!number ? 0
770 : !strcmp (number, "last") ? -1
777 parse_subtypes (char *arg)
779 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
780 string_set_insert (&criteria.subtypes, token);
/* Parses ARG, the comma-separated list given to one of the --labels options,
   appending one struct spv_label_match to criteria.labels for each element
   and growing the array as needed.  OP is the kind of label match that the
   calling option requests.  ARG is modified in place by strtok(). */
784 parse_labels (char *arg, enum spv_label_match_op op)
/* Assumes that the array has no spare capacity beyond its current
   contents. */
786 size_t allocated_labels = criteria.n_labels;
788 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
790 if (criteria.n_labels >= allocated_labels)
791 criteria.labels = x2nrealloc (criteria.labels, &allocated_labels,
792 sizeof *criteria.labels);
794 struct spv_label_match *lm = &criteria.labels[criteria.n_labels++];
801 parse_instances (char *arg)
803 size_t allocated_instances = criteria.n_instances;
805 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
807 if (criteria.n_instances >= allocated_instances)
808 criteria.instances = x2nrealloc (criteria.instances,
809 &allocated_instances,
810 sizeof *criteria.instances);
812 criteria.instances[criteria.n_instances++]
813 = (!strcmp (token, "last") ? -1 : atoi (token));
/* Parses the command-line options in ARGV[0] through ARGV[ARGC - 1] with
   getopt_long(), accepting the short options -O (with an argument), -h, and
   -v in addition to the long options listed in LONG_OPTIONS.

   Output driver options are collected in output_options, --member-name sets
   show_member_name, and the selection options update the global criteria
   through the parse_*() helpers. */
818 parse_options (int argc, char *argv[])
/* Long-only options get values above UCHAR_MAX, so that they cannot collide
   with the character codes of short options. */
824 OPT_MEMBER_NAME = UCHAR_MAX + 1,
831 OPT_LABELS_CONTAINING,
839 static const struct option long_options[] =
841 { "member-name", no_argument, NULL, OPT_MEMBER_NAME },
842 { "show-hidden", no_argument, NULL, OPT_SHOW_HIDDEN },
843 { "select", required_argument, NULL, OPT_SELECT },
844 { "select-except", required_argument, NULL, OPT_SELECT_EXCEPT },
845 { "commands", required_argument, NULL, OPT_COMMANDS },
846 { "subtypes", required_argument, NULL, OPT_SUBTYPES },
847 { "labels", required_argument, NULL, OPT_LABELS },
848 { "labels-containing", required_argument, NULL,
849 OPT_LABELS_CONTAINING },
850 { "labels-starting", required_argument, NULL, OPT_LABELS_STARTING },
851 { "labels-ending", required_argument, NULL, OPT_LABELS_ENDING },
852 { "instances", required_argument, NULL, OPT_INSTANCES },
853 { "errors", no_argument, NULL, OPT_ERRORS },
854 { "sort", no_argument, NULL, OPT_SORT },
855 { "raw", no_argument, NULL, OPT_RAW },
856 { "help", no_argument, NULL, 'h' },
857 { "version", no_argument, NULL, 'v' },
858 { NULL, 0, NULL, 0 },
863 c = getopt_long (argc, argv, "O:hv", long_options, NULL);
870 output_driver_parse_option (optarg, &output_options);
873 case OPT_MEMBER_NAME:
874 show_member_name = true;
877 case OPT_SHOW_HIDDEN:
878 criteria.include_hidden = true;
/* --select-except is --select with the resulting set of classes inverted. */
882 parse_select (optarg, false);
885 case OPT_SELECT_EXCEPT:
886 parse_select (optarg, true);
890 parse_commands (optarg);
894 parse_subtypes (optarg);
/* The four --labels options differ only in the match operator. */
898 parse_labels (optarg, SPV_LABEL_MATCH_EQUALS);
901 case OPT_LABELS_CONTAINING:
902 parse_labels (optarg, SPV_LABEL_MATCH_CONTAINS);
905 case OPT_LABELS_STARTING:
906 parse_labels (optarg, SPV_LABEL_MATCH_STARTS);
909 case OPT_LABELS_ENDING:
910 parse_labels (optarg, SPV_LABEL_MATCH_ENDS);
914 parse_instances (optarg);
918 criteria.error = true;
930 version_etc (stdout, "pspp-output", PACKAGE_NAME, PACKAGE_VERSION,
931 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
947 struct string s = DS_EMPTY_INITIALIZER;
948 struct string_set formats = STRING_SET_INITIALIZER(formats);
949 output_get_supported_formats (&formats);
951 const struct string_set_node *node;
952 STRING_SET_FOR_EACH (format, node, &formats)
954 if (!ds_is_empty (&s))
955 ds_put_byte (&s, ' ');
956 ds_put_cstr (&s, format);
958 string_set_destroy (&formats);
961 %s, a utility for working with SPSS output (.spv) files.\n\
962 Usage: %s [OPTION]... COMMAND ARG...\n\
964 The following commands are available:\n\
965 detect INPUT Detect whether INPUT is an SPV file.\n\
966 dir INPUT List tables and other items in INPUT.\n\
967 convert INPUT OUTPUT Convert .spv INPUT to OUTPUT.\n\
969 The desired format of OUTPUT is by default inferred from its extension:\n\
973 -O format=FORMAT override format for output\n\
974 -O OPTION=VALUE set output option\n\
975 --help display this help and exit\n\
976 --version output version information and exit\n",
977 program_name, program_name, ds_cstr (&s));