1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017, 2018 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/file-handle-def.h"
25 #include "data/settings.h"
26 #include "libpspp/i18n.h"
27 #include "libpspp/message.h"
28 #include "libpspp/string-map.h"
29 #include "libpspp/string-set.h"
30 #include "output/driver.h"
31 #include "output/group-item.h"
32 #include "output/page-setup-item.h"
33 #include "output/pivot-table.h"
34 #include "output/spv/light-binary-parser.h"
35 #include "output/spv/spv-legacy-data.h"
36 #include "output/spv/spv-output.h"
37 #include "output/spv/spv-select.h"
38 #include "output/spv/spv.h"
39 #include "output/table-item.h"
40 #include "output/text-item.h"
42 #include "gl/c-ctype.h"
44 #include "gl/progname.h"
45 #include "gl/version-etc.h"
46 #include "gl/xalloc.h"
48 #include <libxml/tree.h>
49 #include <libxml/xpath.h>
50 #include <libxml/xpathInternals.h>
53 #define _(msgid) gettext (msgid)
55 /* -O key=value: Output driver options. */
56 static struct string_map output_options
57 = STRING_MAP_INITIALIZER (output_options);
59 /* --member-name: Include .zip member name in "dir" output. */
60 static bool show_member_names;
62 /* --show-hidden, --select, --commands, ...: Selection criteria. */
63 static struct spv_criteria *criteria;
64 static size_t n_criteria, allocated_criteria;
66 /* --or: Add new element to 'criteria' array. */
67 static bool new_criteria;
69 /* --sort: Sort members under dump-light-table, to make comparisons easier. */
72 /* --raw: Dump raw binary data in dump-light-table. */
75 /* -f, --force: Keep output file even on error. */
78 /* Number of warnings issued. */
79 static size_t n_warnings;
81 static void usage (void);
82 static void parse_options (int argc, char **argv);
85 dump_item (const struct spv_item *item)
87 if (show_member_names && (item->xml_member || item->bin_member))
89 const char *x = item->xml_member;
90 const char *b = item->bin_member;
92 ? xasprintf (_("%s and %s:"), x, b)
93 : xasprintf ("%s:", x ? x : b));
94 text_item_submit (text_item_create_nocopy (TEXT_ITEM_TITLE, s));
97 switch (spv_item_get_type (item))
99 case SPV_ITEM_HEADING:
103 spv_text_submit (item);
107 pivot_table_submit (pivot_table_ref (spv_item_get_table (item)));
111 spv_item_get_graph (item);
117 case SPV_ITEM_OBJECT:
129 print_item_directory (const struct spv_item *item)
131 for (int i = 1; i < spv_item_get_level (item); i++)
134 enum spv_item_type type = spv_item_get_type (item);
135 printf ("- %s", spv_item_type_to_string (type));
137 const char *label = spv_item_get_label (item);
139 printf (" \"%s\"", label);
141 if (type == SPV_ITEM_TABLE)
143 const struct pivot_table *table = spv_item_get_table (item);
144 char *title = pivot_value_to_string (table->title,
145 SETTINGS_VALUE_SHOW_DEFAULT,
146 SETTINGS_VALUE_SHOW_DEFAULT);
147 if (!label || strcmp (title, label))
148 printf (" title \"%s\"", title);
152 const char *command_id = spv_item_get_command_id (item);
154 printf (" command \"%s\"", command_id);
156 const char *subtype = spv_item_get_subtype (item);
157 if (subtype && (!label || strcmp (label, subtype)))
158 printf (" subtype \"%s\"", subtype);
160 if (!spv_item_is_visible (item))
161 printf (" (hidden)");
162 if (show_member_names && (item->xml_member || item->bin_member))
164 if (item->xml_member && item->bin_member)
165 printf (" in %s and %s", item->xml_member, item->bin_member);
166 else if (item->xml_member)
167 printf (" in %s", item->xml_member);
168 else if (item->bin_member)
169 printf (" in %s", item->bin_member);
175 run_detect (int argc UNUSED, char **argv)
177 char *err = spv_detect (argv[1]);
179 error (1, 0, "%s", err);
183 run_directory (int argc UNUSED, char **argv)
185 struct spv_reader *spv;
186 char *err = spv_open (argv[1], &spv);
188 error (1, 0, "%s", err);
190 struct spv_item **items;
192 spv_select (spv, criteria, n_criteria, &items, &n_items);
193 for (size_t i = 0; i < n_items; i++)
194 print_item_directory (items[i]);
202 const struct spv_item **nodes;
206 const struct spv_item *stub[N_STUB];
/* Exchanges the two item pointers that A and B point to. */
static void
swap_nodes (const struct spv_item **a, const struct spv_item **b)
{
  const struct spv_item *saved_b = *b;
  *b = *a;
  *a = saved_b;
}
218 get_path (const struct spv_item *item, struct item_path *path)
220 size_t allocated = 10;
221 path->nodes = path->stub;
226 if (path->n >= allocated)
228 if (path->nodes == path->stub)
229 path->nodes = xmemdup (path->stub, sizeof path->stub);
230 path->nodes = x2nrealloc (path->nodes, &allocated,
231 sizeof *path->nodes);
233 path->nodes[path->n++] = item;
237 for (size_t i = 0; i < path->n / 2; i++)
238 swap_nodes (&path->nodes[i], &path->nodes[path->n - i - 1]);
242 free_path (struct item_path *path)
244 if (path && path->nodes != path->stub)
249 dump_heading_transition (const struct spv_item *old,
250 const struct spv_item *new)
255 struct item_path old_path, new_path;
256 get_path (old, &old_path);
257 get_path (new, &new_path);
260 for (; common < old_path.n && common < new_path.n; common++)
261 if (old_path.nodes[common] != new_path.nodes[common])
264 for (size_t i = common; i < old_path.n; i++)
265 group_close_item_submit (group_close_item_create ());
266 for (size_t i = common; i < new_path.n; i++)
267 group_open_item_submit (group_open_item_create (
268 new_path.nodes[i]->command_id));
270 free_path (&old_path);
271 free_path (&new_path);
275 run_convert (int argc UNUSED, char **argv)
277 struct spv_reader *spv;
278 char *err = spv_open (argv[1], &spv);
280 error (1, 0, "%s", err);
282 output_engine_push ();
283 output_set_filename (argv[1]);
284 string_map_replace (&output_options, "output-file", argv[2]);
285 struct output_driver *driver = output_driver_create (&output_options);
288 output_driver_register (driver);
290 const struct page_setup *ps = spv_get_page_setup (spv);
292 page_setup_item_submit (page_setup_item_create (ps));
294 struct spv_item **items;
296 spv_select (spv, criteria, n_criteria, &items, &n_items);
297 struct spv_item *prev_heading = spv_get_root (spv);
298 for (size_t i = 0; i < n_items; i++)
300 struct spv_item *heading
301 = items[i]->type == SPV_ITEM_HEADING ? items[i] : items[i]->parent;
302 dump_heading_transition (prev_heading, heading);
303 dump_item (items[i]);
304 prev_heading = heading;
306 dump_heading_transition (prev_heading, spv_get_root (spv));
311 output_engine_pop ();
314 if (n_warnings && !force)
316 /* XXX There could be other files to unlink, e.g. the ascii driver can
317 produce additional files with the charts. */
323 run_dump (int argc UNUSED, char **argv)
325 struct spv_reader *spv;
326 char *err = spv_open (argv[1], &spv);
328 error (1, 0, "%s", err);
330 struct spv_item **items;
332 spv_select (spv, criteria, n_criteria, &items, &n_items);
333 for (size_t i = 0; i < n_items; i++)
334 if (items[i]->type == SPV_ITEM_TABLE)
336 pivot_table_dump (spv_item_get_table (items[i]), 0);
345 compare_borders (const void *a_, const void *b_)
347 const struct spvlb_border *const *ap = a_;
348 const struct spvlb_border *const *bp = b_;
349 uint32_t a = (*ap)->border_type;
350 uint32_t b = (*bp)->border_type;
352 return a < b ? -1 : a > b;
356 compare_cells (const void *a_, const void *b_)
358 const struct spvlb_cell *const *ap = a_;
359 const struct spvlb_cell *const *bp = b_;
360 uint64_t a = (*ap)->index;
361 uint64_t b = (*bp)->index;
363 return a < b ? -1 : a > b;
367 run_dump_light_table (int argc UNUSED, char **argv)
369 if (raw && isatty (STDOUT_FILENO))
370 error (1, 0, "not writing binary data to tty");
372 struct spv_reader *spv;
373 char *err = spv_open (argv[1], &spv);
375 error (1, 0, "%s", err);
377 struct spv_item **items;
379 spv_select (spv, criteria, n_criteria, &items, &n_items);
380 for (size_t i = 0; i < n_items; i++)
382 if (!spv_item_is_light_table (items[i]))
390 error = spv_item_get_raw_light_table (items[i], &data, &size);
393 fwrite (data, size, 1, stdout);
399 struct spvlb_table *table;
400 error = spv_item_get_light_table (items[i], &table);
405 qsort (table->borders->borders, table->borders->n_borders,
406 sizeof *table->borders->borders, compare_borders);
407 qsort (table->cells->cells, table->cells->n_cells,
408 sizeof *table->cells->cells, compare_cells);
410 spvlb_print_table (items[i]->bin_member, 0, table);
411 spvlb_free_table (table);
416 msg (ME, "%s", error);
427 run_dump_legacy_data (int argc UNUSED, char **argv)
429 struct spv_reader *spv;
430 char *err = spv_open (argv[1], &spv);
432 error (1, 0, "%s", err);
434 struct spv_item **items;
436 spv_select (spv, criteria, n_criteria, &items, &n_items);
437 for (size_t i = 0; i < n_items; i++)
438 if (spv_item_is_legacy_table (items[i]))
440 struct spv_data data;
446 error = spv_item_get_raw_legacy_data (items[i], &data, &size);
449 fwrite (data, size, 1, stdout);
455 error = spv_item_get_legacy_data (items[i], &data);
458 printf ("%s:\n", items[i]->bin_member);
459 spv_data_dump (&data, stdout);
460 spv_data_uninit (&data);
467 msg (ME, "%s", error);
/* This is really bogus.

   XPath doesn't have any notion of a default XML namespace, but all of the
   elements in the documents we're interested in have a namespace.  Thus, we'd
   need to require the XPath expressions to have a namespace on every single
   element: vis:sourceVariable, vis:graph, and so on.  That's a pain.  So,
   instead, we remove the default namespace from everyplace it occurs.  XPath
   does support the null namespace, so this allows sourceVariable, graph,
   and so on, to work.

   See http://plasmasturm.org/log/259/ and
   https://mail.gnome.org/archives/xml/2003-April/msg00144.html for more
   information. */
490 remove_default_xml_namespace (xmlNode *node)
492 if (node->ns && !node->ns->prefix)
495 for (xmlNode *child = node->children; child; child = child->next)
496 remove_default_xml_namespace (child);
500 register_ns (xmlXPathContext *ctx, const char *prefix, const char *uri)
502 xmlXPathRegisterNs (ctx, CHAR_CAST (xmlChar *, prefix),
503 CHAR_CAST (xmlChar *, uri));
506 static xmlXPathContext *
507 create_xpath_context (xmlDoc *doc)
509 xmlXPathContext *ctx = xmlXPathNewContext (doc);
510 register_ns (ctx, "vgr", "http://xml.spss.com/spss/viewer/viewer-graph");
511 register_ns (ctx, "vizml", "http://xml.spss.com/visualization");
512 register_ns (ctx, "vmd", "http://xml.spss.com/spss/viewer/viewer-model");
513 register_ns (ctx, "vps", "http://xml.spss.com/spss/viewer/viewer-pagesetup");
514 register_ns (ctx, "vst", "http://xml.spss.com/spss/viewer/viewer-style");
515 register_ns (ctx, "vtb", "http://xml.spss.com/spss/viewer/viewer-table");
516 register_ns (ctx, "vtl", "http://xml.spss.com/spss/viewer/table-looks");
517 register_ns (ctx, "vtt", "http://xml.spss.com/spss/viewer/viewer-treemodel");
518 register_ns (ctx, "vtx", "http://xml.spss.com/spss/viewer/viewer-text");
519 register_ns (ctx, "xsi", "http://www.w3.org/2001/XMLSchema-instance");
524 dump_xml (int argc, char **argv, const char *member_name,
525 char *error_s, xmlDoc *doc)
531 printf ("<!-- %s -->\n", member_name);
532 xmlElemDump (stdout, NULL, xmlDocGetRootElement (doc));
537 bool any_results = false;
539 remove_default_xml_namespace (xmlDocGetRootElement (doc));
540 for (int i = 2; i < argc; i++)
542 xmlXPathContext *xpath_ctx = create_xpath_context (doc);
543 xmlXPathSetContextNode (xmlDocGetRootElement (doc),
545 xmlXPathObject *xpath_obj = xmlXPathEvalExpression(
546 CHAR_CAST (xmlChar *, argv[i]), xpath_ctx);
548 error (1, 0, _("%s: invalid XPath expression"), argv[i]);
550 const xmlNodeSet *nodes = xpath_obj->nodesetval;
551 if (nodes && nodes->nodeNr > 0)
555 printf ("<!-- %s -->\n", member_name);
558 for (size_t j = 0; j < nodes->nodeNr; j++)
560 xmlElemDump (stdout, doc, nodes->nodeTab[j]);
565 xmlXPathFreeObject (xpath_obj);
566 xmlXPathFreeContext (xpath_ctx);
575 printf ("<!-- %s -->\n", member_name);
576 msg (ME, "%s", error_s);
582 run_dump_legacy_table (int argc, char **argv)
584 struct spv_reader *spv;
585 char *err = spv_open (argv[1], &spv);
587 error (1, 0, "%s", err);
589 struct spv_item **items;
591 spv_select (spv, criteria, n_criteria, &items, &n_items);
592 for (size_t i = 0; i < n_items; i++)
593 if (spv_item_is_legacy_table (items[i]))
596 char *error_s = spv_item_get_legacy_table (items[i], &doc);
597 dump_xml (argc, argv, items[i]->xml_member, error_s, doc);
605 run_dump_structure (int argc, char **argv)
607 struct spv_reader *spv;
608 char *err = spv_open (argv[1], &spv);
610 error (1, 0, "%s", err);
612 struct spv_item **items;
614 spv_select (spv, criteria, n_criteria, &items, &n_items);
615 const char *last_structure_member = NULL;
616 for (size_t i = 0; i < n_items; i++)
617 if (!last_structure_member || strcmp (items[i]->structure_member,
618 last_structure_member))
620 last_structure_member = items[i]->structure_member;
623 char *error_s = spv_item_get_structure (items[i], &doc);
624 dump_xml (argc, argv, items[i]->structure_member, error_s, doc);
632 run_is_legacy (int argc UNUSED, char **argv)
634 struct spv_reader *spv;
635 char *err = spv_open (argv[1], &spv);
637 error (1, 0, "%s", err);
639 bool is_legacy = false;
641 struct spv_item **items;
643 spv_select (spv, criteria, n_criteria, &items, &n_items);
644 for (size_t i = 0; i < n_items; i++)
645 if (spv_item_is_legacy_table (items[i]))
654 exit (is_legacy ? EXIT_SUCCESS : EXIT_FAILURE);
660 int min_args, max_args;
661 void (*run) (int argc, char **argv);
664 static const struct command commands[] =
666 { "detect", 1, 1, run_detect },
667 { "dir", 1, 1, run_directory },
668 { "convert", 2, 2, run_convert },
670 /* Undocumented commands. */
671 { "dump", 1, 1, run_dump },
672 { "dump-light-table", 1, 1, run_dump_light_table },
673 { "dump-legacy-data", 1, 1, run_dump_legacy_data },
674 { "dump-legacy-table", 1, INT_MAX, run_dump_legacy_table },
675 { "dump-structure", 1, INT_MAX, run_dump_structure },
676 { "is-legacy", 1, 1, run_is_legacy },
678 static const int n_commands = sizeof commands / sizeof *commands;
680 static const struct command *
681 find_command (const char *name)
683 for (size_t i = 0; i < n_commands; i++)
685 const struct command *c = &commands[i];
686 if (!strcmp (name, c->name))
693 emit_msg (const struct msg *m, void *aux UNUSED)
695 if (m->severity == MSG_S_ERROR || m->severity == MSG_S_WARNING)
698 char *s = msg_to_string (m);
699 fprintf (stderr, "%s\n", s);
704 main (int argc, char **argv)
706 set_program_name (argv[0]);
707 msg_set_handler (emit_msg, NULL);
711 parse_options (argc, argv);
717 error (1, 0, _("missing command name (use --help for help)"));
719 const struct command *c = find_command (argv[0]);
721 error (1, 0, _("unknown command \"%s\" (use --help for help)"), argv[0]);
723 int n_args = argc - 1;
724 if (n_args < c->min_args || n_args > c->max_args)
726 if (c->min_args == c->max_args)
727 error (1, 0, _("\"%s\" command takes exactly %d argument%s"),
728 c->name, c->min_args, c->min_args ? "s" : "");
729 else if (c->max_args == INT_MAX)
730 error (1, 0, _("\"%s\" command requires at least %d argument%s"),
731 c->name, c->min_args, c->min_args ? "s" : "");
733 error (1, 0, _("\"%s\" command requires between %d and %d arguments"),
734 c->name, c->min_args, c->max_args);
741 return n_warnings ? EXIT_FAILURE : EXIT_SUCCESS;
744 static struct spv_criteria *
747 if (!n_criteria || new_criteria)
749 new_criteria = false;
750 if (n_criteria >= allocated_criteria)
751 criteria = x2nrealloc (criteria, &allocated_criteria,
753 criteria[n_criteria++] = (struct spv_criteria) SPV_CRITERIA_INITIALIZER;
756 return &criteria[n_criteria - 1];
760 parse_select (char *arg)
762 bool invert = arg[0] == '^';
765 unsigned classes = 0;
766 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
768 if (!strcmp (arg, "all"))
769 classes = SPV_ALL_CLASSES;
770 else if (!strcmp (arg, "help"))
772 puts (_("The following object classes are supported:"));
773 for (int class = 0; class < SPV_N_CLASSES; class++)
774 printf ("- %s\n", spv_item_class_to_string (class));
779 int class = spv_item_class_from_string (token);
780 if (class == SPV_N_CLASSES)
781 error (1, 0, _("%s: unknown object class (use --select=help "
783 classes |= 1u << class;
787 struct spv_criteria *c = get_criteria ();
788 c->classes = invert ? classes ^ SPV_ALL_CLASSES : classes;
791 static struct spv_criteria_match *
792 get_criteria_match (const char **arg)
794 struct spv_criteria *c = get_criteria ();
795 if ((*arg)[0] == '^')
805 parse_commands (const char *arg)
807 struct spv_criteria_match *cm = get_criteria_match (&arg);
808 string_array_parse (&cm->commands, ss_cstr (arg), ss_cstr (","));
812 parse_subtypes (const char *arg)
814 struct spv_criteria_match *cm = get_criteria_match (&arg);
815 string_array_parse (&cm->subtypes, ss_cstr (arg), ss_cstr (","));
819 parse_labels (const char *arg)
821 struct spv_criteria_match *cm = get_criteria_match (&arg);
822 string_array_parse (&cm->labels, ss_cstr (arg), ss_cstr (","));
826 parse_instances (char *arg)
828 struct spv_criteria *c = get_criteria ();
829 size_t allocated_instances = c->n_instances;
831 for (char *token = strtok (arg, ","); token; token = strtok (NULL, ","))
833 if (c->n_instances >= allocated_instances)
834 c->instances = x2nrealloc (c->instances, &allocated_instances,
835 sizeof *c->instances);
837 c->instances[c->n_instances++] = (!strcmp (token, "last") ? -1
843 parse_members (const char *arg)
845 struct spv_criteria *cm = get_criteria ();
846 string_array_parse (&cm->members, ss_cstr (arg), ss_cstr (","));
850 parse_options (int argc, char *argv[])
856 OPT_MEMBER_NAMES = UCHAR_MAX + 1,
869 static const struct option long_options[] =
871 /* Input selection options. */
872 { "show-hidden", no_argument, NULL, OPT_SHOW_HIDDEN },
873 { "select", required_argument, NULL, OPT_SELECT },
874 { "commands", required_argument, NULL, OPT_COMMANDS },
875 { "subtypes", required_argument, NULL, OPT_SUBTYPES },
876 { "labels", required_argument, NULL, OPT_LABELS },
877 { "instances", required_argument, NULL, OPT_INSTANCES },
878 { "members", required_argument, NULL, OPT_MEMBERS },
879 { "errors", no_argument, NULL, OPT_ERRORS },
880 { "or", no_argument, NULL, OPT_OR },
882 /* "dir" command options. */
883 { "member-names", no_argument, NULL, OPT_MEMBER_NAMES },
885 /* "convert" command options. */
886 { "force", no_argument, NULL, 'f' },
888 /* "dump-light-table" command options. */
889 { "sort", no_argument, NULL, OPT_SORT },
890 { "raw", no_argument, NULL, OPT_RAW },
892 { "help", no_argument, NULL, 'h' },
893 { "version", no_argument, NULL, 'v' },
895 { NULL, 0, NULL, 0 },
900 c = getopt_long (argc, argv, "O:hvf", long_options, NULL);
907 output_driver_parse_option (optarg, &output_options);
910 case OPT_MEMBER_NAMES:
911 show_member_names = true;
914 case OPT_SHOW_HIDDEN:
915 get_criteria ()->include_hidden = true;
919 parse_select (optarg);
923 parse_commands (optarg);
927 parse_subtypes (optarg);
931 parse_labels (optarg);
935 parse_instances (optarg);
939 parse_members (optarg);
943 get_criteria ()->error = true;
963 version_etc (stdout, "pspp-output", PACKAGE_NAME, PACKAGE_VERSION,
964 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
980 struct string s = DS_EMPTY_INITIALIZER;
981 struct string_set formats = STRING_SET_INITIALIZER(formats);
982 output_get_supported_formats (&formats);
984 const struct string_set_node *node;
985 STRING_SET_FOR_EACH (format, node, &formats)
987 if (!ds_is_empty (&s))
988 ds_put_byte (&s, ' ');
989 ds_put_cstr (&s, format);
991 string_set_destroy (&formats);
994 %s, a utility for working with SPSS viewer (.spv) files.\n\
995 Usage: %s [OPTION]... COMMAND ARG...\n\
997 The following commands are available:\n\
998 detect FILE Detect whether FILE is an SPV file.\n\
999 dir FILE List tables and other items in FILE.\n\
1000 convert SOURCE DEST Convert .spv SOURCE to DEST.\n\
1002 Input selection options for \"dir\" and \"convert\":\n\
1003 --select=CLASS... include only some kinds of objects\n\
1004 --select=help print known object classes\n\
1005 --commands=COMMAND... include only specified COMMANDs\n\
1006 --subtypes=SUBTYPE... include only specified SUBTYPEs of output\n\
1007 --labels=LABEL... include only output objects with the given LABELs\n\
1008 --instances=INSTANCE... include only the given object INSTANCEs\n\
1009 --show-hidden include hidden output objects\n\
1010 --or separate two sets of selection options\n\
1012 \"convert\" by default infers the destination's format from its extension.\n\
1013 The known extensions are: %s\n\
1014 The following options override \"convert\" behavior:\n\
1015 -O format=FORMAT set destination format to FORMAT\n\
1016 -O OPTION=VALUE set output option\n\
1017 -f, --force keep output file even given errors\n\
1019 --help display this help and exit\n\
1020 --version output version information and exit\n",
1021 program_name, program_name, ds_cstr (&s));