#include <config.h>
+#include <cairo.h>
#include <getopt.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
+#include <unistr.h>
#include "data/file-handle-def.h"
#include "data/settings.h"
+#include "libpspp/encoding-guesser.h"
#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/string-map.h"
#include "libpspp/string-set.h"
#include "output/driver.h"
-#include "output/group-item.h"
-#include "output/page-setup-item.h"
+#include "output/output-item.h"
#include "output/pivot-table.h"
#include "output/spv/light-binary-parser.h"
#include "output/spv/spv-legacy-data.h"
+#include "output/spv/spv-light-decoder.h"
#include "output/spv/spv-output.h"
#include "output/spv/spv-select.h"
#include "output/spv/spv-table-look.h"
#include "output/spv/spv.h"
-#include "output/table-item.h"
-#include "output/text-item.h"
#include "gl/c-ctype.h"
#include "gl/error.h"
static struct string_map output_options
= STRING_MAP_INITIALIZER (output_options);
-/* --member-name: Include .zip member name in "dir" output. */
+/* --member-names: Include .zip member name in "dir" output. */
static bool show_member_names;
/* --show-hidden, --select, --commands, ...: Selection criteria. */
/* --sort: Sort members under dump-light-table, to make comparisons easier. */
static bool sort;
-/* --raw: Dump raw binary data in dump-light-table. */
+/* --raw: Dump raw binary data in "dump-light-table"; dump all strings in
+ "strings". */
static bool raw;
+/* --no-ascii-only: Drop all-ASCII strings in "strings". */
+static bool exclude_ascii_only;
+
+/* --utf8-only: Only print strings that have UTF-8 multibyte sequences in
+ * "strings". */
+static bool include_utf8_only;
+
/* -f, --force: Keep output file even on error. */
static bool force;
static size_t n_warnings;
static void usage (void);
+static void developer_usage (void);
static void parse_options (int argc, char **argv);
static void
{
const char *x = item->xml_member;
const char *b = item->bin_member;
+
+ /* The strings below are not marked for translation because they are only
+ useful to developers. */
char *s = (x && b
- ? xasprintf (_("%s and %s:"), x, b)
+ ? xasprintf ("%s and %s:", x, b)
: xasprintf ("%s:", x ? x : b));
- text_item_submit (text_item_create_nocopy (TEXT_ITEM_TITLE, s));
+ output_item_submit (text_item_create_nocopy (TEXT_ITEM_TITLE, s,
+ xstrdup ("Member Names")));
}
switch (spv_item_get_type (item))
case SPV_ITEM_MODEL:
break;
- case SPV_ITEM_OBJECT:
+ case SPV_ITEM_IMAGE:
+ output_item_submit (image_item_create (cairo_surface_reference (
+ spv_item_get_image (item))));
break;
case SPV_ITEM_TREE:
if (type == SPV_ITEM_TABLE)
{
const struct pivot_table *table = spv_item_get_table (item);
- char *title = pivot_value_to_string (table->title,
- SETTINGS_VALUE_SHOW_DEFAULT,
- SETTINGS_VALUE_SHOW_DEFAULT);
+ char *title = pivot_value_to_string (table->title, table);
if (!label || strcmp (title, label))
printf (" title \"%s\"", title);
free (title);
if (!spv_item_is_visible (item))
printf (" (hidden)");
- if (show_member_names && (item->xml_member || item->bin_member))
+
+ if (show_member_names)
{
- if (item->xml_member && item->bin_member)
- printf (" in %s and %s", item->xml_member, item->bin_member);
- else if (item->xml_member)
- printf (" in %s", item->xml_member);
- else if (item->bin_member)
- printf (" in %s", item->bin_member);
+ const char *members[] = {
+ item->xml_member,
+ item->bin_member,
+ item->png_member,
+ };
+ size_t n = 0;
+
+ for (size_t i = 0; i < sizeof members / sizeof *members; i++)
+ if (members[i])
+ printf (" %s %s", n++ == 0 ? "in" : "and", members[i]);
}
putchar ('\n');
}
break;
for (size_t i = common; i < old_path.n; i++)
- group_close_item_submit (group_close_item_create ());
+ output_item_submit (group_close_item_create ());
for (size_t i = common; i < new_path.n; i++)
- group_open_item_submit (group_open_item_create (
- new_path.nodes[i]->command_id));
+ output_item_submit (group_open_item_create (
+ new_path.nodes[i]->command_id,
+ new_path.nodes[i]->label));
free_path (&old_path);
free_path (&new_path);
const struct page_setup *ps = spv_get_page_setup (spv);
if (ps)
- page_setup_item_submit (page_setup_item_create (ps));
+ output_item_submit (page_setup_item_create (ps));
struct spv_item **items;
size_t n_items;
static void
run_get_table_look (int argc UNUSED, char **argv)
{
- struct spv_reader *spv;
- char *err = spv_open (argv[1], &spv);
+ struct pivot_table_look *look;
+ if (strcmp (argv[1], "-"))
+ {
+ struct spv_reader *spv;
+ char *err = spv_open (argv[1], &spv);
+ if (err)
+ error (1, 0, "%s", err);
+
+ const struct pivot_table *table = get_first_table (spv);
+ if (!table)
+ error (1, 0, "%s: no tables found", argv[1]);
+
+ look = pivot_table_look_ref (pivot_table_get_look (table));
+
+ spv_close (spv);
+ }
+ else
+ look = pivot_table_look_ref (pivot_table_look_builtin_default ());
+
+ char *err = spv_table_look_write (argv[2], look);
if (err)
error (1, 0, "%s", err);
- const struct pivot_table *table = get_first_table (spv);
- if (!table)
- error (1, 0, "%s: no tables found", argv[1]);
+ pivot_table_look_unref (look);
+}
- err = spv_table_look_write (argv[2], pivot_table_get_look (table));
+static void
+run_convert_table_look (int argc UNUSED, char **argv)
+{
+ struct pivot_table_look *look;
+ char *err = spv_table_look_read (argv[1], &look);
if (err)
error (1, 0, "%s", err);
- spv_close (spv);
+ err = spv_table_look_write (argv[2], look);
+ if (err)
+ error (1, 0, "%s", err);
+
+ pivot_table_look_unref (look);
+ free (look);
}
static void
if (err)
error (1, 0, "%s", err);
+ if (raw && isatty (STDOUT_FILENO))
+ error (1, 0, "not writing binary data to tty");
+
struct spv_item **items;
size_t n_items;
spv_select (spv, criteria, n_criteria, &items, &n_items);
xmlXPathFreeContext (xpath_ctx);
}
if (any_results)
- putchar ('\n');;
+ putchar ('\n');
}
xmlFreeDoc (doc);
}
exit (is_legacy ? EXIT_SUCCESS : EXIT_FAILURE);
}
+static bool
+is_all_ascii (const char *s)
+{
+ for (; *s; s++)
+ if (!encoding_guess_is_ascii_text (*s))
+ return false;
+
+ return true;
+}
+
+static void
+dump_strings (const char *encoding, struct string_array *strings)
+{
+ string_array_sort (strings);
+ string_array_uniq (strings);
+
+ if (raw)
+ {
+ if (exclude_ascii_only || include_utf8_only)
+ {
+ size_t i = 0;
+ for (size_t j = 0; j < strings->n; j++)
+ {
+ char *s = strings->strings[j];
+ bool is_ascii = is_all_ascii (s);
+ bool is_utf8 = !u8_check (CHAR_CAST (uint8_t *, s), strlen (s));
+ if (!is_ascii && (!include_utf8_only || is_utf8))
+ strings->strings[i++] = s;
+ else
+ free (s);
+ }
+ strings->n = i;
+ }
+ for (size_t i = 0; i < strings->n; i++)
+ puts (strings->strings[i]);
+ }
+ else
+ {
+ size_t n_nonascii = 0;
+ size_t n_utf8 = 0;
+ for (size_t i = 0; i < strings->n; i++)
+ {
+ const char *s = strings->strings[i];
+ if (!is_all_ascii (s))
+ {
+ n_nonascii++;
+ if (!u8_check (CHAR_CAST (uint8_t *, s), strlen (s)))
+ n_utf8++;
+ }
+ }
+ printf ("%s: %zu unique strings, %zu non-ASCII, %zu UTF-8.\n",
+ encoding, strings->n, n_nonascii, n_utf8);
+ }
+}
+
+static void
+run_strings (int argc UNUSED, char **argv)
+{
+ struct spv_reader *spv;
+ char *err = spv_open (argv[1], &spv);
+ if (err)
+ error (1, 0, "%s", err);
+
+ struct encoded_strings
+ {
+ char *encoding;
+ struct string_array strings;
+ }
+ *es = NULL;
+ size_t n_es = 0;
+ size_t allocated_es = 0;
+
+ struct spv_item **items;
+ size_t n_items;
+ spv_select (spv, criteria, n_criteria, &items, &n_items);
+ for (size_t i = 0; i < n_items; i++)
+ {
+ if (!spv_item_is_light_table (items[i]))
+ continue;
+
+ char *error;
+ struct spvlb_table *table;
+ error = spv_item_get_light_table (items[i], &table);
+ if (error)
+ {
+ msg (ME, "%s", error);
+ free (error);
+ continue;
+ }
+
+ const char *table_encoding = spvlb_table_get_encoding (table);
+ size_t j = 0;
+ for (j = 0; j < n_es; j++)
+ if (!strcmp (es[j].encoding, table_encoding))
+ break;
+ if (j >= n_es)
+ {
+ if (n_es >= allocated_es)
+ es = x2nrealloc (es, &allocated_es, sizeof *es);
+ es[n_es++] = (struct encoded_strings) {
+ .encoding = xstrdup (table_encoding),
+ .strings = STRING_ARRAY_INITIALIZER,
+ };
+ }
+ collect_spvlb_strings (table, &es[j].strings);
+ }
+ free (items);
+
+ for (size_t i = 0; i < n_es; i++)
+ {
+ dump_strings (es[i].encoding, &es[i].strings);
+ free (es[i].encoding);
+ string_array_destroy (&es[i].strings);
+ }
+ free (es);
+
+ spv_close (spv);
+}
+
struct command
{
const char *name;
{ "dir", 1, 1, run_directory },
{ "convert", 2, 2, run_convert },
{ "get-table-look", 2, 2, run_get_table_look },
+ { "convert-table-look", 2, 2, run_convert_table_look },
/* Undocumented commands. */
{ "dump", 1, 1, run_dump },
{ "dump-legacy-table", 1, INT_MAX, run_dump_legacy_table },
{ "dump-structure", 1, INT_MAX, run_dump_structure },
{ "is-legacy", 1, 1, run_is_legacy },
+ { "strings", 1, 1, run_strings },
};
static const int n_commands = sizeof commands / sizeof *commands;
c->run (argc, argv);
- if (table_look)
- {
- pivot_table_look_uninit (table_look);
- free (table_look);
- }
+ pivot_table_look_unref (table_look);
i18n_done ();
return n_warnings ? EXIT_FAILURE : EXIT_SUCCESS;
static void
parse_table_look (const char *arg)
{
- if (table_look)
- {
- pivot_table_look_uninit (table_look);
- free (table_look);
- }
+ pivot_table_look_unref (table_look);
char *error_s = spv_table_look_read (arg, &table_look);
if (error_s)
OPT_OR,
OPT_SORT,
OPT_RAW,
+ OPT_NO_ASCII_ONLY,
+ OPT_UTF8_ONLY,
OPT_TABLE_LOOK,
+ OPT_HELP_DEVELOPER,
};
static const struct option long_options[] =
{
{ "sort", no_argument, NULL, OPT_SORT },
{ "raw", no_argument, NULL, OPT_RAW },
+ /* "strings" command options. */
+ { "no-ascii-only", no_argument, NULL, OPT_NO_ASCII_ONLY },
+ { "utf8-only", no_argument, NULL, OPT_UTF8_ONLY },
+
{ "help", no_argument, NULL, 'h' },
+ { "help-developer", no_argument, NULL, OPT_HELP_DEVELOPER },
{ "version", no_argument, NULL, 'v' },
{ NULL, 0, NULL, 0 },
parse_table_look (optarg);
break;
+ case OPT_NO_ASCII_ONLY:
+ exclude_ascii_only = true;
+ break;
+
+ case OPT_UTF8_ONLY:
+ include_utf8_only = true;
+ break;
+
case 'f':
force = true;
break;
usage ();
exit (EXIT_SUCCESS);
+ case OPT_HELP_DEVELOPER:
+ developer_usage ();
+ exit (EXIT_SUCCESS);
+
default:
exit (EXIT_FAILURE);
}
dir FILE List tables and other items in FILE.\n\
convert SOURCE DEST Convert .spv SOURCE to DEST.\n\
get-table-look SOURCE DEST Copies first selected TableLook into DEST\n\
+ convert-table-look SOURCE DEST Copies .tlo or .stt SOURCE into DEST\n\
\n\
Input selection options for \"dir\" and \"convert\":\n\
--select=CLASS... include only some kinds of objects\n\
--table-look=FILE override tables' style with TableLook from FILE\n\
Other options:\n\
--help display this help and exit\n\
+ --help-developer display help for developer commands and exit\n\
--version output version information and exit\n",
program_name, program_name, ds_cstr (&s));
ds_destroy (&s);
}
+
+static void
+developer_usage (void)
+{
+ printf ("\
+The following developer commands are available:\n\
+ dump FILE Dump pivot table structure\n\
+ [--raw | --sort] dump-light-table FILE Dump light tables\n\
+ [--raw] dump-legacy-data FILE Dump legacy table data\n\
+ dump-legacy-table FILE [XPATH]... Dump legacy table XML\n\
+ dump-structure FILE [XPATH]... Dump structure XML\n\
+ is-legacy FILE Exit with status 0 if any legacy table selected\n\
+ strings FILE Dump analysis of strings\n\
+\n\
+Additional input selection options:\n\
+ --members=MEMBER... include only objects with these Zip member names\n\
+ --errors include only objects that cannot be loaded\n\
+\n\
+Additional options for \"dir\" command:\n\
+ --member-names show Zip member names with objects\n\
+\n\
+Options for the \"strings\" command:\n\
+ --raw Dump all (unique) strings\n\
+ --raw --no-ascii-only Dump all strings that contain non-ASCII characters\n\
+ --raw --utf8-only Dump all non-ASCII strings that are valid UTF-8\n\
+\n\
+Other options:\n\
+ --raw print raw binary data instead of a parsed version\n\
+ --sort sort borders and areas for shorter \"diff\" output\n");
+}