+ s0 = encodings[0].utf8_strings[string_idx];
+ for (i = 1; i < n_encodings; i++)
+ if (strcmp (s0, encodings[i].utf8_strings[string_idx]))
+ return false;
+
+ return true;
+}
+
+ /* Returns the number of leading bytes of string STRING_IDX that are
+    identical across all N_ENCODINGS elements of ENCODINGS, trimmed back so
+    that the last byte counted is a space.  Returns 0 if no such run exists.
+
+    ENCODINGS[0] serves as the reference string and is read unconditionally,
+    so N_ENCODINGS must be at least 1. */
+ static int
+ equal_prefix (const struct encoding *encodings, size_t n_encodings,
+               size_t string_idx)
+ {
+   const char *ref = encodings[0].utf8_strings[string_idx];
+   size_t common = strlen (ref);
+
+   for (size_t e = 1; e < n_encodings; e++)
+     {
+       const char *other = encodings[e].utf8_strings[string_idx];
+       size_t k = 0;
+
+       /* COMMON never exceeds strlen (REF), so REF[k] is never a null byte
+          here; if OTHER is shorter, its terminator compares unequal and
+          stops the scan before reading past its end. */
+       while (k < common && ref[k] == other[k])
+         k++;
+
+       if (k < common)
+         {
+           /* Mismatch at the very first byte: nothing in common. */
+           if (k == 0)
+             return 0;
+           common = k;
+         }
+     }
+
+   /* Back off to just past the last space so that the prefix does not end
+      in the middle of a word. */
+   while (common > 0)
+     {
+       if (ref[common - 1] == ' ')
+         break;
+       common--;
+     }
+   return common;
+ }
+
+ /* Returns the number of trailing bytes of string STRING_IDX that are
+    identical across all N_ENCODINGS elements of ENCODINGS, trimmed so that
+    the first byte counted is a space.  Returns 0 if no such run exists.
+
+    ENCODINGS[0] serves as the reference string and is read unconditionally,
+    so N_ENCODINGS must be at least 1.
+
+    The result is computed without regard to equal_prefix(), so for a given
+    string the prefix and suffix ranges can overlap; a caller that strips
+    both from the same string needs to allow for that. */
+ static int
+ equal_suffix (const struct encoding *encodings, size_t n_encodings,
+               size_t string_idx)
+ {
+   const char *ref = encodings[0].utf8_strings[string_idx];
+   const size_t ref_len = strlen (ref);
+   size_t common = ref_len;
+
+   for (size_t e = 1; e < n_encodings; e++)
+     {
+       const char *other = encodings[e].utf8_strings[string_idx];
+       const size_t other_len = strlen (other);
+       size_t k = 0;
+
+       /* The shared suffix cannot be longer than the shortest string. */
+       if (other_len < common)
+         common = other_len;
+
+       /* Walk backward from the end of both strings. */
+       while (k < common && ref[ref_len - 1 - k] == other[other_len - 1 - k])
+         k++;
+
+       if (k < common)
+         {
+           /* Mismatch at the very last byte: nothing in common. */
+           if (k == 0)
+             return 0;
+           common = k;
+         }
+     }
+
+   /* Shrink until the suffix begins at a space so that it does not start
+      in the middle of a word. */
+   while (common > 0)
+     {
+       if (ref[ref_len - common] == ' ')
+         break;
+       common--;
+     }
+   return common;
+ }
+
+static void
+report_encodings (const struct file_handle *h, struct pool *pool,
+ char **titles, bool *ids, char **strings, size_t n_strings)
+{
+ struct encoding encodings[N_ENCODING_NAMES];
+ size_t n_encodings, n_unique_strings;
+
+ n_encodings = 0;
+ for (size_t i = 0; i < N_ENCODING_NAMES; i++)
+ {
+ char **utf8_strings;
+ struct encoding *e;
+ unsigned int hash;
+
+ utf8_strings = recode_strings (pool, strings, ids, n_strings,
+ encoding_names[i]);
+ if (!utf8_strings)
+ continue;
+
+ /* Hash utf8_strings. */
+ hash = 0;
+ for (size_t j = 0; j < n_strings; j++)
+ hash = hash_string (utf8_strings[j], hash);
+
+ /* If there's a duplicate encoding, just mark it. */
+ e = find_duplicate_encoding (encodings, n_encodings,
+ utf8_strings, n_strings, hash);
+ if (e)
+ {
+ e->encodings |= UINT64_C (1) << i;
+ continue;
+ }
+
+ e = &encodings[n_encodings++];
+ e->encodings = UINT64_C (1) << i;
+ e->utf8_strings = utf8_strings;
+ e->hash = hash;
+ }
+ if (!n_encodings)
+ {
+ msg (SW, _("No valid encodings found."));
+ return;
+ }
+
+ /* Table of valid encodings. */
+ struct pivot_table *table = pivot_table_create__ (
+ pivot_value_new_text_format (N_("Usable encodings for %s."),
+ fh_get_name (h)));
+ table->caption = pivot_value_new_text_format (
+ N_("Encodings that can successfully read %s (by specifying the encoding "
+ "name on the GET command's ENCODING subcommand). Encodings that "
+ "yield identical text are listed together."),
+ fh_get_name (h));
+
+ pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Encodings"),
+ N_("Encodings"));
+ struct pivot_dimension *number = pivot_dimension_create__ (
+ table, PIVOT_AXIS_ROW, pivot_value_new_user_text ("#", -1));
+ number->root->show_label = true;
+
+ for (size_t i = 0; i < n_encodings; i++)
+ {
+ struct string s = DS_EMPTY_INITIALIZER;
+ for (size_t j = 0; j < 64; j++)
+ if (encodings[i].encodings & (UINT64_C (1) << j))
+ ds_put_format (&s, "%s, ", encoding_names[j]);
+ ds_chomp (&s, ss_cstr (", "));
+
+ int row = pivot_category_create_leaf (number->root,
+ pivot_value_new_integer (i + 1));
+ pivot_table_put2 (
+ table, 0, row, pivot_value_new_user_text_nocopy (ds_steal_cstr (&s)));
+ }
+ pivot_table_submit (table);
+
+ n_unique_strings = 0;
+ for (size_t i = 0; i < n_strings; i++)
+ if (!all_equal (encodings, n_encodings, i))
+ n_unique_strings++;
+ if (!n_unique_strings)
+ return;
+
+ /* Table of alternative interpretations. */
+ table = pivot_table_create__ (
+ pivot_value_new_text_format (N_("%s Encoded Text Strings"),
+ fh_get_name (h)));
+ table->caption = pivot_value_new_text (
+ N_("Text strings in the file dictionary that the previously listed "
+ "encodings interpret differently, along with the interpretations."));
+
+ pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Text"), N_("Text"));
+
+ number = pivot_dimension_create__ (table, PIVOT_AXIS_ROW,
+ pivot_value_new_user_text ("#", -1));
+ number->root->show_label = true;
+ for (size_t i = 0; i < n_encodings; i++)
+ pivot_category_create_leaf (number->root,
+ pivot_value_new_integer (i + 1));
+
+ struct pivot_dimension *purpose = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Purpose"));
+ purpose->root->show_label = true;
+
+ for (size_t i = 0; i < n_strings; i++)
+ if (!all_equal (encodings, n_encodings, i))
+ {
+ int prefix = equal_prefix (encodings, n_encodings, i);
+ int suffix = equal_suffix (encodings, n_encodings, i);
+
+ int purpose_idx = pivot_category_create_leaf (
+ purpose->root, pivot_value_new_user_text (titles[i], -1));
+
+ for (size_t j = 0; j < n_encodings; j++)
+ {
+ const char *s = encodings[j].utf8_strings[i] + prefix;
+
+ if (prefix || suffix)
+ {
+ size_t len = strlen (s) - suffix;
+ struct string entry;
+
+ ds_init_empty (&entry);
+ if (prefix)
+ ds_put_cstr (&entry, "...");
+ ds_put_substring (&entry, ss_buffer (s, len));
+ if (suffix)
+ ds_put_cstr (&entry, "...");
+
+ pivot_table_put3 (table, 0, j, purpose_idx,
+ pivot_value_new_user_text_nocopy (
+ ds_steal_cstr (&entry)));
+ }
+ else
+ pivot_table_put3 (table, 0, j, purpose_idx,
+ pivot_value_new_user_text (s, -1));
+ }
+ }
+
+ pivot_table_submit (table);