From db9a44802bb9fde4d4acd1b11572493b82193ab0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 7 Jan 2021 16:38:18 -0800 Subject: [PATCH] spv-light-decoder: Text strings are all UTF-8 encoded. Thanks to elias tsolis for reporting this bug. Bug #59837. --- doc/dev/spv-file-format.texi | 9 +- src/output/spv/spv-light-decoder.c | 153 ++++++++++++----------------- 2 files changed, 65 insertions(+), 97 deletions(-) diff --git a/doc/dev/spv-file-format.texi b/doc/dev/spv-file-format.texi index 16b21dcf12..666e8d4410 100644 --- a/doc/dev/spv-file-format.texi +++ b/doc/dev/spv-file-format.texi @@ -893,8 +893,8 @@ A 32-bit IEEE floating-point number. @item string @itemx bestring A 32-bit unsigned integer, in little-endian or big-endian byte order, -respectively, followed by the specified number of bytes of character -data. (The encoding is indicated by the Formats nonterminal.) +respectively, followed by the specified number of bytes of UTF-8 +encoded character data. @item @var{x}? @var{x} is optional, e.g.@: 00? is an optional zero byte. @@ -1336,9 +1336,8 @@ If @code{n-widths} is nonzero, then the accompanying integers are column widths as manually adjusted by the user. @code{locale} is a locale including an encoding, such as -@code{en_US.windows-1252} or @code{it_IT.windows-1252}. The rest of -the character strings in the member use this encoding. The encoding -string is itself encoded in US-ASCII. +@code{en_US.windows-1252} or @code{it_IT.windows-1252}. The encoding +string (like other strings in the member) is encoded in UTF-8. @code{epoch} is the year that starts the epoch. A 2-digit year is interpreted as belonging to the 100 years beginning at the epoch. The diff --git a/src/output/spv/spv-light-decoder.c b/src/output/spv/spv-light-decoder.c index 9c4bf80bc1..d5c387d09e 100644 --- a/src/output/spv/spv-light-decoder.c +++ b/src/output/spv/spv-light-decoder.c @@ -33,15 +33,9 @@ #include "gl/xsize.h" static char * -to_utf8 (const char *s, const char *encoding) +xstrdup_if_nonempty (const char *s) { - return recode_string ("UTF-8", encoding, s, strlen (s)); -} - -static char * -to_utf8_if_nonempty (const char *s, const char *encoding) -{ - return s && s[0] ? to_utf8 (s, encoding) : NULL; + return s && s[0] ? xstrdup (s) : NULL; } static void @@ -107,7 +101,7 @@ decode_spvlb_color_u32 (uint32_t x) static char * WARN_UNUSED_RESULT decode_spvlb_font_style (const struct spvlb_font_style *in, - const char *encoding, struct font_style **outp) + struct font_style **outp) { if (!in) { @@ -129,7 +123,7 @@ decode_spvlb_font_style (const struct spvlb_font_style *in, .underline = in->underline, .fg = { fg, fg }, .bg = { bg, bg }, - .typeface = to_utf8 (in->typeface, encoding), + .typeface = xstrdup (in->typeface), .size = in->size / 1.33, }; return NULL; @@ -225,17 +219,17 @@ decode_spvlb_cell_style (const struct spvlb_cell_style *in, static char *decode_spvlb_value ( const struct pivot_table *, const struct spvlb_value *, - const char *encoding, struct pivot_value **) WARN_UNUSED_RESULT; + struct pivot_value **) WARN_UNUSED_RESULT; static char * WARN_UNUSED_RESULT decode_spvlb_argument (const struct pivot_table *table, const struct spvlb_argument *in, - const char *encoding, struct pivot_argument *out) + struct pivot_argument *out) { if (in->value) { struct pivot_value *value; - char *error = decode_spvlb_value (table, in->value, encoding, &value); + char *error = decode_spvlb_value (table, in->value, &value); if (error) return error; @@ -249,7 +243,7 @@ decode_spvlb_argument (const struct pivot_table *table, out->values = xnmalloc (in->n_values, sizeof *out->values); for (size_t i = 0; i < in->n_values; i++) { - char *error = decode_spvlb_value (table, in->values[i], encoding, + char *error = decode_spvlb_value (table, in->values[i], &out->values[i]); if (error) { @@ -279,8 +273,7 @@ decode_spvlb_value_show (uint8_t in, enum settings_value_show *out) static char * WARN_UNUSED_RESULT decode_spvlb_value (const struct pivot_table *table, - const struct spvlb_value *in, - const char *encoding, struct pivot_value **outp) + const struct spvlb_value *in, struct pivot_value **outp) { *outp = NULL; @@ -308,18 +301,16 @@ decode_spvlb_value (const struct pivot_table *table, error = decode_spvlb_value_show (in->type_02.show, &out->numeric.show); if (error) return NULL; - out->numeric.var_name = to_utf8_if_nonempty (in->type_02.var_name, - encoding); - out->numeric.value_label = to_utf8_if_nonempty (in->type_02.value_label, - encoding); + out->numeric.var_name = xstrdup_if_nonempty (in->type_02.var_name); + out->numeric.value_label = xstrdup_if_nonempty (in->type_02.value_label); break; case 3: vm = in->type_03.value_mod; out->type = PIVOT_VALUE_TEXT; - out->text.local = to_utf8 (in->type_03.local, encoding); - out->text.c = to_utf8 (in->type_03.c, encoding); - out->text.id = to_utf8 (in->type_03.id, encoding); + out->text.local = xstrdup (in->type_03.local); + out->text.c = xstrdup (in->type_03.c); + out->text.id = xstrdup (in->type_03.id); out->text.user_provided = !in->type_03.fixed; break; @@ -329,10 +320,9 @@ decode_spvlb_value (const struct pivot_table *table, error = decode_spvlb_value_show (in->type_04.show, &out->string.show); if (error) return NULL; - out->string.s = to_utf8 (in->type_04.s, encoding); - out->string.var_name = to_utf8 (in->type_04.var_name, encoding); - out->string.value_label = to_utf8_if_nonempty (in->type_04.value_label, - encoding); + out->string.s = xstrdup (in->type_04.s); + out->string.var_name = xstrdup (in->type_04.var_name); + out->string.value_label = xstrdup_if_nonempty (in->type_04.value_label); break; case 5: @@ -341,24 +331,23 @@ decode_spvlb_value (const struct pivot_table *table, error = decode_spvlb_value_show (in->type_05.show, &out->variable.show); if (error) return error; - out->variable.var_name = to_utf8 (in->type_05.var_name, encoding); - out->variable.var_label = to_utf8_if_nonempty (in->type_05.var_label, - encoding); + out->variable.var_name = xstrdup (in->type_05.var_name); + out->variable.var_label = xstrdup_if_nonempty (in->type_05.var_label); break; case 6: vm = in->type_06.value_mod; out->type = PIVOT_VALUE_TEXT; - out->text.local = to_utf8 (in->type_06.local, encoding); - out->text.c = to_utf8 (in->type_06.c, encoding); - out->text.id = to_utf8 (in->type_06.id, encoding); + out->text.local = xstrdup (in->type_06.local); + out->text.c = xstrdup (in->type_06.c); + out->text.id = xstrdup (in->type_06.id); out->text.user_provided = false; break; case -1: vm = in->type_else.value_mod; out->type = PIVOT_VALUE_TEMPLATE; - out->template.local = to_utf8 (in->type_else.template, encoding); + out->template.local = xstrdup (in->type_else.template); out->template.id = out->template.local; out->template.n_args = 0; out->template.args = xnmalloc (in->type_else.n_args, @@ -366,7 +355,7 @@ decode_spvlb_value (const struct pivot_table *table, for (size_t i = 0; i < in->type_else.n_args; i++) { error = decode_spvlb_argument (table, in->type_else.args[i], - encoding, &out->template.args[i]); + &out->template.args[i]); if (error) { pivot_value_destroy (out); @@ -388,7 +377,7 @@ decode_spvlb_value (const struct pivot_table *table, out->subscripts = xnmalloc (vm->n_subscripts, sizeof *out->subscripts); for (size_t i = 0; i < vm->n_subscripts; i++) - out->subscripts[i] = to_utf8 (vm->subscripts[i], encoding); + out->subscripts[i] = xstrdup (vm->subscripts[i]); } if (vm->n_refs) @@ -412,7 +401,7 @@ decode_spvlb_value (const struct pivot_table *table, if (vm->style_pair) { error = decode_spvlb_font_style (vm->style_pair->font_style, - encoding, &out->font_style); + &out->font_style); if (!error) error = decode_spvlb_cell_style (vm->style_pair->cell_style, &out->cell_style); @@ -427,7 +416,7 @@ decode_spvlb_value (const struct pivot_table *table, && vm->template_string->id && vm->template_string->id[0] && out->type == PIVOT_VALUE_TEMPLATE) - out->template.id = to_utf8 (vm->template_string->id, encoding); + out->template.id = xstrdup (vm->template_string->id); } *outp = out; @@ -435,8 +424,7 @@ decode_spvlb_value (const struct pivot_table *table, } static char * WARN_UNUSED_RESULT -decode_spvlb_area (const struct spvlb_area *in, struct table_area_style *out, - const char *encoding) +decode_spvlb_area (const struct spvlb_area *in, struct table_area_style *out) { char *error; @@ -476,7 +464,7 @@ decode_spvlb_area (const struct spvlb_area *in, struct table_area_style *out, .underline = in->underline, .fg = { fg0, in->alternate ? fg1 : fg0 }, .bg = { bg0, in->alternate ? bg1 : bg0 }, - .typeface = to_utf8 (in->typeface, encoding), + .typeface = xstrdup (in->typeface), .size = in->size / 1.33, }, .cell_style = { @@ -497,16 +485,14 @@ decode_spvlb_group (const struct pivot_table *, size_t n_categories, bool show_label, struct pivot_category *parent, - struct pivot_dimension *, - const char *encoding); + struct pivot_dimension *); static char * WARN_UNUSED_RESULT decode_spvlb_categories (const struct pivot_table *table, struct spvlb_category **categories, size_t n_categories, struct pivot_category *parent, - struct pivot_dimension *dimension, - const char *encoding) + struct pivot_dimension *dimension) { for (size_t i = 0; i < n_categories; i++) { @@ -515,7 +501,7 @@ decode_spvlb_categories (const struct pivot_table *table, { char *error = decode_spvlb_categories ( table, in->group->subcategories, in->group->n_subcategories, - parent, dimension, encoding); + parent, dimension); if (error) return error; @@ -523,7 +509,7 @@ decode_spvlb_categories (const struct pivot_table *table, } struct pivot_value *name; - char *error = decode_spvlb_value (table, in->name, encoding, &name); + char *error = decode_spvlb_value (table, in->name, &name); if (error) return error; @@ -535,7 +521,7 @@ decode_spvlb_categories (const struct pivot_table *table, { char *error = decode_spvlb_group (table, in->group->subcategories, in->group->n_subcategories, - true, out, dimension, encoding); + true, out, dimension); if (error) { pivot_category_destroy (out); @@ -565,8 +551,7 @@ decode_spvlb_group (const struct pivot_table *table, struct spvlb_category **categories, size_t n_categories, bool show_label, struct pivot_category *category, - struct pivot_dimension *dimension, - const char *encoding) + struct pivot_dimension *dimension) { category->subs = XCALLOC (n_categories, struct pivot_category *); category->n_subs = 0; @@ -574,7 +559,7 @@ decode_spvlb_group (const struct pivot_table *table, category->show_label = show_label; return decode_spvlb_categories (table, categories, n_categories, category, - dimension, encoding); + dimension); } static char * WARN_UNUSED_RESULT @@ -607,12 +592,11 @@ fill_leaves (struct pivot_category *category, static char * WARN_UNUSED_RESULT decode_spvlb_dimension (const struct pivot_table *table, const struct spvlb_dimension *in, - size_t idx, const char *encoding, - struct pivot_dimension **outp) + size_t idx, struct pivot_dimension **outp) { /* Convert most of the dimension. */ struct pivot_value *name; - char *error = decode_spvlb_value (table, in->name, encoding, &name); + char *error = decode_spvlb_value (table, in->name, &name); if (error) return error; @@ -630,7 +614,7 @@ decode_spvlb_dimension (const struct pivot_table *table, }; error = decode_spvlb_group (table, in->categories, in->n_categories, !in->props->hide_dim_label, out->root, - out, encoding); + out); if (error) goto error; @@ -740,7 +724,7 @@ decode_data_index (uint64_t in, const struct pivot_table *table, static char * WARN_UNUSED_RESULT decode_spvlb_cells (struct spvlb_cell **in, size_t n_in, - struct pivot_table *table, const char *encoding) + struct pivot_table *table) { if (!table->n_dimensions) return NULL; @@ -751,7 +735,7 @@ decode_spvlb_cells (struct spvlb_cell **in, size_t n_in, struct pivot_value *value; char *error = decode_data_index (in[i]->index, table, dindexes); if (!error) - error = decode_spvlb_value (table, in[i]->value, encoding, &value); + error = decode_spvlb_value (table, in[i]->value, &value); if (error) { free (dindexes); @@ -765,18 +749,18 @@ decode_spvlb_cells (struct spvlb_cell **in, size_t n_in, } static char * WARN_UNUSED_RESULT -decode_spvlb_footnote (const struct spvlb_footnote *in, const char *encoding, +decode_spvlb_footnote (const struct spvlb_footnote *in, size_t idx, struct pivot_table *table) { struct pivot_value *content; - char *error = decode_spvlb_value (table, in->text, encoding, &content); + char *error = decode_spvlb_value (table, in->text, &content); if (error) return error; struct pivot_value *marker = NULL; if (in->marker) { - error = decode_spvlb_value (table, in->marker, encoding, &marker); + error = decode_spvlb_value (table, in->marker, &marker); if (error) { pivot_value_destroy (content); @@ -830,18 +814,6 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) out->look = pivot_table_look_new_builtin_default (); out->settings = (struct fmt_settings) FMT_SETTINGS_INIT; - const struct spvlb_y1 *y1 = (in->formats->x0 ? in->formats->x0->y1 - : in->formats->x3 ? in->formats->x3->y1 - : NULL); - const char *encoding; - if (y1) - encoding = y1->charset; - else - { - const char *dot = strchr (in->formats->locale, '.'); - encoding = dot ? dot + 1 : "windows-1252"; - } - /* Display settings. */ out->look->show_numeric_markers = !in->ts->show_alphabetic_markers; out->rotate_inner_column_labels = in->header->rotate_inner_column_labels; @@ -897,8 +869,8 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) &out->sizing[TABLE_HORZ].keeps, &out->sizing[TABLE_HORZ].n_keeps); - out->notes = to_utf8_if_nonempty (in->ts->notes, encoding); - out->look->name = to_utf8_if_nonempty (in->ts->table_look, encoding); + out->notes = xstrdup_if_nonempty (in->ts->notes); + out->look->name = xstrdup_if_nonempty (in->ts->table_look); /* Print settings. */ out->look->print_all_layers = in->ps->all_layers; @@ -934,10 +906,13 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) out->small = in->formats->x3 ? in->formats->x3->small : 0; /* Command information. */ + const struct spvlb_y1 *y1 = (in->formats->x0 ? in->formats->x0->y1 + : in->formats->x3 ? in->formats->x3->y1 + : NULL); if (y1) { - out->command_local = to_utf8 (y1->command_local, encoding); - out->command_c = to_utf8 (y1->command, encoding); + out->command_local = xstrdup (y1->command_local); + out->command_c = xstrdup (y1->command); out->language = xstrdup (y1->language); /* charset? */ out->locale = xstrdup (y1->locale); @@ -948,8 +923,8 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) if (x3) { if (x3->dataset && x3->dataset[0] && x3->dataset[0] != 4) - out->dataset = to_utf8 (x3->dataset, encoding); - out->datafile = to_utf8_if_nonempty (x3->datafile, encoding); + out->dataset = xstrdup (x3->dataset); + out->datafile = xstrdup_if_nonempty (x3->datafile); out->date = x3->date; } @@ -971,36 +946,32 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) pivot_table_create_footnote__ (out, fn->n_footnotes - 1, NULL, NULL); for (size_t i = 0; i < fn->n_footnotes; i++) { - error = decode_spvlb_footnote (in->footnotes->footnotes[i], - encoding, i, out); + error = decode_spvlb_footnote (in->footnotes->footnotes[i], i, out); if (error) goto error; } } /* Title and caption. */ - error = decode_spvlb_value (out, in->titles->user_title, encoding, - &out->title); + error = decode_spvlb_value (out, in->titles->user_title, &out->title); if (error) goto error; - error = decode_spvlb_value (out, in->titles->subtype, encoding, - &out->subtype); + error = decode_spvlb_value (out, in->titles->subtype, &out->subtype); if (error) goto error; if (in->titles->corner_text) { error = decode_spvlb_value (out, in->titles->corner_text, - encoding, &out->corner_text); + &out->corner_text); if (error) goto error; } if (in->titles->caption) { - error = decode_spvlb_value (out, in->titles->caption, encoding, - &out->caption); + error = decode_spvlb_value (out, in->titles->caption, &out->caption); if (error) goto error; } @@ -1009,8 +980,7 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) /* Styles. */ for (size_t i = 0; i < PIVOT_N_AREAS; i++) { - error = decode_spvlb_area (in->areas->areas[i], &out->look->areas[i], - encoding); + error = decode_spvlb_area (in->areas->areas[i], &out->look->areas[i]); if (error) goto error; } @@ -1027,7 +997,7 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) for (size_t i = 0; i < out->n_dimensions; i++) { error = decode_spvlb_dimension (out, in->dimensions->dims[i], - i, encoding, &out->dimensions[i]); + i, &out->dimensions[i]); if (error) goto error; } @@ -1063,8 +1033,7 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) goto error; /* Data. */ - error = decode_spvlb_cells (in->cells->cells, in->cells->n_cells, out, - encoding); + error = decode_spvlb_cells (in->cells->cells, in->cells->n_cells, out); *outp = out; return NULL; -- 2.30.2