From 97fed7964411b7bed4d8ad5bc895966de7e6b2b3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 23 Apr 2011 11:59:49 -0700 Subject: [PATCH] short-names: Consider character encoding when making short names. Variable names and short names are always in UTF-8, but the length of short names needs to be limited to 8 bytes in the dictionary encoding, not in UTF-8. This commit fixes that problem. --- src/data/short-names.c | 58 ++++++++++++++++-------------------------- src/data/variable.c | 12 +++++---- 2 files changed, 29 insertions(+), 41 deletions(-) diff --git a/src/data/short-names.c b/src/data/short-names.c index fbf813a3..aa157bae 100644 --- a/src/data/short-names.c +++ b/src/data/short-names.c @@ -23,6 +23,7 @@ #include "data/variable.h" #include "libpspp/assertion.h" #include "libpspp/compiler.h" +#include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/str.h" #include "libpspp/stringi-set.h" @@ -30,37 +31,6 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -/* Sets V's short name to BASE, followed by a suffix of the form - _A, _B, _C, ..., _AA, _AB, etc. according to the value of - SUFFIX_NUMBER. Truncates BASE as necessary to fit. */ -static void -set_var_short_name_suffix (struct variable *v, size_t i, - const char *base, int suffix_number) -{ - char suffix[SHORT_NAME_LEN + 1]; - char short_name[SHORT_NAME_LEN + 1]; - int len, ofs; - - assert (suffix_number >= 0); - - /* Compose suffix. */ - suffix[0] = '_'; - if (!str_format_26adic (suffix_number, &suffix[1], sizeof suffix - 1)) - msg (SE, _("Variable suffix too large.")); - len = strlen (suffix); - - /* Append suffix to V's short name. */ - str_copy_trunc (short_name, sizeof short_name, base); - if (strlen (short_name) + len > SHORT_NAME_LEN) - ofs = SHORT_NAME_LEN - len; - else - ofs = strlen (short_name); - strcpy (short_name + ofs, suffix); - - /* Set name. */ - var_set_short_name (v, i, short_name); -} - static void claim_short_name (struct variable *v, size_t i, struct stringi_set *short_names) @@ -83,13 +53,28 @@ assign_short_name (struct variable *v, size_t i, for (trial = 0; ; trial++) { + char suffix[SHORT_NAME_LEN + 1]; + char *short_name; + + /* Compose suffix. */ if (trial == 0) - var_set_short_name (v, i, var_get_name (v)); + suffix[0] = '\0'; else - set_var_short_name_suffix (v, i, var_get_name (v), trial); + { + suffix[0] = '_'; + str_format_26adic (trial, &suffix[1], sizeof suffix - 1); + } - if (stringi_set_insert (short_names, var_get_short_name (v, i))) - break; + /* Set name. */ + short_name = utf8_encoding_concat (var_get_name (v), suffix, + var_get_encoding (v), SHORT_NAME_LEN); + if (stringi_set_insert (short_names, short_name)) + { + var_set_short_name (v, i, short_name); + free (short_name); + return; + } + free (short_name); } } @@ -133,7 +118,8 @@ short_names_assign (struct dictionary *d) { struct variable *v = dict_get_var (d, i); const char *name = var_get_name (v); - if (strlen (name) <= SHORT_NAME_LEN) + int len = recode_string_len (var_get_encoding (v), "UTF-8", name, -1); + if (len <= SHORT_NAME_LEN) var_set_short_name (v, 0, name); } diff --git a/src/data/variable.c b/src/data/variable.c index c83c31ad..17a1542c 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -782,10 +782,12 @@ var_get_short_name (const struct variable *var, size_t idx) return idx < var->short_name_cnt ? var->short_names[idx] : NULL; } -/* Sets VAR's short name with the given IDX to SHORT_NAME, - truncating it to SHORT_NAME_LEN characters and converting it - to uppercase in the process. Specifying a null pointer for - SHORT_NAME clears the specified short name. */ +/* Sets VAR's short name with the given IDX to the UTF-8 string SHORT_NAME. + The caller must already have checked that, in the dictionary encoding, + SHORT_NAME is no more than SHORT_NAME_LEN bytes long. The new short name + will be converted to uppercase. + + Specifying a null pointer for SHORT_NAME clears the specified short name. */ void var_set_short_name (struct variable *var, size_t idx, const char *short_name) { @@ -811,7 +813,7 @@ var_set_short_name (struct variable *var, size_t idx, const char *short_name) for (i = old_cnt; i < var->short_name_cnt; i++) var->short_names[i] = NULL; } - var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING); + var->short_names[idx] = xstrdup (short_name); str_uppercase (var->short_names[idx]); } -- 2.30.2