From 4e8257086ffc71bc5a1785fd86610921be677887 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 18 Feb 2011 21:55:18 -0800 Subject: [PATCH] format: Count prefix and suffix width in terms of display columns. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Until now, the prefixes and suffixes for custom currency formats (CCA, etc.) have been considered to occupy one display column per byte. This is fine for prefixes and suffixes like "$" or "%", but falls down badly with U+00A5 (¥) or U+20AC (€), which occupy two or three bytes, respectively, in UTF-8, while occupying only a single display column. This commit fixes the problem. It doesn't add a test yet because there are still some higher-level issues, but that will come in a later commit when those remaining issues are resolved. --- Smake | 1 + src/data/data-out.c | 26 +++++++++++++++----------- src/data/format.c | 26 ++++++++++++++++++-------- src/data/format.h | 12 +++++++++++- 4 files changed, 45 insertions(+), 20 deletions(-) diff --git a/Smake b/Smake index 6d54f5b7..2210e662 100644 --- a/Smake +++ b/Smake @@ -78,6 +78,7 @@ GNULIB_MODULES = \ unistr/u8-mbtouc \ unistr/u8-strlen \ unistr/u8-strncat \ + uniwidth/u8-strwidth \ unitypes \ unlocked-io \ vasprintf-posix \ diff --git a/src/data/data-out.c b/src/data/data-out.c index a30e7e70..bb774374 100644 --- a/src/data/data-out.c +++ b/src/data/data-out.c @@ -131,11 +131,12 @@ char * data_out_pool (const union value *input, const char *encoding, const struct fmt_spec *format, struct pool *pool) { + const struct fmt_number_style *style = settings_get_style (format->type); char *output; char *t ; assert (fmt_check_output (format)); - output = xmalloc (format->w + 1); + output = xmalloc (format->w + style->extra_bytes + 1); converters[format->type] (input, format, output); @@ -602,9 +603,9 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format, the negative suffix, plus (if negative) the negative prefix. 
*/ width = rounder_width (r, decimals, &integer_digits, &add_neg_prefix); - width += strlen (style->neg_suffix.s); + width += style->neg_suffix.width; if (add_neg_prefix) - width += strlen (style->neg_prefix.s); + width += style->neg_prefix.width; if (width > format->w) continue; @@ -659,8 +660,11 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format, if (add_neg_prefix) p = stpcpy (p, style->neg_suffix.s); else - p = mempset (p, ' ', strlen (style->neg_suffix.s)); - assert (p == output + format->w); + p = mempset (p, ' ', style->neg_suffix.width); + + assert (p >= output + format->w); + assert (p <= output + format->w + style->extra_bytes); + *p = '\0'; return true; } @@ -681,9 +685,9 @@ output_scientific (double number, const struct fmt_spec *format, char buf[64], *p; /* Allocate minimum required space. */ - width = 6 + strlen (style->neg_suffix.s); + width = 6 + style->neg_suffix.width; if (number < 0) - width += strlen (style->neg_prefix.s); + width += style->neg_prefix.width; if (width > format->w) return false; @@ -739,11 +743,11 @@ output_scientific (double number, const struct fmt_spec *format, if (number < 0) p = stpcpy (p, style->neg_suffix.s); else - p = mempset (p, ' ', strlen (style->neg_suffix.s)); + p = mempset (p, ' ', style->neg_suffix.width); - assert (p == buf + format->w); - memcpy (output, buf, format->w); - output[format->w] = '\0'; + assert (p >= output + format->w); + assert (p <= output + format->w + style->extra_bytes); + *p = '\0'; return true; } diff --git a/src/data/format.c b/src/data/format.c index d3c68802..95e87a0d 100644 --- a/src/data/format.c +++ b/src/data/format.c @@ -20,6 +20,7 @@ #include <ctype.h> #include <stdlib.h> +#include <uniwidth.h> #include "data/identifier.h" #include "data/settings.h" @@ -113,7 +114,7 @@ fmt_settings_get_style (const struct fmt_settings *settings, /* Sets the number style for TYPE to have the given DECIMAL and GROUPING characters, negative prefix NEG_PREFIX, prefix PREFIX, suffix SUFFIX, and - negative suffix 
NEG_SUFFIX. */ + negative suffix NEG_SUFFIX. All of the strings are UTF-8 encoded. */ void fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type, char decimal, char grouping, @@ -121,6 +122,7 @@ fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type, const char *suffix, const char *neg_suffix) { struct fmt_number_style *style = &settings->styles[type]; + int total_bytes, total_width; assert (grouping == '.' || grouping == ',' || grouping == 0); assert (decimal == '.' || decimal == ','); @@ -134,6 +136,12 @@ fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type, fmt_affix_set (&style->neg_suffix, neg_suffix); style->decimal = decimal; style->grouping = grouping; + + total_bytes = (strlen (neg_prefix) + strlen (prefix) + + strlen (suffix) + strlen (neg_suffix)); + total_width = (style->neg_prefix.width + style->prefix.width + + style->suffix.width + style->neg_suffix.width); + style->extra_bytes = MAX (0, total_bytes - total_width); } /* Sets the decimal point character for the settings in S to DECIMAL. @@ -934,11 +942,12 @@ max_digits_for_bytes (int bytes) return map[bytes - 1]; } -/* Sets AFFIX's string value to S. */ +/* Sets AFFIX's string value to S, a UTF-8 encoded string. */ static void fmt_affix_set (struct fmt_affix *affix, const char *s) { affix->s = s[0] == '\0' ? CONST_CAST (char *, "") : xstrdup (s); + affix->width = u8_strwidth (CHAR_CAST (const uint8_t *, s), "UTF-8"); } /* Frees data in AFFIX. */ @@ -970,6 +979,7 @@ fmt_number_style_clone (struct fmt_number_style *new, fmt_affix_set (&new->neg_suffix, old->neg_suffix.s); new->decimal = old->decimal; new->grouping = old->grouping; + new->extra_bytes = old->extra_bytes; } /* Destroys a struct fmt_number_style. */ @@ -985,20 +995,20 @@ fmt_number_style_destroy (struct fmt_number_style *style) } } -/* Returns the total width of the standard prefix and suffix for - STYLE. 
*/ +/* Returns the total width of the standard prefix and suffix for STYLE, in + display columns (e.g. as returned by u8_strwidth()). */ int fmt_affix_width (const struct fmt_number_style *style) { - return strlen (style->prefix.s) + strlen (style->suffix.s); + return style->prefix.width + style->suffix.width; } -/* Returns the total width of the negative prefix and suffix for - STYLE. */ +/* Returns the total width of the negative prefix and suffix for STYLE, in + display columns (e.g. as returned by u8_strwidth()). */ int fmt_neg_affix_width (const struct fmt_number_style *style) { - return strlen (style->neg_prefix.s) + strlen (style->neg_suffix.s); + return style->neg_prefix.width + style->neg_suffix.width; } /* Returns the struct fmt_desc for the given format TYPE. */ diff --git a/src/data/format.h b/src/data/format.h index 55643ab4..7df37445 100644 --- a/src/data/format.h +++ b/src/data/format.h @@ -146,7 +146,8 @@ void fmt_settings_set_style (struct fmt_settings *, enum fmt_type, /* A prefix or suffix for a numeric output format. */ struct fmt_affix { - char *s; /* String contents of affix. */ + char *s; /* String contents of affix, in UTF-8. */ + int width; /* Display width in columns (see wcwidth()). */ }; /* A numeric output style. */ @@ -158,6 +159,15 @@ struct fmt_number_style struct fmt_affix neg_suffix; /* Negative suffix. */ char decimal; /* Decimal point: '.' or ','. */ char grouping; /* Grouping character: ',', '.', or 0. */ + + /* A fmt_affix may require more bytes than its display width; for example, + U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column. + This member is the sum of the number of bytes required by all of the + fmt_affix members in this struct, minus their display widths. Thus, it + can be used to size memory allocations: for example, the formatted + result of CCA20.5 requires no more than (20 + extra_bytes) bytes in + UTF-8. */ + int extra_bytes; }; int fmt_affix_width (const struct fmt_number_style *); -- 2.30.2