From e375202ee45066d689317bc0d67c564e0a5894da Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Wed, 24 Jan 2007 00:56:40 +0000 Subject: [PATCH] Add an optional argument specifying transliteration. --- ChangeLog | 12 ++++ lib/striconveha.c | 131 ++++++++++++++++++++++++++++++++++++-------- lib/striconveha.h | 10 ++++ modules/striconveha | 4 ++ 4 files changed, 134 insertions(+), 23 deletions(-) diff --git a/ChangeLog b/ChangeLog index 46e11d7654..387656f4c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2007-01-23 Bruno Haible + + * lib/striconveha.h: Include <stdbool.h>. + (mem_iconveha, str_iconveha): Add 'transliterate' argument. + * lib/striconveha.c: Include allocsa.h, strdup.h, c-strcase.h. + (mem_iconveha_notranslit): Renamed from mem_iconveha. + (mem_iconveha): New function. + (str_iconveha_notranslit): Renamed from str_iconveha. + (str_iconveha): New function. + * modules/striconveha (Depends-on): Add stdbool, allocsa, strdup, + c-strcase. + 2007-01-23 Bruno Haible * lib/striconveha.c (mem_iconveha): Fix endless recursion. Try all diff --git a/lib/striconveha.c b/lib/striconveha.c index f17293ff0d..78dfddc42d 100644 --- a/lib/striconveha.c +++ b/lib/striconveha.c @@ -25,6 +25,10 @@ #include #include +#include "allocsa.h" +#include "strdup.h" +#include "c-strcase.h" + #define SIZEOF(a) (sizeof(a)/sizeof(a[0])) @@ -143,12 +147,13 @@ uniconv_register_autodetect (const char *name, } } -int -mem_iconveha (const char *src, size_t srclen, - const char *from_codeset, const char *to_codeset, - enum iconv_ilseq_handler handler, - size_t *offsets, - char **resultp, size_t *lengthp) +/* Like mem_iconveha, except no handling of transliteration. 
*/ +static int +mem_iconveha_notranslit (const char *src, size_t srclen, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler, + size_t *offsets, + char **resultp, size_t *lengthp) { int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler, offsets, resultp, lengthp); @@ -171,10 +176,10 @@ mem_iconveha (const char *src, size_t srclen, encodings = alias->encodings_to_try; do { - retval = mem_iconveha (src, srclen, - *encodings, to_codeset, - iconveh_error, offsets, - resultp, lengthp); + retval = mem_iconveha_notranslit (src, srclen, + *encodings, to_codeset, + iconveh_error, offsets, + resultp, lengthp); if (!(retval < 0 && errno == EILSEQ)) return retval; encodings++; @@ -185,10 +190,10 @@ mem_iconveha (const char *src, size_t srclen, encodings = alias->encodings_to_try; do { - retval = mem_iconveha (src, srclen, - *encodings, to_codeset, - handler, offsets, - resultp, lengthp); + retval = mem_iconveha_notranslit (src, srclen, + *encodings, to_codeset, + handler, offsets, + resultp, lengthp); if (!(retval < 0 && errno == EILSEQ)) return retval; encodings++; @@ -205,10 +210,52 @@ mem_iconveha (const char *src, size_t srclen, } } -char * -str_iconveha (const char *src, +int +mem_iconveha (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, - enum iconv_ilseq_handler handler) + bool transliterate, + enum iconv_ilseq_handler handler, + size_t *offsets, + char **resultp, size_t *lengthp) +{ + if (srclen == 0) + { + /* Nothing to convert. */ + *lengthp = 0; + return 0; + } + + /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5, + we want to use transliteration. 
*/ +#if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 + if (transliterate) + { + int retval; + size_t len = strlen (to_codeset); + char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1); + memcpy (to_codeset_suffixed, to_codeset, len); + memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1); + + retval = mem_iconveha_notranslit (src, srclen, + from_codeset, to_codeset_suffixed, + handler, offsets, resultp, lengthp); + + freesa (to_codeset_suffixed); + + return retval; + } + else +#endif + return mem_iconveha_notranslit (src, srclen, + from_codeset, to_codeset, + handler, offsets, resultp, lengthp); +} + +/* Like str_iconveha, except no handling of transliteration. */ +static char * +str_iconveha_notranslit (const char *src, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler) { char *result = str_iconveh (src, from_codeset, to_codeset, handler); @@ -231,9 +278,9 @@ str_iconveha (const char *src, encodings = alias->encodings_to_try; do { - result = str_iconveha (src, - *encodings, to_codeset, - iconveh_error); + result = str_iconveha_notranslit (src, + *encodings, to_codeset, + iconveh_error); if (!(result == NULL && errno == EILSEQ)) return result; encodings++; @@ -244,9 +291,9 @@ str_iconveha (const char *src, encodings = alias->encodings_to_try; do { - result = str_iconveha (src, - *encodings, to_codeset, - handler); + result = str_iconveha_notranslit (src, + *encodings, to_codeset, + handler); if (!(result == NULL && errno == EILSEQ)) return result; encodings++; @@ -262,3 +309,41 @@ str_iconveha (const char *src, return NULL; } } + +char * +str_iconveha (const char *src, + const char *from_codeset, const char *to_codeset, + bool transliterate, + enum iconv_ilseq_handler handler) +{ + if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) + { + char *result = strdup (src); + + if (result == NULL) + errno = ENOMEM; + return result; + } + + /* When using GNU libc >= 2.2 
or GNU libiconv >= 1.5, + we want to use transliteration. */ +#if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 + if (transliterate) + { + char *result; + size_t len = strlen (to_codeset); + char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1); + memcpy (to_codeset_suffixed, to_codeset, len); + memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1); + + result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed, + handler); + + freesa (to_codeset_suffixed); + + return result; + } + else +#endif + return str_iconveha_notranslit (src, from_codeset, to_codeset, handler); +} diff --git a/lib/striconveha.h b/lib/striconveha.h index 2798f53f46..9c4c676f3f 100644 --- a/lib/striconveha.h +++ b/lib/striconveha.h @@ -19,6 +19,8 @@ #ifndef _STRICONVEHA_H #define _STRICONVEHA_H +#include <stdbool.h> + #include "striconveh.h" @@ -30,6 +32,9 @@ extern "C" { /* Convert an entire string from one encoding to another, using iconv. The original string is at [SRC,...,SRC+SRCLEN-1]. The "from" encoding can also be a name defined for autodetection. + If TRANSLITERATE is true, transliteration will be attempted to avoid conversion + errors, for iconv implementations that support this. Usually you'll choose + TRANSLITERATE = true if HANDLER != iconveh_error. If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this array is filled with offsets into the result, i.e. the character starting at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]], @@ -44,6 +49,7 @@ extern "C" { extern int mem_iconveha (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, + bool transliterate, enum iconv_ilseq_handler handler, size_t *offsets, char **resultp, size_t *lengthp); @@ -53,12 +59,16 @@ extern int Both the "from" and the "to" encoding must use a single NUL byte at the end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). 
The "from" encoding can also be a name defined for autodetection. + If TRANSLITERATE is true, transliteration will be attempted to avoid conversion + errors, for iconv implementations that support this. Usually you'll choose + TRANSLITERATE = true if HANDLER != iconveh_error. Allocate a malloced memory block for the result. Return value: the freshly allocated resulting NUL-terminated string if successful, otherwise NULL and errno set. */ extern char * str_iconveha (const char *src, const char *from_codeset, const char *to_codeset, + bool transliterate, enum iconv_ilseq_handler handler); diff --git a/modules/striconveha b/modules/striconveha index 9b44e24d68..964bb9174f 100644 --- a/modules/striconveha +++ b/modules/striconveha @@ -7,7 +7,11 @@ lib/striconveha.h lib/striconveha.c Depends-on: +stdbool striconveh +allocsa +strdup +c-strcase configure.ac: -- 2.30.2