From 771ffe34fdff1d8f2c40d697cb363829d01ff995 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Wed, 6 Sep 2006 12:21:39 +0000 Subject: [PATCH] New module 'striconv'. --- lib/striconv.c | 430 +++++++++++++++++++++++++++++++++++++++++++++++ lib/striconv.h | 75 +++++++++ modules/striconv | 31 ++++ 3 files changed, 536 insertions(+) create mode 100644 lib/striconv.c create mode 100644 lib/striconv.h create mode 100644 modules/striconv diff --git a/lib/striconv.c b/lib/striconv.c new file mode 100644 index 0000000000..4ca983914f --- /dev/null +++ b/lib/striconv.c @@ -0,0 +1,430 @@ +/* Charset conversion. + Copyright (C) 2001-2006 Free Software Foundation, Inc. + Written by Bruno Haible and Simon Josefsson. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* Specification. */ +#include "striconv.h" + +#include +#include +#include + +#if HAVE_ICONV +# include +/* Get MB_LEN_MAX, CHAR_BIT. */ +# include +#endif + +#include "strdup.h" +#include "c-strcase.h" + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + + +#if HAVE_ICONV + +int +mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, + char **resultp, size_t *lengthp) +{ +# define tmpbufsize 4096 + size_t length; + char *result; + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Set to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Determine the length we need. */ + { + size_t count = 0; + char tmpbuf[tmpbufsize]; + const char *inptr = src; + size_t insize = srclen; + + while (insize > 0) + { + char *outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + ; + else if (errno == EINVAL) + break; + else + return -1; + } +# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) + /* Irix iconv() inserts a NUL byte if it cannot convert. */ + else if (res > 0) + { + errno = EILSEQ; + return -1; + } +# endif + count += outptr - tmpbuf; + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + { + char *outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); + + if (res == (size_t)(-1)) + return -1; + count += outptr - tmpbuf; + } +# endif + length = count; + } + + if (length == 0) + { + *lengthp = 0; + return 0; + } + result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length)); + if (result == NULL) + { + errno = ENOMEM; + return -1; + } + *resultp = result; + *lengthp = length; + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Return to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Do the conversion for real. */ + { + const char *inptr = src; + size_t insize = srclen; + char *outptr = result; + size_t outsize = length; + + while (insize > 0) + { + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else + return -1; + } +# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) + /* Irix iconv() inserts a NUL byte if it cannot convert. */ + else if (res > 0) + { + errno = EILSEQ; + return -1; + } +# endif + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + { + size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); + + if (res == (size_t)(-1)) + return -1; + } +# endif + if (outsize != 0) + abort (); + } + + return 0; +# undef tmpbufsize +} + +char * +str_cd_iconv (const char *src, iconv_t cd) +{ + /* For most encodings, a trailing NUL byte in the input will be converted + to a trailing NUL byte in the output. But not for UTF-7. So that this + function is usable for UTF-7, we have to exclude the NUL byte from the + conversion and add it by hand afterwards. */ +# if PROBABLY_SLOWER + + char *result = NULL; + size_t length; + int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); + char *final_result; + + if (retval < 0) + { + if (result != NULL) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return NULL; + } + + /* Add the terminating NUL byte. */ + final_result = + (result != NULL ? realloc (result, length + 1) : malloc (length + 1)); + if (final_result == NULL) + { + if (result != NULL) + free (result); + errno = ENOMEM; + return NULL; + } + final_result[length] = '\0'; + + return final_result; + +# else + + char *result; + size_t result_size; + size_t length; + const char *inptr = src; + size_t inbytes_remaining = strlen (src); + + /* Make a guess for the worst-case output size, in order to avoid a + realloc. It's OK if the guess is wrong as long as it is not zero and + doesn't lead to an integer overflow. */ + result_size = inbytes_remaining; + { + size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2); + if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX) + result_size *= MB_LEN_MAX; + } + result_size += 1; /* for the terminating NUL */ + + result = (char *) malloc (result_size); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Set to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Do the conversion. */ + { + char *outptr = result; + size_t outbytes_remaining = result_size - 1; + + for (;;) + { + /* Here inptr + inbytes_remaining = src + strlen (src), + outptr + outbytes_remaining = result + result_size - 1. */ + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytes_remaining, + &outptr, &outbytes_remaining); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else if (errno == E2BIG) + { + size_t used = outptr - result; + size_t newsize = result_size * 2; + char *newresult; + + if (!(newsize > result_size)) + { + errno = ENOMEM; + goto failed; + } + newresult = (char *) realloc (result, newsize); + if (newresult == NULL) + { + errno = ENOMEM; + goto failed; + } + result = newresult; + result_size = newsize; + outptr = result + used; + outbytes_remaining = result_size - 1 - used; + } + else + goto failed; + } +# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) + /* Irix iconv() inserts a NUL byte if it cannot convert. */ + else if (res > 0) + { + errno = EILSEQ; + goto failed; + } +# endif + else + break; + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + for (;;) + { + /* Here outptr + outbytes_remaining = result + result_size - 1. */ + size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); + + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + { + size_t used = outptr - result; + size_t newsize = result_size * 2; + char *newresult; + + if (!(newsize > result_size)) + { + errno = ENOMEM; + goto failed; + } + newresult = (char *) realloc (result, newsize); + if (newresult == NULL) + { + errno = ENOMEM; + goto failed; + } + result = newresult; + result_size = newsize; + outptr = result + used; + outbytes_remaining = result_size - 1 - used; + } + else + goto failed; + } + else + break; + } +# endif + + /* Add the terminating NUL byte. */ + *outptr++ = '\0'; + + length = outptr - result; + } + + /* Give away unused memory. */ + if (length < result_size) + { + char *smaller_result = (char *) realloc (result, length); + + if (smaller_result != NULL) + result = smaller_result; + } + + return result; + + failed: + { + int saved_errno = errno; + free (result); + errno = saved_errno; + return NULL; + } + +# endif +} + +#endif + +char * +str_iconv (const char *src, const char *from_codeset, const char *to_codeset) +{ + if (c_strcasecmp (from_codeset, to_codeset) == 0) + return strdup (src); + else + { +#if HAVE_ICONV + iconv_t cd; + char *result; + + /* Avoid glibc-2.1 bug with EUC-KR. */ +# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION + if (c_strcasecmp (from_codeset, "EUC-KR") == 0 + || c_strcasecmp (to_codeset, "EUC-KR") == 0) + { + errno = EINVAL; + return NULL; + } +# endif + cd = iconv_open (to_codeset, from_codeset); + if (cd == (iconv_t) -1) + return NULL; + + result = str_cd_iconv (src, cd); + + if (result == NULL) + { + /* Close cd, but preserve the errno from str_cd_iconv. */ + int saved_errno = errno; + iconv_close (cd); + errno = saved_errno; + } + else + { + if (iconv_close (cd) < 0) + { + /* Return NULL, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + free (result); + errno = saved_errno; + return NULL; + } + } + return result; +#else + /* This is a different error code than if iconv_open existed but didn't + support from_codeset and to_codeset, so that the caller can emit + an error message such as + "iconv() is not supported. Installing GNU libiconv and + then reinstalling this package would fix this." */ + errno = ENOSYS; + return NULL; +#endif + } +} diff --git a/lib/striconv.h b/lib/striconv.h new file mode 100644 index 0000000000..dd75adef68 --- /dev/null +++ b/lib/striconv.h @@ -0,0 +1,75 @@ +/* Charset conversion. + Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc. + Written by Bruno Haible and Simon Josefsson. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _STRICONV_H +#define _STRICONV_H + +#include +#if HAVE_ICONV +#include +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if HAVE_ICONV + +/* Convert an entire string from one encoding to another, using iconv. + The original string is at [SRC,...,SRC+SRCLEN-1]. + The conversion descriptor is passed as CD. + *RESULTP should initially contain NULL or a malloced memory block. + May change the size of the allocated memory block in *RESULTP, storing + its new address in *RESULTP and its new length in *LENGTHP. + Return value: 0 if successful, otherwise -1 and errno set. + If successful, the resulting string is stored in *RESULTP and its length + in *LENGTHP. */ +extern int mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, + char **resultp, size_t *lengthp); + +/* Convert an entire string from one encoding to another, using iconv. + The original string is the NUL-terminated string starting at SRC. + The conversion descriptor is passed as CD. Both the "from" and the "to" + encoding must use a single NUL byte at the end of the string (i.e. not + UCS-2, UCS-4, UTF-16, UTF-32). + Allocate a malloced memory block for the result. + Return value: the freshly allocated resulting NUL-terminated string if + successful, otherwise NULL and errno set. */ +extern char * str_cd_iconv (const char *src, iconv_t cd); + +#endif + +/* Convert an entire string from one encoding to another, using iconv. + The original string is the NUL-terminated string starting at SRC. + Both the "from" and the "to" encoding must use a single NUL byte at the + end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). + Allocate a malloced memory block for the result. + Return value: the freshly allocated resulting NUL-terminated string if + successful, otherwise NULL and errno set. */ +extern char * str_iconv (const char *src, + const char *from_codeset, const char *to_codeset); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _STRICONV_H */ diff --git a/modules/striconv b/modules/striconv new file mode 100644 index 0000000000..887681e281 --- /dev/null +++ b/modules/striconv @@ -0,0 +1,31 @@ +Description: +Character set conversion of strings made easy, uses iconv. + +Files: +lib/striconv.h +lib/striconv.c + +Depends-on: +iconv +strdup +c-strcase + +configure.ac: + +Makefile.am: +lib_SOURCES += striconv.h striconv.c +if GL_COND_LIBTOOL +lib_LDFLAGS += $(LTLIBICONV) +else +lib_LDFLAGS += $(LIBICONV) +endif + +Include: +"striconv.h" + +License: +LGPL + +Maintainer: +Bruno Haible, Simon Josefsson + -- 2.30.2