/* Character set conversion with error handling.
- Copyright (C) 2001-2008 Free Software Foundation, Inc.
+ Copyright (C) 2001-2009 Free Software Foundation, Inc.
Written by Bruno Haible and Simon Josefsson.
This program is free software: you can redistribute it and/or modify
#if HAVE_ICONV
-/* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
- error occurs, we may have to determine the Unicode representation of the
- inconvertible character. */
+/* The caller must provide an iconveh_t, not just an iconv_t, because when a
+ conversion error occurs, we may have to determine the Unicode representation
+ of the inconvertible character. */
+
+/* Open the conversion descriptors needed by the iconveh functions for a
+   conversion from FROM_CODESET to TO_CODESET.
+   On success, store them in *CDP and return 0.
+   On failure, close every descriptor opened so far and return -1 with errno
+   set; *CDP is not written in that case.  */
+int
+iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)
+{
+  iconv_t cd;
+  iconv_t cd1;
+  iconv_t cd2;
+
+  /* Avoid glibc-2.1 bug with EUC-KR.  */
+# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+  if (c_strcasecmp (from_codeset, "EUC-KR") == 0
+      || c_strcasecmp (to_codeset, "EUC-KR") == 0)
+    {
+      errno = EINVAL;
+      return -1;
+    }
+# endif
+
+  /* The direct descriptor is allowed to be (iconv_t)(-1); its result is
+     deliberately not checked here.  */
+  cd = iconv_open (to_codeset, from_codeset);
+
+  /* FROM_CODESET -> UTF-8; not opened when FROM_CODESET already is UTF-8.  */
+  if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
+    cd1 = (iconv_t)(-1);
+  else
+    {
+      cd1 = iconv_open ("UTF-8", from_codeset);
+      if (cd1 == (iconv_t)(-1))
+        {
+          /* Release the local CD (not cdp->cd, which has not been assigned
+             yet), preserving the errno from iconv_open.  */
+          int saved_errno = errno;
+          if (cd != (iconv_t)(-1))
+            iconv_close (cd);
+          errno = saved_errno;
+          return -1;
+        }
+    }
+
+  /* UTF-8 -> TO_CODESET; not opened when TO_CODESET already is UTF-8.  */
+  if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)
+# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+      || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0
+# endif
+     )
+    cd2 = (iconv_t)(-1);
+  else
+    {
+      cd2 = iconv_open (to_codeset, "UTF-8");
+      if (cd2 == (iconv_t)(-1))
+        {
+          /* Release what was opened so far, preserving the errno from
+             iconv_open.  */
+          int saved_errno = errno;
+          if (cd1 != (iconv_t)(-1))
+            iconv_close (cd1);
+          if (cd != (iconv_t)(-1))
+            iconv_close (cd);
+          errno = saved_errno;
+          return -1;
+        }
+    }
+
+  /* Only publish the descriptors once all of them are known to be usable.  */
+  cdp->cd = cd;
+  cdp->cd1 = cd1;
+  cdp->cd2 = cd2;
+  return 0;
+}
+
+/* Close the descriptors held in *CD, in the order cd2, cd1, cd.
+   Every descriptor that is not (iconv_t)(-1) is closed, even after an
+   earlier close has failed.
+   Return 0 if no close failed, otherwise -1 with errno set to the value left
+   by the first failing iconv_close.  */
+int
+iconveh_close (const iconveh_t *cd)
+{
+  int status = 0;
+  int first_errno = 0;
+
+  if (cd->cd2 != (iconv_t)(-1) && iconv_close (cd->cd2) < 0)
+    {
+      first_errno = errno;
+      status = -1;
+    }
+
+  /* Later failures are ignored once an earlier one has been recorded.  */
+  if (cd->cd1 != (iconv_t)(-1) && iconv_close (cd->cd1) < 0 && status == 0)
+    {
+      first_errno = errno;
+      status = -1;
+    }
+
+  if (cd->cd != (iconv_t)(-1) && iconv_close (cd->cd) < 0 && status == 0)
+    {
+      first_errno = errno;
+      status = -1;
+    }
+
+  if (status < 0)
+    errno = first_errno;
+  return status;
+}
/* iconv_carefully is like iconv, except that it stops as soon as it encounters
a conversion error, and it returns in *INCREMENTED a boolean telling whether
int
mem_cd_iconveh (const char *src, size_t srclen,
- iconv_t cd, iconv_t cd1, iconv_t cd2,
+ const iconveh_t *cd,
enum iconv_ilseq_handler handler,
size_t *offsets,
char **resultp, size_t *lengthp)
{
- return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
- offsets, resultp, lengthp);
+ /* Hand the three descriptors bundled in *CD to the internal worker. */
+ return mem_cd_iconveh_internal (src, srclen, cd->cd, cd->cd1, cd->cd2,
+ handler, 0, offsets, resultp, lengthp);
}
char *
str_cd_iconveh (const char *src,
- iconv_t cd, iconv_t cd1, iconv_t cd2,
+ const iconveh_t *cd,
enum iconv_ilseq_handler handler)
{
/* For most encodings, a trailing NUL byte in the input will be converted
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh_internal (src, strlen (src),
- cd, cd1, cd2, handler, 1, NULL,
- &result, &length);
+ cd->cd, cd->cd1, cd->cd2, handler, 1,
+ NULL, &result, &length);
if (retval < 0)
{
else
{
#if HAVE_ICONV
- iconv_t cd;
- iconv_t cd1;
- iconv_t cd2;
+ iconveh_t cd;
char *result;
size_t length;
int retval;
- /* Avoid glibc-2.1 bug with EUC-KR. */
-# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
- if (c_strcasecmp (from_codeset, "EUC-KR") == 0
- || c_strcasecmp (to_codeset, "EUC-KR") == 0)
- {
- errno = EINVAL;
- return -1;
- }
-# endif
-
- cd = iconv_open (to_codeset, from_codeset);
-
- if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
- cd1 = (iconv_t)(-1);
- else
- {
- cd1 = iconv_open ("UTF-8", from_codeset);
- if (cd1 == (iconv_t)(-1))
- {
- int saved_errno = errno;
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- errno = saved_errno;
- return -1;
- }
- }
-
- if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)
-# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
- || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0
-# endif
- )
- cd2 = (iconv_t)(-1);
- else
- {
- cd2 = iconv_open (to_codeset, "UTF-8");
- if (cd2 == (iconv_t)(-1))
- {
- int saved_errno = errno;
- if (cd1 != (iconv_t)(-1))
- iconv_close (cd1);
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- errno = saved_errno;
- return -1;
- }
- }
+ if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
+ return -1;
result = *resultp;
length = *lengthp;
- retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
+ retval = mem_cd_iconveh (src, srclen, &cd, handler, offsets,
&result, &length);
if (retval < 0)
{
- /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */
+ /* Close cd, but preserve the errno from mem_cd_iconveh. */
int saved_errno = errno;
- if (cd2 != (iconv_t)(-1))
- iconv_close (cd2);
- if (cd1 != (iconv_t)(-1))
- iconv_close (cd1);
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
+ iconveh_close (&cd);
errno = saved_errno;
}
else
{
- if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
- {
- /* Return -1, but free the allocated memory, and while doing
- that, preserve the errno from iconv_close. */
- int saved_errno = errno;
- if (cd1 != (iconv_t)(-1))
- iconv_close (cd1);
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- if (result != *resultp && result != NULL)
- free (result);
- errno = saved_errno;
- return -1;
- }
- if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
+ if (iconveh_close (&cd) < 0)
{
/* Return -1, but free the allocated memory, and while doing
- that, preserve the errno from iconv_close. */
- int saved_errno = errno;
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- if (result != *resultp && result != NULL)
- free (result);
- errno = saved_errno;
- return -1;
- }
- if (cd != (iconv_t)(-1) && iconv_close (cd) < 0)
- {
- /* Return -1, but free the allocated memory, and while doing
- that, preserve the errno from iconv_close. */
+ that, preserve the errno from iconveh_close. */
int saved_errno = errno;
if (result != *resultp && result != NULL)
free (result);
else
{
#if HAVE_ICONV
- iconv_t cd;
- iconv_t cd1;
- iconv_t cd2;
+ iconveh_t cd;
char *result;
- /* Avoid glibc-2.1 bug with EUC-KR. */
-# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
- if (c_strcasecmp (from_codeset, "EUC-KR") == 0
- || c_strcasecmp (to_codeset, "EUC-KR") == 0)
- {
- errno = EINVAL;
- return NULL;
- }
-# endif
+ if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
+ return NULL;
- cd = iconv_open (to_codeset, from_codeset);
-
- if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
- cd1 = (iconv_t)(-1);
- else
- {
- cd1 = iconv_open ("UTF-8", from_codeset);
- if (cd1 == (iconv_t)(-1))
- {
- int saved_errno = errno;
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- errno = saved_errno;
- return NULL;
- }
- }
-
- if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)
-# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
- || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0
-# endif
- )
- cd2 = (iconv_t)(-1);
- else
- {
- cd2 = iconv_open (to_codeset, "UTF-8");
- if (cd2 == (iconv_t)(-1))
- {
- int saved_errno = errno;
- if (cd1 != (iconv_t)(-1))
- iconv_close (cd1);
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- errno = saved_errno;
- return NULL;
- }
- }
-
- result = str_cd_iconveh (src, cd, cd1, cd2, handler);
+ result = str_cd_iconveh (src, &cd, handler);
if (result == NULL)
{
- /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */
+ /* Close cd, but preserve the errno from str_cd_iconveh. */
int saved_errno = errno;
- if (cd2 != (iconv_t)(-1))
- iconv_close (cd2);
- if (cd1 != (iconv_t)(-1))
- iconv_close (cd1);
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
+ iconveh_close (&cd);
errno = saved_errno;
}
else
{
- if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
- {
- /* Return NULL, but free the allocated memory, and while doing
- that, preserve the errno from iconv_close. */
- int saved_errno = errno;
- if (cd1 != (iconv_t)(-1))
- iconv_close (cd1);
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- free (result);
- errno = saved_errno;
- return NULL;
- }
- if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
- {
- /* Return NULL, but free the allocated memory, and while doing
- that, preserve the errno from iconv_close. */
- int saved_errno = errno;
- if (cd != (iconv_t)(-1))
- iconv_close (cd);
- free (result);
- errno = saved_errno;
- return NULL;
- }
- if (cd != (iconv_t)(-1) && iconv_close (cd) < 0)
+ if (iconveh_close (&cd) < 0)
{
/* Return NULL, but free the allocated memory, and while doing
- that, preserve the errno from iconv_close. */
+ that, preserve the errno from iconveh_close. */
int saved_errno = errno;
free (result);
errno = saved_errno;
#if HAVE_ICONV
+/* A conversion descriptor for use by the iconveh functions.
+ It bundles the three iconv_t descriptors that iconveh_open() fills in and
+ iconveh_close() closes. */
+typedef struct
+ {
+ /* Conversion descriptor from FROM_CODESET to TO_CODESET, or (iconv_t)(-1)
+ if the system does not support a direct conversion from FROM_CODESET to
+ TO_CODESET. */
+ iconv_t cd;
+ /* Conversion descriptor from FROM_CODESET to UTF-8 (or (iconv_t)(-1) if
+ FROM_CODESET is UTF-8). */
+ iconv_t cd1;
+ /* Conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1) if
+ TO_CODESET is UTF-8). */
+ iconv_t cd2;
+ }
+ iconveh_t;
+
+/* Open a conversion descriptor for use by the iconveh functions.
+ If successful, fills *CDP and returns 0. Upon failure, returns -1 with errno
+ set. */
+extern int
+ iconveh_open (const char *to_codeset, const char *from_codeset,
+ iconveh_t *cdp);
+
+/* Close a conversion descriptor created by iconveh_open().
+ Return value: 0 if successful, otherwise -1 and errno set. */
+extern int
+ iconveh_close (const iconveh_t *cd);
+
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
- CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
- the system does not support a direct conversion from FROMCODE to TOCODE.
- CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
- (iconv_t)(-1) if FROM_CODESET is UTF-8).
- CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
- if TO_CODESET is UTF-8).
+ CD points to the conversion descriptor from FROMCODE to TOCODE, created by
+ the function iconveh_open().
If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
array is filled with offsets into the result, i.e. the character starting
at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
unchanged if no dynamic memory allocation was necessary. */
extern int
mem_cd_iconveh (const char *src, size_t srclen,
- iconv_t cd, iconv_t cd1, iconv_t cd2,
+ const iconveh_t *cd,
enum iconv_ilseq_handler handler,
size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
The original string is the NUL-terminated string starting at SRC.
- CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
- the system does not support a direct conversion from FROMCODE to TOCODE.
+ CD points to the conversion descriptor from FROMCODE to TOCODE, created by
+ the function iconveh_open().
Both the "from" and the "to" encoding must use a single NUL byte at the end
of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
- CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
- (iconv_t)(-1) if FROM_CODESET is UTF-8).
- CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
- if TO_CODESET is UTF-8).
Allocate a malloced memory block for the result.
Return value: the freshly allocated resulting NUL-terminated string if
successful, otherwise NULL and errno set. */
extern char *
str_cd_iconveh (const char *src,
- iconv_t cd, iconv_t cd1, iconv_t cd2,
+ const iconveh_t *cd,
enum iconv_ilseq_handler handler);
#endif
iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
+ iconveh_t cdeh_88592_to_88591;
+ iconveh_t cdeh_88591_to_utf8;
+ iconveh_t cdeh_utf8_to_88591;
+ iconveh_t cdeh_utf7_to_utf8;
ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
+ cdeh_88592_to_88591.cd = cd_88592_to_88591;
+ cdeh_88592_to_88591.cd1 = cd_88592_to_utf8;
+ cdeh_88592_to_88591.cd2 = cd_utf8_to_88591;
+
+ cdeh_88591_to_utf8.cd = cd_88591_to_utf8;
+ cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8;
+ cdeh_88591_to_utf8.cd2 = (iconv_t)(-1);
+
+ cdeh_utf8_to_88591.cd = cd_utf8_to_88591;
+ cdeh_utf8_to_88591.cd1 = (iconv_t)(-1);
+ cdeh_utf8_to_88591.cd2 = cd_utf8_to_88591;
+
+ cdeh_utf7_to_utf8.cd = cd_utf7_to_utf8;
+ cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;
+ cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1);
+
/* ------------------------ Test mem_cd_iconveh() ------------------------ */
/* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, strlen (input),
- cd_88592_to_88591,
- cd_88592_to_utf8, cd_utf8_to_88591,
+ &cdeh_88592_to_88591,
handler,
offsets,
&result, &length);
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, strlen (input),
- cd_88592_to_88591,
- cd_88592_to_utf8, cd_utf8_to_88591,
+ &cdeh_88592_to_88591,
handler,
offsets,
&result, &length);
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, strlen (input),
- cd_88591_to_utf8,
- cd_88591_to_utf8, (iconv_t)(-1),
+ &cdeh_88591_to_utf8,
handler,
offsets,
&result, &length);
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, strlen (input),
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
+ &cdeh_utf8_to_88591,
handler,
offsets,
&result, &length);
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, strlen (input),
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
+ &cdeh_utf8_to_88591,
handler,
offsets,
&result, &length);
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, strlen (input),
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
+ &cdeh_utf8_to_88591,
handler,
offsets,
&result, &length);
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, 7,
- cd_utf7_to_utf8,
- cd_utf7_to_utf8, (iconv_t)(-1),
+ &cdeh_utf7_to_utf8,
handler,
NULL,
&result, &length);
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh (input, strlen (input),
- cd_utf7_to_utf8,
- cd_utf7_to_utf8, (iconv_t)(-1),
+ &cdeh_utf7_to_utf8,
handler,
NULL,
&result, &length);
static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
char *result = str_cd_iconveh (input,
- cd_88592_to_88591,
- cd_88592_to_utf8, cd_utf8_to_88591,
+ &cdeh_88592_to_88591,
handler);
ASSERT (result != NULL);
ASSERT (strcmp (result, expected) == 0);
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
char *result = str_cd_iconveh (input,
- cd_88592_to_88591,
- cd_88592_to_utf8, cd_utf8_to_88591,
+ &cdeh_88592_to_88591,
handler);
switch (handler)
{
static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
char *result = str_cd_iconveh (input,
- cd_88591_to_utf8,
- cd_88591_to_utf8, (iconv_t)(-1),
+ &cdeh_88591_to_utf8,
handler);
ASSERT (result != NULL);
ASSERT (strcmp (result, expected) == 0);
static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
char *result = str_cd_iconveh (input,
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
+ &cdeh_utf8_to_88591,
handler);
ASSERT (result != NULL);
ASSERT (strcmp (result, expected) == 0);
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
char *result = str_cd_iconveh (input,
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
+ &cdeh_utf8_to_88591,
handler);
switch (handler)
{
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\342";
char *result = str_cd_iconveh (input,
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
+ &cdeh_utf8_to_88591,
handler);
ASSERT (result != NULL);
ASSERT (strcmp (result, "") == 0);