X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=cb155723b3e8bc9867c848bc12a9ab8c85fb42c3;hb=28bebf0851bac332b55cdc03e55165dcc8e713c2;hp=0e9b2b1b3863bf6326d238d7f9508aefb17f8894;hpb=f9c7f743be4a93db4bf21861b62a984186f32308;p=pspp-builds.git diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 0e9b2b1b..cb155723 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2009 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,31 +15,30 @@ along with this program. If not, see . */ #include -#include + +#include "libpspp/i18n.h" + #include +#include +#include +#include +#include #include -#include #include +#include #include -#include -#include -#include -#include -#include "assertion.h" -#include "hmapx.h" -#include "hash-functions.h" -#include "pool.h" - -#include "i18n.h" -#include "version.h" +#include "libpspp/assertion.h" +#include "libpspp/hmapx.h" +#include "libpspp/hash-functions.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" +#include "libpspp/version.h" -#include -#include "xstrndup.h" - -#if HAVE_NL_LANGINFO -#include -#endif +#include "gl/localcharset.h" +#include "gl/xalloc.h" +#include "gl/relocatable.h" +#include "gl/xstrndup.h" struct converter { @@ -79,16 +78,31 @@ create_iconv (const char* tocode, const char* fromcode) const int err = errno; fprintf (stderr, "Warning: " - "cannot create a converter for \"%s\" to \"%s\": %s\n", + "cannot create a converter for `%s' to `%s': %s\n", fromcode, tocode, strerror (err)); } return converter->conv; } +/* Converts the single byte C from encoding FROM to TO, returning the first + byte of the result. -/* Similar to recode_string_pool, but allocates the returned value on the heap instead of - in a pool. It is the caller's responsibility to free the returned value. */ + This function probably shouldn't be used at all, but some code still does + use it. */ +char +recode_byte (const char *to, const char *from, char c) +{ + char x; + char *s = recode_string (to, from, &c, 1); + x = s[0]; + free (s); + return x; +} + +/* Similar to recode_string_pool, but allocates the returned value on the heap + instead of in a pool. It is the caller's responsibility to free the + returned value. */ char * recode_string (const char *to, const char *from, const char *text, int length) @@ -97,131 +111,145 @@ recode_string (const char *to, const char *from, } -/* -Converts the string TEXT, which should be encoded in FROM-encoding, to a -dynamically allocated string in TO-encoding. Any characters which cannot -be converted will be represented by '?'. +/* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting + at OP, and appends a null terminator to the output. -LENGTH should be the length of the string or -1, if null terminated. + Returns the output length if successful, -1 if the output buffer is too + small. */ +static ssize_t +try_recode (iconv_t conv, + const char *ip, size_t inbytes, + char *op_, size_t outbytes) +{ + /* FIXME: Need to ensure that this char is valid in the target encoding */ + const char fallbackchar = '?'; + char *op = op_; -The returned string will be allocated on POOL. + /* Put the converter into the initial shift state, in case there was any + state information left over from its last usage. */ + iconv (conv, NULL, 0, NULL, 0); -This function's behaviour differs from that of g_convert_with_fallback provided -by GLib. The GLib function will fail (returns NULL) if any part of the input -string is not valid in the declared input encoding. This function however perseveres -even in the presence of badly encoded input. -*/ + while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes, + &op, &outbytes) == -1) + switch (errno) + { + case EINVAL: + if (outbytes < 2) + return -1; + *op++ = fallbackchar; + *op = '\0'; + return op - op_; + + case EILSEQ: + if (outbytes == 0) + return -1; + *op++ = fallbackchar; + outbytes--; + ip++; + inbytes--; + break; + + case E2BIG: + return -1; + + default: + /* should never happen */ + fprintf (stderr, "Character conversion error: %s\n", strerror (errno)); + NOT_REACHED (); + break; + } + + if (outbytes == 0) + return -1; + + *op = '\0'; + return op - op_; +} + +/* Converts the string TEXT, which should be encoded in FROM-encoding, to a + dynamically allocated string in TO-encoding. Any characters which cannot be + converted will be represented by '?'. + + LENGTH should be the length of the string or -1, if null terminated. + + The returned string will be allocated on POOL. + + This function's behaviour differs from that of g_convert_with_fallback + provided by GLib. The GLib function will fail (returns NULL) if any part of + the input string is not valid in the declared input encoding. This function + however perseveres even in the presence of badly encoded input. */ char * recode_string_pool (const char *to, const char *from, - const char *text, int length, struct pool *pool) + const char *text, int length, struct pool *pool) { - char *outbuf = 0; - size_t outbufferlength; - size_t result; - char *op ; - size_t inbytes = 0; - size_t outbytes ; - iconv_t conv ; - - /* FIXME: Need to ensure that this char is valid in the target encoding */ - const char fallbackchar = '?'; + struct substring out; if ( text == NULL ) return NULL; if ( length == -1 ) - length = strlen(text); + length = strlen (text); - if (to == NULL) - to = default_encoding; + out = recode_substring_pool (to, from, ss_buffer (text, length), pool); + return out.string; +} - if (from == NULL) - from = default_encoding; +/* Converts the string TEXT, which should be encoded in FROM-encoding, to a + dynamically allocated string in TO-encoding. Any characters which cannot be + converted will be represented by '?'. - for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 ) - if ( outbufferlength > length) - break; + The returned string will be null-terminated and allocated on POOL. - outbuf = pool_malloc (pool, outbufferlength); - op = outbuf; + This function's behaviour differs from that of g_convert_with_fallback + provided by GLib. The GLib function will fail (returns NULL) if any part of + the input string is not valid in the declared input encoding. This function + however perseveres even in the presence of badly encoded input. */ +struct substring +recode_substring_pool (const char *to, const char *from, + struct substring text, struct pool *pool) +{ + size_t outbufferlength; + iconv_t conv ; - outbytes = outbufferlength; - inbytes = length; + if (to == NULL) + to = default_encoding; + if (from == NULL) + from = default_encoding; conv = create_iconv (to, from); if ( (iconv_t) -1 == conv ) - return xstrdup (text); - - do { - const char *ip = text; - result = iconv (conv, (ICONV_CONST char **) &text, &inbytes, - &op, &outbytes); - - if ( -1 == result ) - { - int the_error = errno; - - switch (the_error) - { - case EILSEQ: - case EINVAL: - if ( outbytes > 0 ) - { - *op++ = fallbackchar; - outbytes--; - text++; - inbytes--; - break; - } - /* Fall through */ - case E2BIG: - free (outbuf); - outbufferlength <<= 1; - outbuf = pool_malloc (pool, outbufferlength); - op = outbuf; - outbytes = outbufferlength; - inbytes = length; - text = ip; - break; - default: - /* should never happen */ - fprintf (stderr, "Character conversion error: %s\n", strerror (the_error)); - NOT_REACHED (); - break; - } - } - } while ( -1 == result ); - - if (outbytes == 0 ) { - char *const oldaddr = outbuf; - outbuf = pool_realloc (pool, outbuf, outbufferlength + 1); - - op += (outbuf - oldaddr) ; + struct substring out; + ss_alloc_substring_pool (&out, text, pool); + return out; } - *op = '\0'; + for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 ) + if ( outbufferlength > text.length) + { + char *output = pool_malloc (pool, outbufferlength); + ssize_t output_len = try_recode (conv, text.string, text.length, + output, outbufferlength); + if (output_len >= 0) + return ss_buffer (output, output_len); + pool_free (pool, output); + } - return outbuf; + NOT_REACHED (); } - void i18n_init (void) { -#if ENABLE_NLS setlocale (LC_CTYPE, ""); -#ifdef LC_MESSAGES setlocale (LC_MESSAGES, ""); -#endif #if HAVE_LC_PAPER setlocale (LC_PAPER, ""); #endif bindtextdomain (PACKAGE, relocate(locale_dir)); textdomain (PACKAGE); -#endif /* ENABLE_NLS */ assert (default_encoding == NULL); default_encoding = xstrdup (locale_charset ()); @@ -311,7 +339,7 @@ i18n_done (void) bool valid_encoding (const char *enc) { - iconv_t conv = iconv_open ("UTF8", enc); + iconv_t conv = iconv_open (UTF8, enc); if ( conv == (iconv_t) -1) return false;