X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=60dc693db6585a967ddae3cc82c37f26455587eb;hb=2b84fc2b70df7767ee9a279fffab5db1b3c8023e;hp=b323bf8586a2370b2e072b69eb3a8d6cb10b68e1;hpb=503f53bfdde87fc40466dadb77bc04cee0be2567;p=pspp diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index b323bf8586..60dc693db6 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,12 +21,19 @@ #include #include #include +#include #include #include +#include #include "assertion.h" +#include "hmapx.h" +#include "hash-functions.h" +#include "pool.h" #include "i18n.h" +#include "version.h" + #include #include "xstrndup.h" @@ -34,151 +41,277 @@ #include #endif -static char *default_encoding; +struct converter + { + char *tocode; + char *fromcode; + iconv_t conv; + }; +static char *default_encoding; +static struct hmapx map; /* A wrapper around iconv_open */ static iconv_t create_iconv (const char* tocode, const char* fromcode) { - iconv_t conv = iconv_open (tocode, fromcode); + size_t hash; + struct hmapx_node *node; + struct converter *converter; + assert (fromcode); + + hash = hash_string (tocode, hash_string (fromcode, 0)); + HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map) + if (!strcmp (tocode, converter->tocode) + && !strcmp (fromcode, converter->fromcode)) + return converter->conv; + + converter = xmalloc (sizeof *converter); + converter->tocode = xstrdup (tocode); + converter->fromcode = xstrdup (fromcode); + converter->conv = iconv_open (tocode, fromcode); + hmapx_insert (&map, converter, hash); /* I don't think it's safe to translate this string or to use messaging - as the convertors have not yet been set up */ - if ( (iconv_t) -1 == conv && 0 != strcmp (tocode, fromcode)) + as the converters have not yet been set up */ + if ( (iconv_t) -1 == converter->conv && 0 != strcmp (tocode, fromcode)) { const int err = errno; fprintf (stderr, - "Warning: cannot create a convertor for \"%s\" to \"%s\": %s\n", - fromcode, tocode, strerror (err)); + "Warning: " + "cannot create a converter for `%s' to `%s': %s\n", + fromcode, tocode, strerror (err)); } - return conv; + return converter->conv; } -/* Return a string based on TEXT converted according to HOW. - If length is not -1, then it must be the number of bytes in TEXT. - The returned string must be freed when no longer required. -*/ + +/* Similar to recode_string_pool, but allocates the returned value on the heap + instead of in a pool. It is the caller's responsibility to free the + returned value. */ char * recode_string (const char *to, const char *from, const char *text, int length) { - char *outbuf = 0; - size_t outbufferlength; - size_t result; - char *op ; - size_t inbytes = 0; - size_t outbytes ; - iconv_t conv ; + return recode_string_pool (to, from, text, length, NULL); +} + + +/* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting + at OP, and appends a null terminator to the output. + Returns true if successful, false if the output buffer is too small. */ +static bool +try_recode (iconv_t conv, + const char *ip, size_t inbytes, + char *op, size_t outbytes) +{ /* FIXME: Need to ensure that this char is valid in the target encoding */ const char fallbackchar = '?'; + /* Put the converter into the initial shift state, in case there was any + state information left over from its last usage. */ + iconv (conv, NULL, 0, NULL, 0); + + while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes, + &op, &outbytes) == -1) + switch (errno) + { + case EINVAL: + if (outbytes < 2) + return false; + *op++ = fallbackchar; + *op++ = '\0'; + return true; + + case EILSEQ: + if (outbytes == 0) + return false; + *op++ = fallbackchar; + outbytes--; + ip++; + inbytes--; + break; + + case E2BIG: + return false; + + default: + /* should never happen */ + fprintf (stderr, "Character conversion error: %s\n", strerror (errno)); + NOT_REACHED (); + break; + } + + if (outbytes == 0) + return false; + + *op = '\0'; + return true; +} + +/* Converts the string TEXT, which should be encoded in FROM-encoding, to a + dynamically allocated string in TO-encoding. Any characters which cannot be + converted will be represented by '?'. + + LENGTH should be the length of the string or -1, if null terminated. + + The returned string will be allocated on POOL. + + This function's behaviour differs from that of g_convert_with_fallback + provided by GLib. The GLib function will fail (returns NULL) if any part of + the input string is not valid in the declared input encoding. This function + however perseveres even in the presence of badly encoded input. */ +char * +recode_string_pool (const char *to, const char *from, + const char *text, int length, struct pool *pool) +{ + size_t outbufferlength; + iconv_t conv ; + if ( text == NULL ) return NULL; if ( length == -1 ) length = strlen(text); - if (to == NULL) to = default_encoding; if (from == NULL) from = default_encoding; - if ( 0 == strcmp (to, from)) - return xstrndup (text, length); + conv = create_iconv (to, from); + + if ( (iconv_t) -1 == conv ) + return xstrdup (text); for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 ) if ( outbufferlength > length) - break; + { + char *output = pool_malloc (pool, outbufferlength); + if (try_recode (conv, text, length, output, outbufferlength)) + return output; + pool_free (pool, output); + } - outbuf = xmalloc(outbufferlength); - op = outbuf; + NOT_REACHED (); +} - outbytes = outbufferlength; - inbytes = length; +void +i18n_init (void) +{ +#if ENABLE_NLS + setlocale (LC_CTYPE, ""); +#ifdef LC_MESSAGES + setlocale (LC_MESSAGES, ""); +#endif +#if HAVE_LC_PAPER + setlocale (LC_PAPER, ""); +#endif + bindtextdomain (PACKAGE, relocate(locale_dir)); + textdomain (PACKAGE); +#endif /* ENABLE_NLS */ + assert (default_encoding == NULL); + default_encoding = xstrdup (locale_charset ()); - conv = create_iconv (to, from); + hmapx_init (&map); +} - do { - const char *ip = text; - result = iconv (conv, (ICONV_CONST char **) &text, &inbytes, - &op, &outbytes); - if ( -1 == result ) - { - int the_error = errno; - - switch (the_error) - { - case EILSEQ: - case EINVAL: - if ( outbytes > 0 ) - { - *op++ = fallbackchar; - outbytes--; - text++; - inbytes--; - break; - } - /* Fall through */ - case E2BIG: - free (outbuf); - outbufferlength <<= 1; - outbuf = xmalloc (outbufferlength); - op = outbuf; - outbytes = outbufferlength; - inbytes = length; - text = ip; - break; - default: - /* should never happen */ - NOT_REACHED (); - break; - } - } - } while ( -1 == result ); +const char * +get_default_encoding (void) +{ + return default_encoding; +} +void +set_default_encoding (const char *enc) +{ + free (default_encoding); + default_encoding = xstrdup (enc); +} - iconv_close (conv); - if (outbytes == 0 ) - { - char *const oldaddr = outbuf; - outbuf = xrealloc (outbuf, outbufferlength + 1); +/* Attempts to set the encoding from a locale name + returns true if successfull. + This function does not (should not!) alter the current locale. +*/ +bool +set_encoding_from_locale (const char *loc) +{ + bool ok = true; + char *c_encoding; + char *loc_encoding; + char *tmp = xstrdup (setlocale (LC_CTYPE, NULL)); + + setlocale (LC_CTYPE, "C"); + c_encoding = xstrdup (locale_charset ()); + + setlocale (LC_CTYPE, loc); + loc_encoding = xstrdup (locale_charset ()); - op += (outbuf - oldaddr) ; + + if ( 0 == strcmp (loc_encoding, c_encoding)) + { + ok = false; } - *op = '\0'; - return outbuf; -} + setlocale (LC_CTYPE, tmp); + free (tmp); + if (ok) + { + free (default_encoding); + default_encoding = loc_encoding; + } + else + free (loc_encoding); + free (c_encoding); -void -i18n_init (void) -{ - free (default_encoding); - default_encoding = strdup (locale_charset ()); + return ok; } - void i18n_done (void) { + struct hmapx_node *node; + struct converter *cvtr; + + HMAPX_FOR_EACH (cvtr, node, &map) + { + free (cvtr->tocode); + free (cvtr->fromcode); + iconv_close (cvtr->conv); + free (cvtr); + } + + hmapx_destroy (&map); + free (default_encoding); default_encoding = NULL; } +bool +valid_encoding (const char *enc) +{ + iconv_t conv = iconv_open ("UTF8", enc); + + if ( conv == (iconv_t) -1) + return false; + + iconv_close (conv); + + return true; +} + /* Return the system local's idea of the decimal seperator character */ @@ -187,7 +320,7 @@ get_system_decimal (void) { char radix_char; - char *ol = strdup (setlocale (LC_NUMERIC, NULL)); + char *ol = xstrdup (setlocale (LC_NUMERIC, NULL)); setlocale (LC_NUMERIC, ""); #if HAVE_NL_LANGINFO