-/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
- Written by John Darrington <john@darrington.wattle.id.au>
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2006, 2009, 2010 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include <xalloc.h>
+
+#include "libpspp/i18n.h"
+
#include <assert.h>
-#include <stdlib.h>
+#include <errno.h>
+#include <iconv.h>
+#include <langinfo.h>
+#include <libintl.h>
+#include <locale.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <iconv.h>
-#include <errno.h>
-
-#include "i18n.h"
-
-#include <localcharset.h>
-#include "xstrndup.h"
+#include "libpspp/assertion.h"
+#include "libpspp/hmapx.h"
+#include "libpspp/hash-functions.h"
+#include "libpspp/pool.h"
+#include "libpspp/str.h"
+#include "libpspp/version.h"
-static char *locale = 0;
-static const char *charset;
+#include "gl/localcharset.h"
+#include "gl/xalloc.h"
+#include "gl/relocatable.h"
+#include "gl/xstrndup.h"
+struct converter /* One cached iconv descriptor for a (tocode, fromcode) pair. */
+ {
+ char *tocode; /* Target encoding name (private copy made by create_iconv). */
+ char *fromcode; /* Source encoding name (private copy made by create_iconv). */
+ iconv_t conv; /* Value returned by iconv_open; (iconv_t) -1 if it failed. */
+ };
-static iconv_t convertor[n_CONV];
-
+static char *default_encoding; /* Encoding substituted for a NULL TO or FROM argument. */
+static struct hmapx map; /* Holds every struct converter made by create_iconv. */
/* A wrapper around iconv_open */
-static iconv_t
+static iconv_t
create_iconv (const char* tocode, const char* fromcode)
{
- iconv_t conv = iconv_open (tocode, fromcode);
+ size_t hash;
+ struct hmapx_node *node;
+ struct converter *converter;
+ assert (fromcode);
+ /* Reuse the cached converter for this exact (tocode, fromcode) pair, if any. */
+ hash = hash_string (tocode, hash_string (fromcode, 0));
+ HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
+ if (!strcmp (tocode, converter->tocode)
+ && !strcmp (fromcode, converter->fromcode))
+ return converter->conv;
+ /* Not cached yet: open a converter and record it in the map. The entry is
+ inserted even when iconv_open fails, so later calls for the same pair get
+ (iconv_t) -1 from the cache without repeating the warning below. */
+ converter = xmalloc (sizeof *converter);
+ converter->tocode = xstrdup (tocode);
+ converter->fromcode = xstrdup (fromcode);
+ converter->conv = iconv_open (tocode, fromcode);
+ hmapx_insert (&map, converter, hash);
/* I don't think it's safe to translate this string or to use messaging
- as the convertors have not yet been set up */
- if ( (iconv_t) -1 == conv)
+ as the converters have not yet been set up */
+ if ( (iconv_t) -1 == converter->conv && 0 != strcmp (tocode, fromcode))
{
const int err = errno;
- fprintf (stderr,
- "Warning: cannot create a convertor for \"%s\" to \"%s\": %s\n",
- fromcode, tocode, strerror (err));
+ fprintf (stderr,
+ "Warning: "
+ "cannot create a converter for `%s' to `%s': %s\n",
+ fromcode, tocode, strerror (err));
}
-
- return conv;
+ /* The result may be (iconv_t) -1; no warning was printed for that if
+ TOCODE and FROMCODE are the same string. */
+ return converter->conv;
}
-/* Return a string based on TEXT converted according to HOW.
- If length is not -1, then it must be the number of bytes in TEXT.
- The returned string must be freed when no longer required.
-*/
+
+/* Similar to recode_string_pool, but allocates the returned value on the heap
+ instead of in a pool. It is the caller's responsibility to free the
+ returned value.
+
+ TO, FROM, TEXT and LENGTH are handed unchanged to recode_string_pool, with
+ a null pointer as the pool argument. */
char *
-recode_string(enum conv_id how, const char *text, int length)
+recode_string (const char *to, const char *from,
+ const char *text, int length)
{
- char *outbuf = 0;
- size_t outbufferlength;
- size_t result;
- char *ip ;
- char *op ;
- size_t inbytes = 0;
- size_t outbytes ;
+ return recode_string_pool (to, from, text, length, NULL);
+}
+
+/* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
+ at OP, and appends a null terminator to the output.
+
+ Returns the output length if successful, -1 if the output buffer is too
+ small. The returned length does not count the null terminator.
+
+ Each input byte at which iconv fails with EILSEQ is replaced by '?' in the
+ output and skipped. If iconv fails with EINVAL, a single '?' is appended
+ and conversion ends there. */
+static ssize_t
+try_recode (iconv_t conv,
+ const char *ip, size_t inbytes,
+ char *op_, size_t outbytes)
+{
/* FIXME: Need to ensure that this char is valid in the target encoding */
const char fallbackchar = '?';
+ char *op = op_;
- if ( text == NULL )
+ /* Put the converter into the initial shift state, in case there was any
+ state information left over from its last usage. */
+ iconv (conv, NULL, 0, NULL, 0);
+ /* Convert. After a recoverable failure the loop calls iconv again, which
+ continues from the updated IP and OP positions. */
+ while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
+ &op, &outbytes) == -1)
+ switch (errno)
+ {
+ case EINVAL: /* Per the iconv specification: incomplete sequence at end of input. */
+ if (outbytes < 2) /* Room is needed for '?' plus the terminator. */
+ return -1;
+ *op++ = fallbackchar;
+ *op = '\0';
+ return op - op_;
+ /* Invalid input: substitute the fallback for one input byte and go on. */
+ case EILSEQ:
+ if (outbytes == 0)
+ return -1;
+ *op++ = fallbackchar;
+ outbytes--;
+ ip++;
+ inbytes--;
+ break;
+ /* Output buffer exhausted: report it so that a larger one can be tried. */
+ case E2BIG:
+ return -1;
+
+ default:
+ /* should never happen */
+ fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
+ NOT_REACHED ();
+ break;
+ }
+ /* All input consumed; one spare output byte is still needed for the terminator. */
+ if (outbytes == 0)
+ return -1;
+
+ *op = '\0';
+ return op - op_;
+}
+
+/* Converts the string TEXT, which should be encoded in FROM-encoding, to a
+ dynamically allocated string in TO-encoding. Any characters which cannot be
+ converted will be represented by '?'.
+
+ LENGTH should be the length of the string or -1, if null terminated.
+
+ The returned string will be allocated on POOL.
+
+ This function's behaviour differs from that of g_convert_with_fallback
+ provided by GLib. The GLib function will fail (returns NULL) if any part of
+ the input string is not valid in the declared input encoding. This function
+ however perseveres even in the presence of badly encoded input.
+
+ Returns a null pointer if TEXT is null. Otherwise the work is done by
+ recode_substring_pool and only the string member of its result is
+ returned; the converted length is not reported to the caller. */
+char *
+recode_string_pool (const char *to, const char *from,
+ const char *text, int length, struct pool *pool)
+{
+ struct substring out;
+
+ if ( text == NULL )
return NULL;
- if ( length == -1 )
- length = strlen(text);
+ if ( length == -1 )
+ length = strlen (text);
- assert(how < n_CONV);
+ out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
+ return out.string;
+}
- if (convertor[how] == (iconv_t) -1)
- return xstrndup (text, length);
+/* Converts the string TEXT, which should be encoded in FROM-encoding, to a
+ dynamically allocated string in TO-encoding. Any characters which cannot be
+ converted will be represented by '?'.
- for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
- if ( outbufferlength > length)
- break;
+ The returned string will be null-terminated and allocated on POOL.
- outbuf = xmalloc(outbufferlength);
- op = outbuf;
- ip = (char *) text;
+ This function's behaviour differs from that of g_convert_with_fallback
+ provided by GLib. The GLib function will fail (returns NULL) if any part of
+ the input string is not valid in the declared input encoding. This function
+ however perseveres even in the presence of badly encoded input.
+
+ A null TO or FROM is replaced by the default encoding.
+
+ NOTE(review): when no converter can be created, the result comes from
+ ss_alloc_substring, which is not given POOL. Confirm that this result is
+ null-terminated and owned by POOL as promised above. */
+struct substring
+recode_substring_pool (const char *to, const char *from,
+ struct substring text, struct pool *pool)
+{
+ size_t outbufferlength;
+ iconv_t conv ;
- outbytes = outbufferlength;
- inbytes = length;
-
- do {
- result = iconv(convertor[how], &ip, &inbytes,
- &op, &outbytes);
+ if (to == NULL)
+ to = default_encoding;
- if ( -1 == result )
- {
- int the_error = errno;
-
- switch ( the_error)
- {
- case EILSEQ:
- case EINVAL:
- if ( outbytes > 0 )
- {
- *op++ = fallbackchar;
- outbytes--;
- ip++;
- inbytes--;
- break;
- }
- /* Fall through */
- case E2BIG:
- free(outbuf);
- outbufferlength <<= 1;
- outbuf = xmalloc(outbufferlength);
- op = outbuf;
- ip = (char *) text;
- outbytes = outbufferlength;
- inbytes = length;
- break;
- default:
- /* should never happen */
- break;
- }
+ if (from == NULL)
+ from = default_encoding;
- }
- } while ( -1 == result );
+ conv = create_iconv (to, from); /* Cached; (iconv_t) -1 if none could be opened. */
- if (outbytes == 0 )
+ if ( (iconv_t) -1 == conv )
{
- char *const oldaddr = outbuf;
- outbuf = xrealloc(outbuf, outbufferlength + 1);
-
- op += (outbuf - oldaddr) ;
+ struct substring out;
+ ss_alloc_substring (&out, text); /* No conversion: presumably a plain copy of TEXT. */
+ return out;
}
- *op = '\0';
+ for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 ) /* Doubling sizes. */
+ if ( outbufferlength > text.length) /* Only sizes larger than the input are tried. */
+ {
+ char *output = pool_malloc (pool, outbufferlength);
+ ssize_t output_len = try_recode (conv, text.string, text.length,
+ output, outbufferlength);
+ if (output_len >= 0)
+ return ss_buffer (output, output_len);
+ pool_free (pool, output); /* Too small: discard and try the next size. */
+ }
- return outbuf;
+ NOT_REACHED (); /* The loop ends only if outbufferlength wraps around to 0. */
+}
+/* Sets up this module. Takes LC_CTYPE and LC_MESSAGES (and LC_PAPER when
+ HAVE_LC_PAPER is set) from the environment, selects PACKAGE as the gettext
+ text domain with its catalogs bound to relocate (locale_dir), stores a copy
+ of locale_charset () as the default encoding, and initializes the
+ converter map. Asserts that no default encoding has been stored yet. */
+void
+i18n_init (void)
+{
+ setlocale (LC_CTYPE, "");
+ setlocale (LC_MESSAGES, "");
+#if HAVE_LC_PAPER
+ setlocale (LC_PAPER, "");
+#endif
+ bindtextdomain (PACKAGE, relocate(locale_dir));
+ textdomain (PACKAGE);
+ /* LC_CTYPE was set above, so this is the charset of the environment's locale. */
+ assert (default_encoding == NULL);
+ default_encoding = xstrdup (locale_charset ());
+ /* Start with an empty converter cache. */
+ hmapx_init (&map);
}
-/* Returns the current PSPP locale */
const char *
-get_pspp_locale(void)
+get_default_encoding (void) /* Returns the name of the current default encoding. */
{
- assert ( locale);
- return locale;
+ return default_encoding; /* The module's own string, not a fresh copy. */
}
-/* Set the PSPP locale */
-void
-set_pspp_locale(const char *l)
+void
+set_default_encoding (const char *enc) /* Makes ENC the default encoding. */
{
- char *current_locale;
- const char *current_charset;
+ free (default_encoding); /* Release the previously stored name. */
+ default_encoding = xstrdup (enc); /* Keep a private copy of ENC. */
+}
+
+
+/* Attempts to set the default encoding from the locale name LOC.
+ Returns true if successful; on failure the default encoding is unchanged.
+ This function does not (should not!) alter the current locale: LC_CTYPE is
+ switched temporarily and put back to its previous value before returning.
+*/
+bool
+set_encoding_from_locale (const char *loc)
+{
+ bool ok = true;
+ char *c_encoding;
+ char *loc_encoding;
+ char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
- free(locale);
- locale = strdup(l);
+ setlocale (LC_CTYPE, "C");
+ c_encoding = xstrdup (locale_charset ());
- current_locale = setlocale(LC_CTYPE, 0);
- current_charset = locale_charset();
- setlocale(LC_CTYPE, locale);
-
- charset = locale_charset();
- setlocale(LC_CTYPE, current_locale);
+ setlocale (LC_CTYPE, loc);
+ loc_encoding = xstrdup (locale_charset ());
+
+ /* A charset equal to that of the "C" locale counts as failure.
+ NOTE(review): presumably because LC_CTYPE stays at "C" when LOC is not a usable locale -- confirm. */
+ if ( 0 == strcmp (loc_encoding, c_encoding))
+ {
+ ok = false;
+ }
- iconv_close(convertor[CONV_PSPP_TO_UTF8]);
- convertor[CONV_PSPP_TO_UTF8] = create_iconv ("UTF-8", charset);
- iconv_close(convertor[CONV_SYSTEM_TO_PSPP]);
- convertor[CONV_SYSTEM_TO_PSPP] = create_iconv (charset, current_charset);
+ setlocale (LC_CTYPE, tmp);
+
+ free (tmp);
+ /* On success the default encoding takes over ownership of loc_encoding. */
+ if (ok)
+ {
+ free (default_encoding);
+ default_encoding = loc_encoding;
+ }
+ else
+ free (loc_encoding);
+
+ free (c_encoding);
+
+ return ok;
}
void
-i18n_init(void)
+i18n_done (void)
{
- assert ( ! locale) ;
- locale = strdup(setlocale(LC_CTYPE, NULL));
+ struct hmapx_node *node;
+ struct converter *cvtr;
+
+ HMAPX_FOR_EACH (cvtr, node, &map)
+ {
+ free (cvtr->tocode);
+ free (cvtr->fromcode);
+ iconv_close (cvtr->conv);
+ free (cvtr);
+ }
- setlocale(LC_CTYPE, locale);
- charset = locale_charset();
+ hmapx_destroy (&map);
- convertor[CONV_PSPP_TO_UTF8] = create_iconv ("UTF-8", charset);
- convertor[CONV_SYSTEM_TO_PSPP] = create_iconv (charset, charset);
+ free (default_encoding);
+ default_encoding = NULL;
}
-void
-i18n_done(void)
+/* Returns true if iconv_open can create a conversion from ENC to UTF8,
+ false otherwise. The descriptor opened for the check is closed again. */
+bool
+valid_encoding (const char *enc)
{
- int i;
- free(locale);
- locale = 0;
+ iconv_t conv = iconv_open (UTF8, enc);
- for(i = 0 ; i < n_CONV; ++i )
- {
- if ( (iconv_t) -1 == convertor[i] )
- continue;
- iconv_close(convertor[i]);
- }
+ if ( conv == (iconv_t) -1)
+ return false;
+ /* The descriptor was only a probe; release it. */
+ iconv_close (conv);
+
+ return true;
+}
+
+
+/* Returns the decimal separator character of the system locale, that is,
+ of the LC_NUMERIC locale selected by the environment. LC_NUMERIC is
+ changed only for the duration of the call. */
+char
+get_system_decimal (void)
+{
+ char radix_char;
+ /* Remember the current LC_NUMERIC setting so that it can be restored. */
+ char *ol = xstrdup (setlocale (LC_NUMERIC, NULL));
+ setlocale (LC_NUMERIC, "");
+ /* Only the first byte of the locale's radix string is used. */
+#if HAVE_NL_LANGINFO
+ radix_char = nl_langinfo (RADIXCHAR)[0];
+#else
+ {
+ char buf[10];
+ snprintf (buf, sizeof buf, "%f", 2.5);
+ radix_char = buf[1]; /* buf holds "2" followed by the radix character. */
+ }
+#endif
+
+ /* We MUST leave LC_NUMERIC untouched, since it would
+ otherwise interfere with data_{in,out} */
+ setlocale (LC_NUMERIC, ol);
+ free (ol);
+ return radix_char;
}