1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
31 #include "libpspp/assertion.h"
32 #include "libpspp/hmapx.h"
33 #include "libpspp/hash-functions.h"
34 #include "libpspp/pool.h"
35 #include "libpspp/str.h"
36 #include "libpspp/version.h"
38 #include "gl/localcharset.h"
39 #include "gl/xalloc.h"
40 #include "gl/relocatable.h"
41 #include "gl/xstrndup.h"
50 static char *default_encoding;
51 static struct hmapx map;
53 /* A wrapper around iconv_open that remembers the converters it has opened.
   Each (TOCODE, FROMCODE) pair is looked up in MAP first; only when no entry
   matches is a new converter opened with iconv_open and recorded in MAP.
   Returns the converter for the pair, which is (iconv_t) -1 if iconv_open
   failed. */
55 create_iconv (const char* tocode, const char* fromcode)
58 struct hmapx_node *node;
59 struct converter *converter;
62 hash = hash_string (tocode, hash_string (fromcode, 0)); /* hash covers both encoding names */
63 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
64 if (!strcmp (tocode, converter->tocode)
65 && !strcmp (fromcode, converter->fromcode))
66 return converter->conv; /* existing entry for this exact pair */
68 converter = xmalloc (sizeof *converter);
69 converter->tocode = xstrdup (tocode);
70 converter->fromcode = xstrdup (fromcode);
71 converter->conv = iconv_open (tocode, fromcode);
72 hmapx_insert (&map, converter, hash); /* inserted before the failure check below, so a failed open is recorded too */
74 /* I don't think it's safe to translate this string or to use messaging
75 as the converters have not yet been set up */
76 if ( (iconv_t) -1 == converter->conv && 0 != strcmp (tocode, fromcode)) /* a failure is reported only when the two names differ */
78 const int err = errno; /* errno captured for the strerror call below */
81 "cannot create a converter for `%s' to `%s': %s\n",
82 fromcode, tocode, strerror (err));
85 return converter->conv;
88 /* Converts the single byte C from encoding FROM to TO, returning the first
91 This function probably shouldn't be used at all, but some code still does
94 recode_byte (const char *to, const char *from, char c)
97 char *s = recode_string (to, from, &c, 1);
103 /* Similar to recode_string_pool, but allocates the returned value on the heap
104 instead of in a pool. It is the caller's responsibility to free the
107 recode_string (const char *to, const char *from,
108 const char *text, int length)
110 return recode_string_pool (to, from, text, length, NULL);
114 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
115 at OP, and appends a null terminator to the output.
117 Returns the output length if successful, -1 if the output buffer is too
120 try_recode (iconv_t conv,
121 const char *ip, size_t inbytes,
122 char *op_, size_t outbytes)
124 /* FIXME: Need to ensure that this char is valid in the target encoding */
125 const char fallbackchar = '?';
128 /* Put the converter into the initial shift state, in case there was any
129 state information left over from its last usage. */
130 iconv (conv, NULL, 0, NULL, 0);
132 while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
133 &op, &outbytes) == -1)
139 *op++ = fallbackchar;
146 *op++ = fallbackchar;
156 /* should never happen */
157 fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
169 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
170 dynamically allocated string in TO-encoding. Any characters which cannot be
171 converted will be represented by '?'.
173 LENGTH should be the length of the string or -1, if null terminated.
175 The returned string will be allocated on POOL.
177 This function's behaviour differs from that of g_convert_with_fallback
178 provided by GLib. The GLib function will fail (returns NULL) if any part of
179 the input string is not valid in the declared input encoding. This function
180 however perseveres even in the presence of badly encoded input. */
182 recode_string_pool (const char *to, const char *from,
183 const char *text, int length, struct pool *pool)
185 struct substring out;
191 length = strlen (text); /* presumably the LENGTH == -1 case described above; the guard is not visible in this excerpt */
193 out = recode_substring_pool (to, from, ss_buffer (text, length), pool); /* the conversion itself is done by the substring variant */
197 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
198 dynamically allocated string in TO-encoding. Any characters which cannot be
199 converted will be represented by '?'.
201 The returned string will be null-terminated and allocated on POOL.
   The output buffer size is found by trial: successive powers of two larger
   than TEXT's length are passed to try_recode.
203 This function's behaviour differs from that of g_convert_with_fallback
204 provided by GLib. The GLib function will fail (returns NULL) if any part of
205 the input string is not valid in the declared input encoding. This function
206 however perseveres even in the presence of badly encoded input. */
208 recode_substring_pool (const char *to, const char *from,
209 struct substring text, struct pool *pool)
211 size_t outbufferlength;
215 to = default_encoding; /* fallback to the default encoding; the guarding condition is not visible in this excerpt */
218 from = default_encoding; /* same fallback for the source encoding */
220 conv = create_iconv (to, from);
222 if ( (iconv_t) -1 == conv ) /* no converter could be created for this pair */
224 struct substring out;
225 ss_alloc_substring_pool (&out, text, pool); /* OUT is built on POOL from TEXT itself -- presumably an unconverted copy; confirm against ss_alloc_substring_pool */
229 for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 ) /* sizes 1, 2, 4, ...; stops when the shift wraps to 0 */
230 if ( outbufferlength > text.length) /* only buffers larger than the input are tried */
232 char *output = pool_malloc (pool, outbufferlength);
233 ssize_t output_len = try_recode (conv, text.string, text.length,
234 output, outbufferlength);
236 return ss_buffer (output, output_len); /* the condition guarding this return is not visible in this excerpt */
237 pool_free (pool, output); /* buffer released when the return above is not taken, before the next size is tried */
246 setlocale (LC_CTYPE, "");
247 setlocale (LC_MESSAGES, "");
249 setlocale (LC_PAPER, "");
251 bindtextdomain (PACKAGE, relocate(locale_dir));
252 textdomain (PACKAGE);
254 assert (default_encoding == NULL);
255 default_encoding = xstrdup (locale_charset ());
262 get_default_encoding (void)
264 return default_encoding;
268 set_default_encoding (const char *enc)
270 free (default_encoding);
271 default_encoding = xstrdup (enc);
275 /* Attempts to set the encoding from a locale name
276 returns true if successful.
277 This function does not (should not!) alter the current locale.
280 set_encoding_from_locale (const char *loc)
285 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
287 setlocale (LC_CTYPE, "C");
288 c_encoding = xstrdup (locale_charset ());
290 setlocale (LC_CTYPE, loc);
291 loc_encoding = xstrdup (locale_charset ());
294 if ( 0 == strcmp (loc_encoding, c_encoding))
300 setlocale (LC_CTYPE, tmp);
306 free (default_encoding);
307 default_encoding = loc_encoding;
320 struct hmapx_node *node;
321 struct converter *cvtr;
323 HMAPX_FOR_EACH (cvtr, node, &map)
326 free (cvtr->fromcode);
327 iconv_close (cvtr->conv);
331 hmapx_destroy (&map);
333 free (default_encoding);
334 default_encoding = NULL;
340 valid_encoding (const char *enc)
342 iconv_t conv = iconv_open (UTF8, enc);
344 if ( conv == (iconv_t) -1)
353 /* Return the system locale's idea of the
354 decimal separator character */
356 get_system_decimal (void)
360 char *ol = xstrdup (setlocale (LC_NUMERIC, NULL)); /* copy of the current LC_NUMERIC setting, used to restore it below */
361 setlocale (LC_NUMERIC, ""); /* switch LC_NUMERIC to the locale named by the environment */
364 radix_char = nl_langinfo (RADIXCHAR)[0]; /* first byte of the locale's radix string */
368 snprintf (buf, sizeof buf, "%f", 2.5); /* formats 2.5 into BUF; presumably an alternative way to find the separator -- surrounding lines are not visible */
373 /* We MUST leave LC_NUMERIC untouched, since it would
374 otherwise interfere with data_{in,out} */
375 setlocale (LC_NUMERIC, ol); /* put back the setting saved in OL */