1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
31 #include "libpspp/assertion.h"
32 #include "libpspp/hmapx.h"
33 #include "libpspp/hash-functions.h"
34 #include "libpspp/pool.h"
35 #include "libpspp/str.h"
36 #include "libpspp/version.h"
38 #include "gl/localcharset.h"
39 #include "gl/xalloc.h"
40 #include "gl/relocatable.h"
41 #include "gl/xstrndup.h"
/* Name of the default character encoding.  Within this file it is set from
   locale_charset (), replaced by set_default_encoding () and
   set_encoding_from_locale (), and assigned to the TO and FROM arguments
   inside recode_substring_pool (). */
50 static char *default_encoding;
/* Hash map of the converters opened by create_iconv (), hashed on the
   (tocode, fromcode) pair of encoding names. */
51 static struct hmapx map;
53 /* A wrapper around iconv_open that caches the converters it opens in MAP,
   keyed on the (TOCODE, FROMCODE) pair.  A converter already recorded for
   the pair is returned as-is; otherwise a new one is opened, recorded in MAP
   whether or not iconv_open succeeded, and returned.  The return value is
   therefore (iconv_t) -1 when the converter could not be created. */
55 create_iconv (const char* tocode, const char* fromcode)
58 struct hmapx_node *node;
59 struct converter *converter;
/* Look for a converter previously created for this pair of encodings.  The
   hash combines both names; strcmp confirms the match. */
62 hash = hash_string (tocode, hash_string (fromcode, 0));
63 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
64 if (!strcmp (tocode, converter->tocode)
65 && !strcmp (fromcode, converter->fromcode))
66 return converter->conv;
/* Nothing cached: open a new converter and record it in MAP together with
   private copies of the two encoding names. */
68 converter = xmalloc (sizeof *converter);
69 converter->tocode = xstrdup (tocode);
70 converter->fromcode = xstrdup (fromcode);
71 converter->conv = iconv_open (tocode, fromcode);
72 hmapx_insert (&map, converter, hash);
74 /* I don't think it's safe to translate this string or to use messaging
75 as the converters have not yet been set up */
/* The failure is reported only when the two encoding names differ.  errno is
   saved first so that later calls cannot overwrite it. */
76 if ( (iconv_t) -1 == converter->conv && 0 != strcmp (tocode, fromcode))
78 const int err = errno;
81 "cannot create a converter for `%s' to `%s': %s\n",
82 fromcode, tocode, strerror (err));
85 return converter->conv;
89 /* Similar to recode_string_pool, but allocates the returned value on the heap
90 instead of in a pool. It is the caller's responsibility to free the returned value. */
93 recode_string (const char *to, const char *from,
94 const char *text, int length)
/* Delegate to recode_string_pool (), passing NULL as the pool argument. */
96 return recode_string_pool (to, from, text, length, NULL);
100 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
101 at OP, and appends a null terminator to the output.
103 Returns the output length if successful, -1 if the output buffer is too small. */
106 try_recode (iconv_t conv,
107 const char *ip, size_t inbytes,
108 char *op_, size_t outbytes)
/* FALLBACKCHAR is the byte written to the output on the failure paths of
   the conversion loop below. */
110 /* FIXME: Need to ensure that this char is valid in the target encoding */
111 const char fallbackchar = '?';
114 /* Put the converter into the initial shift state, in case there was any
115 state information left over from its last usage. */
116 iconv (conv, NULL, 0, NULL, 0);
/* Keep calling iconv for as long as it reports failure (-1).  IP, INBYTES,
   OP and OUTBYTES are passed by address, so iconv advances them itself.
   NOTE(review): the dispatch on errno that selects between the statements
   below is not visible here; confirm which error codes lead to the fallback
   writes and which to the diagnostic. */
118 while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
119 &op, &outbytes) == -1)
125 *op++ = fallbackchar;
132 *op++ = fallbackchar;
142 /* should never happen */
143 fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
155 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
156 dynamically allocated string in TO-encoding. Any characters which cannot be
157 converted will be represented by '?'.
159 LENGTH should be the length of the string or -1, if null terminated.
161 The returned string will be allocated on POOL.
163 This function's behaviour differs from that of g_convert_with_fallback
164 provided by GLib. The GLib function will fail (returns NULL) if any part of
165 the input string is not valid in the declared input encoding. This function
166 however perseveres even in the presence of badly encoded input. */
168 recode_string_pool (const char *to, const char *from,
169 const char *text, int length, struct pool *pool)
171 struct substring out;
/* Measure TEXT with strlen.  NOTE(review): the condition guarding this
   statement is not visible here; per the header comment it should be the
   LENGTH == -1 case.  Confirm. */
177 length = strlen (text);
/* Wrap TEXT and LENGTH in a substring and let recode_substring_pool () do
   the conversion. */
179 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
183 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
184 dynamically allocated string in TO-encoding. Any characters which cannot be
185 converted will be represented by '?'.
187 The returned string will be null-terminated and allocated on POOL.
189 This function's behaviour differs from that of g_convert_with_fallback
190 provided by GLib. The GLib function will fail (returns NULL) if any part of
191 the input string is not valid in the declared input encoding. This function
192 however perseveres even in the presence of badly encoded input. */
194 recode_substring_pool (const char *to, const char *from,
195 struct substring text, struct pool *pool)
197 size_t outbufferlength;
/* Fall back on DEFAULT_ENCODING for TO and FROM.  NOTE(review): the
   conditions guarding these two assignments are not visible here
   (presumably null-pointer checks); confirm. */
201 to = default_encoding;
204 from = default_encoding;
/* Obtain the converter for this pair of encodings from create_iconv (). */
206 conv = create_iconv (to, from);
/* No converter could be created: OUT is filled from TEXT by
   ss_alloc_substring () instead of being converted. */
208 if ( (iconv_t) -1 == conv )
210 struct substring out;
211 ss_alloc_substring (&out, text);
/* Try output buffers whose size doubles on each iteration, skipping sizes
   that do not exceed the input length.  The loop ends when the size wraps
   around to zero. */
215 for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
216 if ( outbufferlength > text.length)
218 char *output = pool_malloc (pool, outbufferlength);
219 ssize_t output_len = try_recode (conv, text.string, text.length,
220 output, outbufferlength);
/* NOTE(review): the test on OUTPUT_LEN that separates the successful return
   from releasing the buffer is not visible here; presumably a non-negative
   length is success.  Confirm. */
222 return ss_buffer (output, output_len);
223 pool_free (pool, output);
/* Locale and message-catalog setup, followed by initialization of
   DEFAULT_ENCODING. */
/* An empty locale name asks setlocale to take each category from the
   environment. */
232 setlocale (LC_CTYPE, "");
233 setlocale (LC_MESSAGES, "");
235 setlocale (LC_PAPER, "");
/* Bind the PACKAGE text domain to the relocated LOCALE_DIR and select it. */
237 bindtextdomain (PACKAGE, relocate(locale_dir));
238 textdomain (PACKAGE);
/* DEFAULT_ENCODING must not have been set yet.  It starts out as a copy of
   the name returned by locale_charset (). */
240 assert (default_encoding == NULL);
241 default_encoding = xstrdup (locale_charset ());
/* Returns the current default encoding name.  The value returned is the
   DEFAULT_ENCODING pointer itself, not a copy. */
248 get_default_encoding (void)
250 return default_encoding;
/* Replaces the default encoding name with a newly allocated copy of ENC,
   freeing the previous value. */
254 set_default_encoding (const char *enc)
256 free (default_encoding);
257 default_encoding = xstrdup (enc);
261 /* Attempts to set the encoding from a locale name.
262 Returns true if successful.
263 This function does not (should not!) alter the current locale. */
266 set_encoding_from_locale (const char *loc)
/* Remember the name of the current LC_CTYPE locale so that it can be put
   back by the setlocale (LC_CTYPE, tmp) call below. */
271 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
/* Charset name reported under the "C" locale. */
273 setlocale (LC_CTYPE, "C");
274 c_encoding = xstrdup (locale_charset ());
/* Charset name reported under LOC. */
276 setlocale (LC_CTYPE, loc);
277 loc_encoding = xstrdup (locale_charset ());
/* Compare the two charset names.  NOTE(review): what happens when they are
   equal is not visible here; confirm. */
280 if ( 0 == strcmp (loc_encoding, c_encoding))
/* Restore the LC_CTYPE locale that was in effect on entry. */
286 setlocale (LC_CTYPE, tmp);
/* Install the charset name obtained under LOC as the new default encoding.
   The LOC_ENCODING pointer itself is stored, not a copy. */
292 free (default_encoding);
293 default_encoding = loc_encoding;
/* Teardown of the converter cache and of DEFAULT_ENCODING. */
306 struct hmapx_node *node;
307 struct converter *cvtr;
/* For each converter recorded in MAP, free its FROMCODE string and close its
   iconv descriptor. */
309 HMAPX_FOR_EACH (cvtr, node, &map)
312 free (cvtr->fromcode);
313 iconv_close (cvtr->conv);
/* Destroy the map itself. */
317 hmapx_destroy (&map);
/* Free the default encoding name and reset the pointer to NULL. */
319 free (default_encoding);
320 default_encoding = NULL;
/* Probes ENC by asking iconv_open for a converter from ENC to UTF8.
   NOTE(review): the handling of the result after the comparison below is not
   visible here; confirm the return values and that the descriptor is
   closed. */
326 valid_encoding (const char *enc)
328 iconv_t conv = iconv_open (UTF8, enc);
/* (iconv_t) -1 is the failure value of iconv_open. */
330 if ( conv == (iconv_t) -1)
339 /* Return the system locale's idea of the
340 decimal separator character */
342 get_system_decimal (void)
/* Save a copy of the current LC_NUMERIC locale name in OL, then switch
   LC_NUMERIC to the locale taken from the environment. */
346 char *ol = xstrdup (setlocale (LC_NUMERIC, NULL));
347 setlocale (LC_NUMERIC, "");
/* One way to obtain the radix character: the first byte of the string
   returned by nl_langinfo (RADIXCHAR). */
350 radix_char = nl_langinfo (RADIXCHAR)[0];
/* Another way: format 2.5 into BUF.  NOTE(review): how the choice between
   the two ways is made, and how BUF is inspected afterwards, is not visible
   here; confirm. */
354 snprintf (buf, sizeof buf, "%f", 2.5);
359 /* We MUST leave LC_NUMERIC untouched, since it would
360 otherwise interfere with data_{in,out} */
361 setlocale (LC_NUMERIC, ol);