1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
31 #include "libpspp/assertion.h"
32 #include "libpspp/hmapx.h"
33 #include "libpspp/hash-functions.h"
34 #include "libpspp/pool.h"
35 #include "libpspp/str.h"
36 #include "libpspp/version.h"
38 #include "gl/localcharset.h"
39 #include "gl/xalloc.h"
40 #include "gl/relocatable.h"
41 #include "gl/xstrndup.h"
50 static char *default_encoding;
51 static struct hmapx map;
53 /* A wrapper around iconv_open */
55 create_iconv (const char* tocode, const char* fromcode)
58 struct hmapx_node *node;
59 struct converter *converter;
62 hash = hash_string (tocode, hash_string (fromcode, 0));
63 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
64 if (!strcmp (tocode, converter->tocode)
65 && !strcmp (fromcode, converter->fromcode))
66 return converter->conv;
68 converter = xmalloc (sizeof *converter);
69 converter->tocode = xstrdup (tocode);
70 converter->fromcode = xstrdup (fromcode);
71 converter->conv = iconv_open (tocode, fromcode);
72 hmapx_insert (&map, converter, hash);
74 /* I don't think it's safe to translate this string or to use messaging
75 as the converters have not yet been set up */
76 if ( (iconv_t) -1 == converter->conv && 0 != strcmp (tocode, fromcode))
78 const int err = errno;
81 "cannot create a converter for `%s' to `%s': %s\n",
82 fromcode, tocode, strerror (err));
85 return converter->conv;
88 /* Converts the single byte C from encoding FROM to TO, returning the first
91 This function probably shouldn't be used at all, but some code still does
94 recode_byte (const char *to, const char *from, char c)
97 char *s = recode_string (to, from, &c, 1);
103 /* Similar to recode_string_pool, but allocates the returned value on the heap
104 instead of in a pool. It is the caller's responsibility to free the
107 recode_string (const char *to, const char *from,
108 const char *text, int length)
110 return recode_string_pool (to, from, text, length, NULL);
113 /* Returns the length, in bytes, of the string that a similar recode_string()
114 call would return. */
116 recode_string_len (const char *to, const char *from,
117 const char *text, int length)
119 char *s = recode_string (to, from, text, length);
120 size_t len = strlen (s);
125 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
126 at OP, and appends a null terminator to the output.
128 Returns the output length if successful, -1 if the output buffer is too
131 try_recode (iconv_t conv,
132 const char *ip, size_t inbytes,
133 char *op_, size_t outbytes)
135 /* FIXME: Need to ensure that this char is valid in the target encoding */
136 const char fallbackchar = '?';
139 /* Put the converter into the initial shift state, in case there was any
140 state information left over from its last usage. */
141 iconv (conv, NULL, 0, NULL, 0);
143 while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
144 &op, &outbytes) == -1)
150 *op++ = fallbackchar;
157 *op++ = fallbackchar;
167 /* should never happen */
168 fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
180 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
181 dynamically allocated string in TO-encoding. Any characters which cannot be
182 converted will be represented by '?'.
184 LENGTH should be the length of the string, or -1 if it is null-terminated.
186 The returned string will be allocated on POOL.
188 This function's behaviour differs from that of g_convert_with_fallback
189 provided by GLib. The GLib function will fail (returns NULL) if any part of
190 the input string is not valid in the declared input encoding. This function
191 however perseveres even in the presence of badly encoded input. */
193 recode_string_pool (const char *to, const char *from,
194 const char *text, int length, struct pool *pool)
196 struct substring out;
202 length = strlen (text);
204 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
208 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
209 dynamically allocated string in TO-encoding. Any characters which cannot be
210 converted will be represented by '?'.
212 The returned string will be null-terminated and allocated on POOL.
214 This function's behaviour differs from that of g_convert_with_fallback
215 provided by GLib. The GLib function will fail (returns NULL) if any part of
216 the input string is not valid in the declared input encoding. This function
217 however perseveres even in the presence of badly encoded input. */
219 recode_substring_pool (const char *to, const char *from,
220 struct substring text, struct pool *pool)
222 size_t outbufferlength;
226 to = default_encoding;
229 from = default_encoding;
231 conv = create_iconv (to, from);
233 if ( (iconv_t) -1 == conv )
235 struct substring out;
236 ss_alloc_substring_pool (&out, text, pool);
240 for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
241 if ( outbufferlength > text.length)
243 char *output = pool_malloc (pool, outbufferlength);
244 ssize_t output_len = try_recode (conv, text.string, text.length,
245 output, outbufferlength);
247 return ss_buffer (output, output_len);
248 pool_free (pool, output);
257 setlocale (LC_CTYPE, "");
258 setlocale (LC_MESSAGES, "");
260 setlocale (LC_PAPER, "");
262 bindtextdomain (PACKAGE, relocate(locale_dir));
263 textdomain (PACKAGE);
265 assert (default_encoding == NULL);
266 default_encoding = xstrdup (locale_charset ());
273 get_default_encoding (void)
275 return default_encoding;
279 set_default_encoding (const char *enc)
281 free (default_encoding);
282 default_encoding = xstrdup (enc);
286 /* Attempts to set the encoding from a locale name
287 returns true if successful.
288 This function does not (should not!) alter the current locale.
291 set_encoding_from_locale (const char *loc)
296 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
298 setlocale (LC_CTYPE, "C");
299 c_encoding = xstrdup (locale_charset ());
301 setlocale (LC_CTYPE, loc);
302 loc_encoding = xstrdup (locale_charset ());
305 if ( 0 == strcmp (loc_encoding, c_encoding))
311 setlocale (LC_CTYPE, tmp);
317 free (default_encoding);
318 default_encoding = loc_encoding;
331 struct hmapx_node *node;
332 struct converter *cvtr;
334 HMAPX_FOR_EACH (cvtr, node, &map)
337 free (cvtr->fromcode);
338 iconv_close (cvtr->conv);
342 hmapx_destroy (&map);
344 free (default_encoding);
345 default_encoding = NULL;
351 valid_encoding (const char *enc)
353 iconv_t conv = iconv_open (UTF8, enc);
355 if ( conv == (iconv_t) -1)
364 /* Return the system locale's idea of the
365 decimal separator character */
367 get_system_decimal (void)
371 char *ol = xstrdup (setlocale (LC_NUMERIC, NULL));
372 setlocale (LC_NUMERIC, "");
375 radix_char = nl_langinfo (RADIXCHAR)[0];
379 snprintf (buf, sizeof buf, "%f", 2.5);
384 /* We MUST leave LC_NUMERIC untouched, since it would
385 otherwise interfere with data_{in,out} */
386 setlocale (LC_NUMERIC, ol);
392 uc_name (ucs4_t uc, char buffer[16])
394 if (uc >= 0x20 && uc < 0x7f)
395 snprintf (buffer, 16, "`%c'", uc);
397 snprintf (buffer, 16, "U+%04X", uc);