1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
33 #include "libpspp/assertion.h"
34 #include "libpspp/compiler.h"
35 #include "libpspp/hmapx.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/pool.h"
38 #include "libpspp/str.h"
39 #include "libpspp/version.h"
41 #include "gl/c-strcase.h"
42 #include "gl/localcharset.h"
43 #include "gl/minmax.h"
44 #include "gl/xalloc.h"
45 #include "gl/relocatable.h"
46 #include "gl/xstrndup.h"
49 #define _(msgid) gettext (msgid)
59 static char *default_encoding;
60 static struct hmapx map;
62 /* A wrapper around iconv_open */
63 static struct converter *
64 create_iconv__ (const char* tocode, const char* fromcode)
67 struct hmapx_node *node;
68 struct converter *converter;
71 hash = hash_string (tocode, hash_string (fromcode, 0));
72 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
73 if (!strcmp (tocode, converter->tocode)
74 && !strcmp (fromcode, converter->fromcode))
77 converter = xmalloc (sizeof *converter);
78 converter->tocode = xstrdup (tocode);
79 converter->fromcode = xstrdup (fromcode);
80 converter->conv = iconv_open (tocode, fromcode);
81 converter->error = converter->conv == (iconv_t) -1 ? errno : 0;
82 hmapx_insert (&map, converter, hash);
88 create_iconv (const char* tocode, const char* fromcode)
90 struct converter *converter;
92 converter = create_iconv__ (tocode, fromcode);
94 /* I don't think it's safe to translate this string or to use messaging
95 as the converters have not yet been set up */
96 if (converter->error && strcmp (tocode, fromcode))
100 "cannot create a converter for `%s' to `%s': %s\n",
101 fromcode, tocode, strerror (converter->error));
102 converter->error = 0;
105 return converter->conv;
108 /* Converts the single byte C from encoding FROM to TO, returning the first
111 This function probably shouldn't be used at all, but some code still does
114 recode_byte (const char *to, const char *from, char c)
117 char *s = recode_string (to, from, &c, 1);
123 /* Similar to recode_string_pool, but allocates the returned value on the heap
124 instead of in a pool. It is the caller's responsibility to free the
127 recode_string (const char *to, const char *from,
128 const char *text, int length)
130 return recode_string_pool (to, from, text, length, NULL);
133 /* Returns the length, in bytes, of the string that a similar recode_string()
134 call would return. */
136 recode_string_len (const char *to, const char *from,
137 const char *text, int length)
139 char *s = recode_string (to, from, text, length);
140 size_t len = strlen (s);
145 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
146 at OP, and appends a null terminator to the output.
148 Returns the output length if successful, -1 if the output buffer is too
151 try_recode (iconv_t conv,
152 const char *in, size_t inbytes,
153 char *out_, size_t outbytes)
155 /* FIXME: Need to ensure that this char is valid in the target encoding */
156 const char fallbackchar = '?';
160 /* Put the converter into the initial shift state, in case there was any
161 state information left over from its last usage. */
162 iconv (conv, NULL, 0, NULL, 0);
164 /* Do two rounds of iconv() calls:
166 - The first round does the bulk of the conversion using the
167 caller-supplied input data.
169 - The second round flushes any leftover output. This has a real effect
170 with input encodings that use combining diacritics, e.g. without the
171 second round the last character tends to get dropped when converting
172 from windows-1258 to other encodings.
174 for (i = 0; i < 2; i++)
176 ICONV_CONST char **inp = i ? NULL : (ICONV_CONST char **) &in;
177 size_t *inbytesp = i ? NULL : &inbytes;
179 while (iconv (conv, inp, inbytesp, &out, &outbytes) == -1)
185 *out++ = fallbackchar;
192 *out++ = fallbackchar;
205 /* should never happen */
206 fprintf (stderr, "Character conversion error: %s\n",
220 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
221 dynamically allocated string in TO-encoding. Any characters which cannot be
222 converted will be represented by '?'.
224 LENGTH should be the length of the string or -1, if null terminated.
226 The returned string will be allocated on POOL.
228 This function's behaviour differs from that of g_convert_with_fallback
229 provided by GLib. The GLib function will fail (returns NULL) if any part of
230 the input string is not valid in the declared input encoding. This function
231 however perseveres even in the presence of badly encoded input. */
233 recode_string_pool (const char *to, const char *from,
234 const char *text, int length, struct pool *pool)
236 struct substring out;
242 length = strlen (text);
244 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
248 /* Returns the name of the encoding that should be used for file names.
250 This is meant to be the same encoding used by g_filename_from_uri() and
251 g_filename_to_uri() in GLib. */
253 filename_encoding (void)
255 #if defined _WIN32 || defined __WIN32__
258 return locale_charset ();
/* Builds a new string from the A_LEN bytes at A followed by the B_LEN bytes
   at B.  The buffer comes from xmalloc() and is sized A_LEN + B_LEN + 1 so
   that a null terminator can be stored after the copied bytes.  Neither A
   nor B needs to be null-terminated, because only the stated byte counts are
   copied. */
263 xconcat2 (const char *a, size_t a_len,
264 const char *b, size_t b_len)
266 char *s = xmalloc (a_len + b_len + 1);
267 memcpy (s, a, a_len);
268 memcpy (s + a_len, b, b_len);
269 s[a_len + b_len] = '\0';
273 /* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
274 TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
275 ENCODING. If the re-encoded result is no more than MAX_LEN bytes long, then
276 it returns HEAD_LEN. Otherwise, it drops one character[*] from the end of
277 HEAD and tries again, repeating as necessary until the concatenated result
278 fits or until HEAD_LEN reaches 0.
280 [*] Actually this function drops grapheme clusters instead of characters, so
281 that, e.g. a Unicode character followed by a combining accent character
282 is either completely included or completely excluded from HEAD_LEN. See
283 UAX #29 at http://unicode.org/reports/tr29/ for more information on
286 A null ENCODING is treated as UTF-8.
288 Sometimes this function has to actually construct the concatenated string to
289 measure its length. When this happens, it sets *RESULTP to that
290 null-terminated string, allocated with malloc(), for the caller to use if it
291 needs it. Otherwise, it sets *RESULTP to NULL.
293 Simple examples for encoding="UTF-8", max_len=6:
295 head="abc", tail="xyz" => 3
296 head="abcd", tail="xyz" => 3 ("d" dropped).
297 head="abc", tail="uvwxyz" => 0 ("abc" dropped).
298 head="abc", tail="tuvwxyz" => 0 ("abc" dropped).
300 Examples for encoding="ISO-8859-1", max_len=6:
302 head="éèä", tail="xyz" => 6
303 (each letter in head is only 1 byte in ISO-8859-1 even though they
304 each take 2 bytes in UTF-8 encoding)
307 utf8_encoding_concat__ (const char *head, size_t head_len,
308 const char *tail, size_t tail_len,
309 const char *encoding, size_t max_len,
315 else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))
317 if (head_len + tail_len <= max_len)
319 else if (tail_len >= max_len)
329 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
331 ofs <= max_len - tail_len;
336 mblen = u8_mbtouc (&next,
337 CHAR_CAST (const uint8_t *, head + ofs),
339 if (uc_is_grapheme_break (prev, next))
352 result = (tail_len > 0
353 ? xconcat2 (head, head_len, tail, tail_len)
354 : CONST_CAST (char *, head));
355 if (recode_string_len (encoding, "UTF-8", result,
356 head_len + tail_len) <= max_len)
358 *resultp = result != head ? result : NULL;
363 bool correct_result = false;
370 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
377 mblen = u8_mbtouc (&next,
378 CHAR_CAST (const uint8_t *, head + ofs),
380 if (uc_is_grapheme_break (prev, next))
384 memcpy (result, head, ofs);
385 memcpy (result + ofs, tail, tail_len);
386 result[ofs + tail_len] = '\0';
389 if (recode_string_len (encoding, "UTF-8", result,
390 ofs + tail_len) <= max_len)
392 correct_result = true;
396 correct_result = false;
415 /* Concatenates a prefix of HEAD with all of TAIL and returns the result as a
416 null-terminated string owned by the caller. HEAD, TAIL, and the returned
417 string are all encoded in UTF-8. As many characters[*] from the beginning
418 of HEAD are included as will fit within MAX_LEN bytes supposing that the
419 resulting string were to be re-encoded in ENCODING. All of TAIL is always
420 included, even if TAIL by itself is longer than MAX_LEN in ENCODING.
422 [*] Actually this function drops grapheme clusters instead of characters, so
423 that, e.g. a Unicode character followed by a combining accent character
424 is either completely included or completely excluded from the returned
425 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
426 information on grapheme clusters.
428 A null ENCODING is treated as UTF-8.
430 Simple examples for encoding="UTF-8", max_len=6:
432 head="abc", tail="xyz" => "abcxyz"
433 head="abcd", tail="xyz" => "abcxyz"
434 head="abc", tail="uvwxyz" => "uvwxyz"
435 head="abc", tail="tuvwxyz" => "tuvwxyz"
437 Examples for encoding="ISO-8859-1", max_len=6:
439 head="éèä", tail="xyz" => "éèäxyz"
440 (each letter in HEAD is only 1 byte in ISO-8859-1 even though they
441 each take 2 bytes in UTF-8 encoding)
444 utf8_encoding_concat (const char *head, const char *tail,
445 const char *encoding, size_t max_len)
447 size_t tail_len = strlen (tail);
451 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
452 encoding, max_len, &result);
453 return (result != NULL
455 : xconcat2 (head, prefix_len, tail, tail_len));
458 /* Returns the length, in bytes, of the string that would be returned by
459 utf8_encoding_concat() if passed the same arguments, but the implementation
460 is often more efficient. */
462 utf8_encoding_concat_len (const char *head, const char *tail,
463 const char *encoding, size_t max_len)
465 size_t tail_len = strlen (tail);
469 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
470 encoding, max_len, &result);
472 return prefix_len + tail_len;
475 /* Returns an allocated, null-terminated string, owned by the caller,
476 containing as many characters[*] from the beginning of S that would fit
477 within MAX_LEN bytes if the returned string were to be re-encoded in
478 ENCODING. Both S and the returned string are encoded in UTF-8.
480 [*] Actually this function drops grapheme clusters instead of characters, so
481 that, e.g. a Unicode character followed by a combining accent character
482 is either completely included or completely excluded from the returned
483 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
484 information on grapheme clusters.
486 A null ENCODING is treated as UTF-8.
489 utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
491 return utf8_encoding_concat (s, "", encoding, max_len);
494 /* Returns the length, in bytes, of the string that would be returned by
495 utf8_encoding_trunc() if passed the same arguments, but the implementation
496 is often more efficient. */
498 utf8_encoding_trunc_len (const char *s, const char *encoding, size_t max_len)
500 return utf8_encoding_concat_len (s, "", encoding, max_len);
503 /* Returns FILENAME converted from UTF-8 to the filename encoding.
504 On Windows the filename encoding is UTF-8; elsewhere it is based on the
507 utf8_to_filename (const char *filename)
509 return recode_string (filename_encoding (), "UTF-8", filename, -1);
512 /* Returns FILENAME converted from the filename encoding to UTF-8.
513 On Windows the filename encoding is UTF-8; elsewhere it is based on the
516 filename_to_utf8 (const char *filename)
518 return recode_string ("UTF-8", filename_encoding (), filename, -1);
521 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
522 dynamically allocated string in TO-encoding. Any characters which cannot be
523 converted will be represented by '?'.
525 The returned string will be null-terminated and allocated on POOL with
528 This function's behaviour differs from that of g_convert_with_fallback
529 provided by GLib. The GLib function will fail (returns NULL) if any part of
530 the input string is not valid in the declared input encoding. This function
531 however perseveres even in the presence of badly encoded input. */
533 recode_substring_pool (const char *to, const char *from,
534 struct substring text, struct pool *pool)
536 size_t outbufferlength;
540 to = default_encoding;
543 from = default_encoding;
545 conv = create_iconv (to, from);
547 if ( (iconv_t) -1 == conv )
549 struct substring out;
551 out.string = pool_malloc (pool, text.length + 1);
552 out.length = text.length;
553 memcpy (out.string, text.string, text.length);
554 out.string[out.length] = '\0';
558 for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
559 if ( outbufferlength > text.length)
561 char *output = pool_malloc (pool, outbufferlength);
562 ssize_t output_len = try_recode (conv, text.string, text.length,
563 output, outbufferlength);
565 return ss_buffer (output, output_len);
566 pool_free (pool, output);
575 setlocale (LC_ALL, "");
576 bindtextdomain (PACKAGE, relocate(locale_dir));
577 textdomain (PACKAGE);
579 assert (default_encoding == NULL);
580 default_encoding = xstrdup (locale_charset ());
/* Returns the current value of the file-scope default_encoding pointer.  The
   pointer itself is returned, not a copy of the string it points to. */
586 get_default_encoding (void)
588 return default_encoding;
/* Replaces the default encoding with a private copy of ENC: the previous
   default_encoding string is freed and a fresh xstrdup() of ENC is stored in
   its place, so the caller keeps ownership of ENC. */
592 set_default_encoding (const char *enc)
594 free (default_encoding);
595 default_encoding = xstrdup (enc);
599 /* Attempts to set the encoding from a locale name.
600 Returns true if successful.
601 This function does not (should not!) alter the current locale.
604 set_encoding_from_locale (const char *loc)
609 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
611 setlocale (LC_CTYPE, "C");
612 c_encoding = xstrdup (locale_charset ());
614 setlocale (LC_CTYPE, loc);
615 loc_encoding = xstrdup (locale_charset ());
618 if ( 0 == strcmp (loc_encoding, c_encoding))
623 setlocale (LC_CTYPE, tmp);
629 free (default_encoding);
630 default_encoding = loc_encoding;
643 struct hmapx_node *node;
644 struct converter *cvtr;
646 HMAPX_FOR_EACH (cvtr, node, &map)
649 free (cvtr->fromcode);
650 if (cvtr->conv != (iconv_t) -1)
651 iconv_close (cvtr->conv);
655 hmapx_destroy (&map);
657 free (default_encoding);
658 default_encoding = NULL;
664 valid_encoding (const char *enc)
666 iconv_t conv = iconv_open (UTF8, enc);
668 if ( conv == (iconv_t) -1)
677 /* Return the system locale's idea of the
678 decimal separator character. */
680 get_system_decimal (void)
685 radix_char = nl_langinfo (RADIXCHAR)[0];
689 snprintf (buf, sizeof buf, "%f", 2.5);
698 uc_name (ucs4_t uc, char buffer[16])
700 if (uc >= 0x20 && uc < 0x7f)
701 snprintf (buffer, 16, "`%c'", uc);
703 snprintf (buffer, 16, "U+%04X", uc);
707 /* UTF-8 functions that deal with uppercase/lowercase distinctions. */
709 /* Returns a hash value for the N bytes of UTF-8 encoded data starting at S,
710 with lowercase and uppercase letters treated as equal, starting from
713 utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis)
715 uint8_t folded_buf[2048];
716 size_t folded_len = sizeof folded_buf;
720 folded_s = u8_casefold (CHAR_CAST (const uint8_t *, s), n,
721 NULL, UNINORM_NFKD, folded_buf, &folded_len);
722 if (folded_s != NULL)
724 hash = hash_bytes (folded_s, folded_len, basis);
725 if (folded_s != folded_buf)
732 hash = hash_bytes (s, n, basis);
738 /* Returns a hash value for null-terminated UTF-8 string S, with lowercase and
739 uppercase letters treated as equal, starting from BASIS. */
741 utf8_hash_case_string (const char *s, unsigned int basis)
743 return utf8_hash_case_bytes (s, strlen (s), basis);
746 /* Compares UTF-8 strings A and B case-insensitively.
747 Returns a negative value if A < B, zero if A == B, positive if A > B. */
749 utf8_strcasecmp (const char *a, const char *b)
751 return utf8_strncasecmp (a, strlen (a), b, strlen (b));
754 /* Compares UTF-8 strings A (with length AN) and B (with length BN)
756 Returns a negative value if A < B, zero if A == B, positive if A > B. */
758 utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn)
762 if (u8_casecmp (CHAR_CAST (const uint8_t *, a), an,
763 CHAR_CAST (const uint8_t *, b), bn,
764 NULL, UNINORM_NFKD, &result))
769 result = memcmp (a, b, MIN (an, bn));
771 result = an < bn ? -1 : an > bn;
778 utf8_casemap (const char *s,
779 uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
780 uint8_t *, size_t *))
785 result = CHAR_CAST (char *,
786 f (CHAR_CAST (const uint8_t *, s), strlen (s) + 1,
787 NULL, NULL, NULL, &size));
793 result = xstrdup (s);
/* Passes the null-terminated UTF-8 string S to utf8_casemap() with
   u8_toupper as the mapping function and returns that call's result. */
799 utf8_to_upper (const char *s)
801 return utf8_casemap (s, u8_toupper);
/* Passes the null-terminated UTF-8 string S to utf8_casemap() with
   u8_tolower as the mapping function and returns that call's result. */
805 utf8_to_lower (const char *s)
807 return utf8_casemap (s, u8_tolower);
811 get_encoding_info (struct encoding_info *e, const char *name)
813 const struct substring in = SS_LITERAL_INITIALIZER (
815 "!\"#$%&'()*+,-./0123456789:;<=>?@"
816 "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
817 "abcdefghijklmnopqrstuvwxyz{|}~");
819 struct substring out, cr, lf, space;
822 memset (e, 0, sizeof *e);
824 cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
825 lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
826 space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
828 && cr.length <= MAX_UNIT
829 && cr.length == lf.length
830 && cr.length == space.length);
833 fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
837 ss_alloc_substring (&cr, ss_cstr ("\r"));
838 ss_alloc_substring (&lf, ss_cstr ("\n"));
839 ss_alloc_substring (&space, ss_cstr (" "));
843 memcpy (e->cr, cr.string, e->unit);
844 memcpy (e->lf, lf.string, e->unit);
845 memcpy (e->space, space.string, e->unit);
851 out = recode_substring_pool ("UTF-8", name, in, NULL);
852 e->is_ascii_compatible = ss_equals (in, out);
855 if (!e->is_ascii_compatible && e->unit == 1)
857 out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
858 e->is_ebcdic_compatible = (out.length == 1
859 && (uint8_t) out.string[0] == 0xc1);
863 e->is_ebcdic_compatible = false;
/* Returns the is_ascii_compatible flag that get_encoding_info() computes for
   ENCODING.  Every call fills in a fresh local struct encoding_info. */
869 is_encoding_ascii_compatible (const char *encoding)
871 struct encoding_info e;
873 get_encoding_info (&e, encoding);
874 return e.is_ascii_compatible;
/* Returns the is_ebcdic_compatible flag that get_encoding_info() computes for
   ENCODING.  Every call fills in a fresh local struct encoding_info. */
878 is_encoding_ebcdic_compatible (const char *encoding)
880 struct encoding_info e;
882 get_encoding_info (&e, encoding);
883 return e.is_ebcdic_compatible;
886 /* Returns true if iconv can convert ENCODING to and from UTF-8,
889 is_encoding_supported (const char *encoding)
891 return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
892 && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
895 /* Returns true if E is the name of a UTF-8 encoding.
897 XXX Possibly we should test not E as a string but its properties via
900 is_encoding_utf8 (const char *e)
902 return ((e[0] == 'u' || e[0] == 'U')
903 && (e[1] == 't' || e[1] == 'T')
904 && (e[2] == 'f' || e[2] == 'F')
905 && ((e[3] == '8' && e[4] == '\0')
906 || (e[3] == '-' && e[4] == '8' && e[5] == '\0')));
909 static struct encoding_category *categories;
910 static int n_categories;
912 static void SENTINEL (0)
913 add_category (size_t *allocated_categories, const char *category, ...)
915 struct encoding_category *c;
916 const char *encodings[16];
920 /* Count encoding arguments. */
921 va_start (args, category);
923 while ((encodings[n] = va_arg (args, const char *)) != NULL)
925 const char *encoding = encodings[n];
926 if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding))
929 assert (n < sizeof encodings / sizeof *encodings);
935 if (n_categories >= *allocated_categories)
936 categories = x2nrealloc (categories,
937 allocated_categories, sizeof *categories);
939 c = &categories[n_categories++];
940 c->category = category;
941 c->encodings = xmalloc (n * sizeof *c->encodings);
942 for (i = 0; i < n; i++)
943 c->encodings[i] = encodings[i];
948 init_encoding_categories (void)
958 add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
959 "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL);
960 add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256",
962 add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL);
963 add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4",
964 "Windows-1257", NULL_SENTINEL);
965 add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL);
966 add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2",
967 "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL);
968 add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK",
969 "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL);
970 add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS",
971 "EUC-TW", NULL_SENTINEL);
972 add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL);
973 add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111",
974 "KOI8-R", "MacCyrillic", NULL_SENTINEL);
975 add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL);
976 add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian",
978 add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL);
979 add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL);
980 add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL);
981 add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL);
982 add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255",
984 add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL);
985 add_category (&alloc, _("Hindi"), "MacDevangari", NULL_SENTINEL);
986 add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL);
987 add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS",
989 add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC",
991 add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL);
992 add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian",
994 add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL);
995 add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874",
997 add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254",
999 add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS",
1000 "Windows-1258", NULL_SENTINEL);
1001 add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15",
1002 "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL);
1005 /* Returns an array of "struct encoding_category" that contains only the
1006 categories and encodings that the system supports. */
1007 struct encoding_category *
1008 get_encoding_categories (void)
1010 init_encoding_categories ();
1014 /* Returns the number of elements in the array returned by
1015 get_encoding_categories(). */
1017 get_n_encoding_categories (void)
1019 init_encoding_categories ();
1020 return n_categories;