1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
33 #include "libpspp/assertion.h"
34 #include "libpspp/compiler.h"
35 #include "libpspp/hmapx.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/pool.h"
38 #include "libpspp/str.h"
39 #include "libpspp/version.h"
41 #include "gl/c-strcase.h"
42 #include "gl/localcharset.h"
43 #include "gl/minmax.h"
44 #include "gl/xalloc.h"
45 #include "gl/relocatable.h"
46 #include "gl/xstrndup.h"
49 #define _(msgid) gettext (msgid)
59 static char *default_encoding;
60 static struct hmapx map;
62 /* A wrapper around iconv_open */
63 static struct converter *
64 create_iconv__ (const char* tocode, const char* fromcode)
67 struct hmapx_node *node;
68 struct converter *converter;
71 hash = hash_string (tocode, hash_string (fromcode, 0));
72 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
73 if (!strcmp (tocode, converter->tocode)
74 && !strcmp (fromcode, converter->fromcode))
77 converter = xmalloc (sizeof *converter);
78 converter->tocode = xstrdup (tocode);
79 converter->fromcode = xstrdup (fromcode);
80 converter->conv = iconv_open (tocode, fromcode);
81 converter->error = converter->conv == (iconv_t) -1 ? errno : 0;
82 hmapx_insert (&map, converter, hash);
88 create_iconv (const char* tocode, const char* fromcode)
90 struct converter *converter;
92 converter = create_iconv__ (tocode, fromcode);
94 /* I don't think it's safe to translate this string or to use messaging
95 as the converters have not yet been set up */
96 if (converter->error && strcmp (tocode, fromcode))
100 "cannot create a converter for `%s' to `%s': %s\n",
101 fromcode, tocode, strerror (converter->error));
102 converter->error = 0;
105 return converter->conv;
108 /* Converts the single byte C from encoding FROM to TO, returning the first
111 This function probably shouldn't be used at all, but some code still does
114 recode_byte (const char *to, const char *from, char c)
117 char *s = recode_string (to, from, &c, 1);
123 /* Similar to recode_string_pool, but allocates the returned value on the heap
124 instead of in a pool. It is the caller's responsibility to free the
127 recode_string (const char *to, const char *from,
128 const char *text, int length)
130 return recode_string_pool (to, from, text, length, NULL);
133 /* Returns the length, in bytes, of the string that a similar recode_string()
134 call would return. */
136 recode_string_len (const char *to, const char *from,
137 const char *text, int length)
139 char *s = recode_string (to, from, text, length);
140 size_t len = strlen (s);
145 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
146 at OP, and appends a null terminator to the output.
148 Returns the output length if successful, -1 if the output buffer is too
151 try_recode (iconv_t conv, char fallbackchar,
152 const char *in, size_t inbytes,
153 char *out_, size_t outbytes)
158 /* Put the converter into the initial shift state, in case there was any
159 state information left over from its last usage. */
160 iconv (conv, NULL, 0, NULL, 0);
162 /* Do two rounds of iconv() calls:
164 - The first round does the bulk of the conversion using the
165 caller-supplied input data.
167 - The second round flushes any leftover output. This has a real effect
168 with input encodings that use combining diacritics, e.g. without the
169 second round the last character tends to get dropped when converting
170 from windows-1258 to other encodings.
172 for (i = 0; i < 2; i++)
174 ICONV_CONST char **inp = i ? NULL : (ICONV_CONST char **) ∈
175 size_t *inbytesp = i ? NULL : &inbytes;
177 while (iconv (conv, inp, inbytesp, &out, &outbytes) == -1)
185 *out++ = fallbackchar;
194 *out++ = fallbackchar;
207 /* should never happen */
208 fprintf (stderr, "Character conversion error: %s\n",
222 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
223 dynamically allocated string in TO-encoding. Any characters which cannot be
224 converted will be represented by '?'.
226 LENGTH should be the length of the string or -1, if null terminated.
228 The returned string will be allocated on POOL.
230 This function's behaviour differs from that of g_convert_with_fallback
231 provided by GLib. The GLib function will fail (returns NULL) if any part of
232 the input string is not valid in the declared input encoding. This function
233 however perseveres even in the presence of badly encoded input. */
235 recode_string_pool (const char *to, const char *from,
236 const char *text, int length, struct pool *pool)
238 struct substring out;
244 length = strlen (text);
246 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
250 /* Returns the name of the encoding that should be used for file names.
252 This is meant to be the same encoding used by g_filename_from_uri() and
253 g_filename_to_uri() in GLib. */
255 filename_encoding (void)
257 #if defined _WIN32 || defined __WIN32__
260 return locale_charset ();
265 xconcat2 (const char *a, size_t a_len,
266 const char *b, size_t b_len)
268 char *s = xmalloc (a_len + b_len + 1);
269 memcpy (s, a, a_len);
270 memcpy (s + a_len, b, b_len);
271 s[a_len + b_len] = '\0';
275 /* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
276 TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
277 ENCODING. If the re-encoded result is no more than MAX_LEN bytes long, then
278 it returns HEAD_LEN. Otherwise, it drops one character[*] from the end of
279 HEAD and tries again, repeating as necessary until the concatenated result
280 fits or until HEAD_LEN reaches 0.
282 [*] Actually this function drops grapheme clusters instead of characters, so
283 that, e.g. a Unicode character followed by a combining accent character
284 is either completely included or completely excluded from HEAD_LEN. See
285 UAX #29 at http://unicode.org/reports/tr29/ for more information on
288 A null ENCODING is treated as UTF-8.
290 Sometimes this function has to actually construct the concatenated string to
291 measure its length. When this happens, it sets *RESULTP to that
292 null-terminated string, allocated with malloc(), for the caller to use if it
293 needs it. Otherwise, it sets *RESULTP to NULL.
295 Simple examples for encoding="UTF-8", max_len=6:
297 head="abc", tail="xyz" => 3
298 head="abcd", tail="xyz" => 3 ("d" dropped).
299 head="abc", tail="uvwxyz" => 0 ("abc" dropped).
300 head="abc", tail="tuvwxyz" => 0 ("abc" dropped).
302 Examples for encoding="ISO-8859-1", max_len=6:
304 head="éèä", tail="xyz" => 6
305 (each letter in head is only 1 byte in ISO-8859-1 even though they
306 each take 2 bytes in UTF-8 encoding)
309 utf8_encoding_concat__ (const char *head, size_t head_len,
310 const char *tail, size_t tail_len,
311 const char *encoding, size_t max_len,
317 else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))
319 if (head_len + tail_len <= max_len)
321 else if (tail_len >= max_len)
331 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
333 ofs <= max_len - tail_len;
338 mblen = u8_mbtouc (&next,
339 CHAR_CAST (const uint8_t *, head + ofs),
341 if (uc_is_grapheme_break (prev, next))
354 result = (tail_len > 0
355 ? xconcat2 (head, head_len, tail, tail_len)
356 : CONST_CAST (char *, head));
357 if (recode_string_len (encoding, "UTF-8", result,
358 head_len + tail_len) <= max_len)
360 *resultp = result != head ? result : NULL;
365 bool correct_result = false;
372 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
379 mblen = u8_mbtouc (&next,
380 CHAR_CAST (const uint8_t *, head + ofs),
382 if (uc_is_grapheme_break (prev, next))
386 memcpy (result, head, ofs);
387 memcpy (result + ofs, tail, tail_len);
388 result[ofs + tail_len] = '\0';
391 if (recode_string_len (encoding, "UTF-8", result,
392 ofs + tail_len) <= max_len)
394 correct_result = true;
398 correct_result = false;
417 /* Concatenates a prefix of HEAD with all of TAIL and returns the result as a
418 null-terminated string owned by the caller. HEAD, TAIL, and the returned
419 string are all encoded in UTF-8. As many characters[*] from the beginning
420 of HEAD are included as will fit within MAX_LEN bytes supposing that the
421 resulting string were to be re-encoded in ENCODING. All of TAIL is always
422 included, even if TAIL by itself is longer than MAX_LEN in ENCODING.
424 [*] Actually this function drops grapheme clusters instead of characters, so
425 that, e.g. a Unicode character followed by a combining accent character
426 is either completely included or completely excluded from the returned
427 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
428 information on grapheme clusters.
430 A null ENCODING is treated as UTF-8.
432 Simple examples for encoding="UTF-8", max_len=6:
434 head="abc", tail="xyz" => "abcxyz"
435 head="abcd", tail="xyz" => "abcxyz"
436 head="abc", tail="uvwxyz" => "uvwxyz"
437 head="abc", tail="tuvwxyz" => "tuvwxyz"
439 Examples for encoding="ISO-8859-1", max_len=6:
441 head="éèä", tail="xyz" => "éèäxyz"
442 (each letter in HEAD is only 1 byte in ISO-8859-1 even though they
443 each take 2 bytes in UTF-8 encoding)
446 utf8_encoding_concat (const char *head, const char *tail,
447 const char *encoding, size_t max_len)
449 size_t tail_len = strlen (tail);
453 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
454 encoding, max_len, &result);
455 return (result != NULL
457 : xconcat2 (head, prefix_len, tail, tail_len));
460 /* Returns the length, in bytes, of the string that would be returned by
461 utf8_encoding_concat() if passed the same arguments, but the implementation
462 is often more efficient. */
464 utf8_encoding_concat_len (const char *head, const char *tail,
465 const char *encoding, size_t max_len)
467 size_t tail_len = strlen (tail);
471 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
472 encoding, max_len, &result);
474 return prefix_len + tail_len;
477 /* Returns an allocated, null-terminated string, owned by the caller,
478 containing as many characters[*] from the beginning of S that would fit
479 within MAX_LEN bytes if the returned string were to be re-encoded in
480 ENCODING. Both S and the returned string are encoded in UTF-8.
482 [*] Actually this function drops grapheme clusters instead of characters, so
483 that, e.g. a Unicode character followed by a combining accent character
484 is either completely included or completely excluded from the returned
485 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
486 information on grapheme clusters.
488 A null ENCODING is treated as UTF-8.
491 utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
493 return utf8_encoding_concat (s, "", encoding, max_len);
496 /* Returns the length, in bytes, of the string that would be returned by
497 utf8_encoding_trunc() if passed the same arguments, but the implementation
498 is often more efficient. */
500 utf8_encoding_trunc_len (const char *s, const char *encoding, size_t max_len)
502 return utf8_encoding_concat_len (s, "", encoding, max_len);
505 /* Returns FILENAME converted from UTF-8 to the filename encoding.
506 On Windows the filename encoding is UTF-8; elsewhere it is based on the
509 utf8_to_filename (const char *filename)
511 return recode_string (filename_encoding (), "UTF-8", filename, -1);
514 /* Returns FILENAME converted from the filename encoding to UTF-8.
515 On Windows the filename encoding is UTF-8; elsewhere it is based on the
518 filename_to_utf8 (const char *filename)
520 return recode_string ("UTF-8", filename_encoding (), filename, -1);
524 recode_substring_pool__ (const char *to, const char *from,
525 struct substring text, char fallbackchar,
526 struct pool *pool, struct substring *out)
532 to = default_encoding;
535 from = default_encoding;
537 conv = create_iconv (to, from);
539 if ( (iconv_t) -1 == conv )
543 out->string = pool_malloc (pool, text.length + 1);
544 out->length = text.length;
545 memcpy (out->string, text.string, text.length);
546 out->string[out->length] = '\0';
553 for (bufsize = text.length + 1; bufsize > text.length; bufsize *= 2)
555 char *output = pool_malloc (pool, bufsize);
558 retval = try_recode (conv, fallbackchar, text.string, text.length,
562 *out = ss_buffer (output, retval);
565 pool_free (pool, output);
567 if (retval != -E2BIG)
574 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
575 dynamically allocated string in TO-encoding. Any characters which cannot be
576 converted will be represented by '?'.
578 The returned string will be null-terminated and allocated on POOL with
581 This function's behaviour differs from that of g_convert_with_fallback
582 provided by GLib. The GLib function will fail (returns NULL) if any part of
583 the input string is not valid in the declared input encoding. This function
584 however perseveres even in the presence of badly encoded input. */
586 recode_substring_pool (const char *to, const char *from,
587 struct substring text, struct pool *pool)
589 struct substring out;
591 recode_substring_pool__ (to, from, text, '?', pool, &out);
595 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
596 dynamically allocated string in TO-encoding. On success, returns 0, and the
597 converted null-terminated string, allocated from POOL with pool_malloc(), is
598 stored in *OUT. On failure, returns a positive errno value.
600 The function fails with an error if any part of the input string is not
601 valid in the declared input encoding. */
603 recode_pedantically (const char *to, const char *from,
604 struct substring text, struct pool *pool,
605 struct substring *out)
609 error = recode_substring_pool__ (to, from, text, 0, pool, out);
618 setlocale (LC_ALL, "");
619 bindtextdomain (PACKAGE, relocate(locale_dir));
620 textdomain (PACKAGE);
622 assert (default_encoding == NULL);
623 default_encoding = xstrdup (locale_charset ());
629 get_default_encoding (void)
631 return default_encoding;
635 set_default_encoding (const char *enc)
637 free (default_encoding);
638 default_encoding = xstrdup (enc);
642 /* Attempts to set the encoding from the locale name LOC.
643 Returns true if successful.
644 This function does not (should not!) alter the current locale.
647 set_encoding_from_locale (const char *loc)
652 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
654 setlocale (LC_CTYPE, "C");
655 c_encoding = xstrdup (locale_charset ());
657 setlocale (LC_CTYPE, loc);
658 loc_encoding = xstrdup (locale_charset ());
661 if ( 0 == strcmp (loc_encoding, c_encoding))
666 setlocale (LC_CTYPE, tmp);
672 free (default_encoding);
673 default_encoding = loc_encoding;
686 struct hmapx_node *node;
687 struct converter *cvtr;
689 HMAPX_FOR_EACH (cvtr, node, &map)
692 free (cvtr->fromcode);
693 if (cvtr->conv != (iconv_t) -1)
694 iconv_close (cvtr->conv);
698 hmapx_destroy (&map);
700 free (default_encoding);
701 default_encoding = NULL;
707 valid_encoding (const char *enc)
709 iconv_t conv = iconv_open (UTF8, enc);
711 if ( conv == (iconv_t) -1)
720 /* Returns the system locale's idea of the
721 decimal separator character. */
723 get_system_decimal (void)
728 radix_char = nl_langinfo (RADIXCHAR)[0];
732 snprintf (buf, sizeof buf, "%f", 2.5);
741 uc_name (ucs4_t uc, char buffer[16])
743 if (uc >= 0x20 && uc < 0x7f)
744 snprintf (buffer, 16, "`%c'", uc);
746 snprintf (buffer, 16, "U+%04X", uc);
750 /* UTF-8 functions that deal with uppercase/lowercase distinctions. */
752 /* Returns a hash value for the N bytes of UTF-8 encoded data starting at S,
753 with lowercase and uppercase letters treated as equal, starting from
756 utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis)
758 uint8_t folded_buf[2048];
759 size_t folded_len = sizeof folded_buf;
763 folded_s = u8_casefold (CHAR_CAST (const uint8_t *, s), n,
764 NULL, UNINORM_NFKD, folded_buf, &folded_len);
765 if (folded_s != NULL)
767 hash = hash_bytes (folded_s, folded_len, basis);
768 if (folded_s != folded_buf)
775 hash = hash_bytes (s, n, basis);
781 /* Returns a hash value for null-terminated UTF-8 string S, with lowercase and
782 uppercase letters treated as equal, starting from BASIS. */
784 utf8_hash_case_string (const char *s, unsigned int basis)
786 return utf8_hash_case_bytes (s, strlen (s), basis);
789 /* Compares UTF-8 strings A and B case-insensitively.
790 Returns a negative value if A < B, zero if A == B, positive if A > B. */
792 utf8_strcasecmp (const char *a, const char *b)
794 return utf8_strncasecmp (a, strlen (a), b, strlen (b));
797 /* Compares UTF-8 strings A (with length AN) and B (with length BN)
799 Returns a negative value if A < B, zero if A == B, positive if A > B. */
801 utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn)
805 if (u8_casecmp (CHAR_CAST (const uint8_t *, a), an,
806 CHAR_CAST (const uint8_t *, b), bn,
807 NULL, UNINORM_NFKD, &result))
812 result = memcmp (a, b, MIN (an, bn));
814 result = an < bn ? -1 : an > bn;
821 utf8_casemap (const char *s,
822 uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
823 uint8_t *, size_t *))
828 result = CHAR_CAST (char *,
829 f (CHAR_CAST (const uint8_t *, s), strlen (s) + 1,
830 NULL, NULL, NULL, &size));
836 result = xstrdup (s);
842 utf8_to_upper (const char *s)
844 return utf8_casemap (s, u8_toupper);
848 utf8_to_lower (const char *s)
850 return utf8_casemap (s, u8_tolower);
854 get_encoding_info (struct encoding_info *e, const char *name)
856 const struct substring in = SS_LITERAL_INITIALIZER (
858 "!\"#$%&'()*+,-./0123456789:;<=>?@"
859 "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
860 "abcdefghijklmnopqrstuvwxyz{|}~");
862 struct substring out, cr, lf, space;
865 memset (e, 0, sizeof *e);
867 cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
868 lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
869 space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
871 && cr.length <= MAX_UNIT
872 && cr.length == lf.length
873 && cr.length == space.length);
876 fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
880 ss_alloc_substring (&cr, ss_cstr ("\r"));
881 ss_alloc_substring (&lf, ss_cstr ("\n"));
882 ss_alloc_substring (&space, ss_cstr (" "));
886 memcpy (e->cr, cr.string, e->unit);
887 memcpy (e->lf, lf.string, e->unit);
888 memcpy (e->space, space.string, e->unit);
894 out = recode_substring_pool ("UTF-8", name, in, NULL);
895 e->is_ascii_compatible = ss_equals (in, out);
898 if (!e->is_ascii_compatible && e->unit == 1)
900 out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
901 e->is_ebcdic_compatible = (out.length == 1
902 && (uint8_t) out.string[0] == 0xc1);
906 e->is_ebcdic_compatible = false;
912 is_encoding_ascii_compatible (const char *encoding)
914 struct encoding_info e;
916 get_encoding_info (&e, encoding);
917 return e.is_ascii_compatible;
921 is_encoding_ebcdic_compatible (const char *encoding)
923 struct encoding_info e;
925 get_encoding_info (&e, encoding);
926 return e.is_ebcdic_compatible;
929 /* Returns true if iconv can convert ENCODING to and from UTF-8,
932 is_encoding_supported (const char *encoding)
934 return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
935 && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
/* Returns true if E is the name of a UTF-8 encoding, that is, "UTF-8" or
   "UTF8" compared without regard to ASCII case.  A null E names no encoding,
   so it yields false rather than being dereferenced.

   XXX Possibly we should test not E as a string but its properties via
   iconv. */
bool
is_encoding_utf8 (const char *e)
{
  if (e == NULL)
    return false;

  /* The tests short-circuit at the first mismatch (which includes the null
     terminator), so no byte past the end of E is ever read. */
  if ((e[0] != 'u' && e[0] != 'U')
      || (e[1] != 't' && e[1] != 'T')
      || (e[2] != 'f' && e[2] != 'F'))
    return false;

  /* The hyphen between "UTF" and "8" is optional. */
  if (e[3] == '-')
    return e[4] == '8' && e[5] == '\0';
  return e[3] == '8' && e[4] == '\0';
}
952 static struct encoding_category *categories;
953 static int n_categories;
955 static void SENTINEL (0)
956 add_category (size_t *allocated_categories, const char *category, ...)
958 struct encoding_category *c;
959 const char *encodings[16];
963 /* Count encoding arguments. */
964 va_start (args, category);
966 while ((encodings[n] = va_arg (args, const char *)) != NULL)
968 const char *encoding = encodings[n];
969 if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding))
972 assert (n < sizeof encodings / sizeof *encodings);
978 if (n_categories >= *allocated_categories)
979 categories = x2nrealloc (categories,
980 allocated_categories, sizeof *categories);
982 c = &categories[n_categories++];
983 c->category = category;
984 c->encodings = xmalloc (n * sizeof *c->encodings);
985 for (i = 0; i < n; i++)
986 c->encodings[i] = encodings[i];
991 init_encoding_categories (void)
1001 add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
1002 "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL);
1003 add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256",
1005 add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL);
1006 add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4",
1007 "Windows-1257", NULL_SENTINEL);
1008 add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL);
1009 add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2",
1010 "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL);
1011 add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK",
1012 "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL);
1013 add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS",
1014 "EUC-TW", NULL_SENTINEL);
1015 add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL);
1016 add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111",
1017 "KOI8-R", "MacCyrillic", NULL_SENTINEL);
1018 add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL);
1019 add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian",
1021 add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL);
1022 add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL);
1023 add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL);
1024 add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL);
1025 add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255",
1027 add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL);
1028 add_category (&alloc, _("Hindi"), "MacDevangari", NULL_SENTINEL);
1029 add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL);
1030 add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS",
1032 add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC",
1034 add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL);
1035 add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian",
1037 add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL);
1038 add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874",
1040 add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254",
1042 add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS",
1043 "Windows-1258", NULL_SENTINEL);
1044 add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15",
1045 "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL);
1048 /* Returns an array of "struct encoding_category" that contains only the
1049 categories and encodings that the system supports. */
1050 struct encoding_category *
1051 get_encoding_categories (void)
1053 init_encoding_categories ();
1057 /* Returns the number of elements in the array returned by
1058 get_encoding_categories(). */
1060 get_n_encoding_categories (void)
1062 init_encoding_categories ();
1063 return n_categories;