1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
32 #include "libpspp/assertion.h"
33 #include "libpspp/compiler.h"
34 #include "libpspp/hmapx.h"
35 #include "libpspp/hash-functions.h"
36 #include "libpspp/pool.h"
37 #include "libpspp/str.h"
38 #include "libpspp/version.h"
40 #include "gl/c-strcase.h"
41 #include "gl/localcharset.h"
42 #include "gl/minmax.h"
43 #include "gl/xalloc.h"
44 #include "gl/relocatable.h"
45 #include "gl/xstrndup.h"
48 #define _(msgid) gettext (msgid)
58 static char *default_encoding;
59 static struct hmapx map;
61 /* A wrapper around iconv_open */
62 static struct converter *
63 create_iconv__ (const char* tocode, const char* fromcode)
66 struct hmapx_node *node;
67 struct converter *converter;
70 hash = hash_string (tocode, hash_string (fromcode, 0));
71 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
72 if (!strcmp (tocode, converter->tocode)
73 && !strcmp (fromcode, converter->fromcode))
76 converter = xmalloc (sizeof *converter);
77 converter->tocode = xstrdup (tocode);
78 converter->fromcode = xstrdup (fromcode);
79 converter->conv = iconv_open (tocode, fromcode);
80 converter->error = converter->conv == (iconv_t) -1 ? errno : 0;
81 hmapx_insert (&map, converter, hash);
87 create_iconv (const char* tocode, const char* fromcode)
89 struct converter *converter;
91 converter = create_iconv__ (tocode, fromcode);
93 /* I don't think it's safe to translate this string or to use messaging
94 as the converters have not yet been set up */
95 if (converter->error && strcmp (tocode, fromcode))
99 "cannot create a converter for `%s' to `%s': %s\n",
100 fromcode, tocode, strerror (converter->error));
101 converter->error = 0;
104 return converter->conv;
107 /* Converts the single byte C from encoding FROM to TO, returning the first
110 This function probably shouldn't be used at all, but some code still does
113 recode_byte (const char *to, const char *from, char c)
116 char *s = recode_string (to, from, &c, 1);
122 /* Similar to recode_string_pool, but allocates the returned value on the heap
123 instead of in a pool. It is the caller's responsibility to free the
126 recode_string (const char *to, const char *from,
127 const char *text, int length)
129 return recode_string_pool (to, from, text, length, NULL);
132 /* Returns the length, in bytes, of the string that a similar recode_string()
133 call would return. */
135 recode_string_len (const char *to, const char *from,
136 const char *text, int length)
138 char *s = recode_string (to, from, text, length);
139 size_t len = strlen (s);
144 /* Uses CONV to convert the INBYTES starting at IN into the OUTBYTES starting
145 at OUT_, and appends a null terminator to the output.
147 Returns the output length if successful, -E2BIG if the output buffer is too
150 try_recode (iconv_t conv, char fallbackchar,
151 const char *in, size_t inbytes,
152 char *out_, size_t outbytes)
157 /* Put the converter into the initial shift state, in case there was any
158 state information left over from its last usage. */
159 iconv (conv, NULL, 0, NULL, 0);
161 /* Do two rounds of iconv() calls:
163 - The first round does the bulk of the conversion using the
164 caller-supplied input data.
166 - The second round flushes any leftover output. This has a real effect
167 with input encodings that use combining diacritics, e.g. without the
168 second round the last character tends to get dropped when converting
169 from windows-1258 to other encodings.
171 for (i = 0; i < 2; i++)
173 ICONV_CONST char **inp = i ? NULL : (ICONV_CONST char **) ∈
174 size_t *inbytesp = i ? NULL : &inbytes;
176 while (iconv (conv, inp, inbytesp, &out, &outbytes) == -1)
184 *out++ = fallbackchar;
193 *out++ = fallbackchar;
206 /* should never happen */
207 fprintf (stderr, "Character conversion error: %s\n",
221 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
222 dynamically allocated string in TO-encoding. Any characters which cannot be
223 converted will be represented by '?'.
225 LENGTH should be the length of TEXT in bytes, or -1 if TEXT is null-terminated.
227 The returned string will be allocated on POOL.
229 This function's behaviour differs from that of g_convert_with_fallback
230 provided by GLib. The GLib function will fail (returns NULL) if any part of
231 the input string is not valid in the declared input encoding. This function
232 however perseveres even in the presence of badly encoded input. */
234 recode_string_pool (const char *to, const char *from,
235 const char *text, int length, struct pool *pool)
237 struct substring out;
243 length = strlen (text);
245 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
249 /* Returns the name of the encoding that should be used for file names.
251 This is meant to be the same encoding used by g_filename_from_uri() and
252 g_filename_to_uri() in GLib. */
254 filename_encoding (void)
256 #if defined _WIN32 || defined __WIN32__
259 return locale_charset ();
264 xconcat2 (const char *a, size_t a_len,
265 const char *b, size_t b_len)
267 char *s = xmalloc (a_len + b_len + 1);
268 memcpy (s, a, a_len);
269 memcpy (s + a_len, b, b_len);
270 s[a_len + b_len] = '\0';
274 /* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
275 TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
276 ENCODING. If the re-encoded result is no more than MAX_LEN bytes long, then
277 it returns HEAD_LEN. Otherwise, it drops one character[*] from the end of
278 HEAD and tries again, repeating as necessary until the concatenated result
279 fits or until HEAD_LEN reaches 0.
281 [*] Actually this function drops grapheme clusters instead of characters, so
282 that, e.g. a Unicode character followed by a combining accent character
283 is either completely included or completely excluded from HEAD_LEN. See
284 UAX #29 at http://unicode.org/reports/tr29/ for more information on
287 A null ENCODING is treated as UTF-8.
289 Sometimes this function has to actually construct the concatenated string to
290 measure its length. When this happens, it sets *RESULTP to that
291 null-terminated string, allocated with malloc(), for the caller to use if it
292 needs it. Otherwise, it sets *RESULTP to NULL.
294 Simple examples for encoding="UTF-8", max_len=6:
296 head="abc", tail="xyz" => 3
297 head="abcd", tail="xyz" => 3 ("d" dropped).
298 head="abc", tail="uvwxyz" => 0 ("abc" dropped).
299 head="abc", tail="tuvwxyz" => 0 ("abc" dropped).
301 Examples for encoding="ISO-8859-1", max_len=6:
303 head="éèä", tail="xyz" => 6
304 (each letter in head is only 1 byte in ISO-8859-1 even though they
305 each take 2 bytes in UTF-8 encoding)
308 utf8_encoding_concat__ (const char *head, size_t head_len,
309 const char *tail, size_t tail_len,
310 const char *encoding, size_t max_len,
316 else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))
318 if (head_len + tail_len <= max_len)
320 else if (tail_len >= max_len)
330 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
332 ofs <= max_len - tail_len;
337 mblen = u8_mbtouc (&next,
338 CHAR_CAST (const uint8_t *, head + ofs),
340 if (uc_is_grapheme_break (prev, next))
353 result = (tail_len > 0
354 ? xconcat2 (head, head_len, tail, tail_len)
355 : CONST_CAST (char *, head));
356 if (recode_string_len (encoding, "UTF-8", result,
357 head_len + tail_len) <= max_len)
359 *resultp = result != head ? result : NULL;
364 bool correct_result = false;
371 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
378 mblen = u8_mbtouc (&next,
379 CHAR_CAST (const uint8_t *, head + ofs),
381 if (uc_is_grapheme_break (prev, next))
385 memcpy (result, head, ofs);
386 memcpy (result + ofs, tail, tail_len);
387 result[ofs + tail_len] = '\0';
390 if (recode_string_len (encoding, "UTF-8", result,
391 ofs + tail_len) <= max_len)
393 correct_result = true;
397 correct_result = false;
416 /* Concatenates a prefix of HEAD with all of TAIL and returns the result as a
417 null-terminated string owned by the caller. HEAD, TAIL, and the returned
418 string are all encoded in UTF-8. As many characters[*] from the beginning
419 of HEAD are included as will fit within MAX_LEN bytes supposing that the
420 resulting string were to be re-encoded in ENCODING. All of TAIL is always
421 included, even if TAIL by itself is longer than MAX_LEN in ENCODING.
423 [*] Actually this function drops grapheme clusters instead of characters, so
424 that, e.g. a Unicode character followed by a combining accent character
425 is either completely included or completely excluded from the returned
426 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
427 information on grapheme clusters.
429 A null ENCODING is treated as UTF-8.
431 Simple examples for encoding="UTF-8", max_len=6:
433 head="abc", tail="xyz" => "abcxyz"
434 head="abcd", tail="xyz" => "abcxyz"
435 head="abc", tail="uvwxyz" => "uvwxyz"
436 head="abc", tail="tuvwxyz" => "tuvwxyz"
438 Examples for encoding="ISO-8859-1", max_len=6:
440 head="éèä", tail="xyz" => "éèäxyz"
441 (each letter in HEAD is only 1 byte in ISO-8859-1 even though they
442 each take 2 bytes in UTF-8 encoding)
445 utf8_encoding_concat (const char *head, const char *tail,
446 const char *encoding, size_t max_len)
448 size_t tail_len = strlen (tail);
452 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
453 encoding, max_len, &result);
454 return (result != NULL
456 : xconcat2 (head, prefix_len, tail, tail_len));
459 /* Returns the length, in bytes, of the string that would be returned by
460 utf8_encoding_concat() if passed the same arguments, but the implementation
461 is often more efficient. */
463 utf8_encoding_concat_len (const char *head, const char *tail,
464 const char *encoding, size_t max_len)
466 size_t tail_len = strlen (tail);
470 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
471 encoding, max_len, &result);
473 return prefix_len + tail_len;
476 /* Returns an allocated, null-terminated string, owned by the caller,
477 containing as many characters[*] from the beginning of S that would fit
478 within MAX_LEN bytes if the returned string were to be re-encoded in
479 ENCODING. Both S and the returned string are encoded in UTF-8.
481 [*] Actually this function drops grapheme clusters instead of characters, so
482 that, e.g. a Unicode character followed by a combining accent character
483 is either completely included or completely excluded from the returned
484 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
485 information on grapheme clusters.
487 A null ENCODING is treated as UTF-8.
490 utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
492 return utf8_encoding_concat (s, "", encoding, max_len);
495 /* Returns the length, in bytes, of the string that would be returned by
496 utf8_encoding_trunc() if passed the same arguments, but the implementation
497 is often more efficient. */
499 utf8_encoding_trunc_len (const char *s, const char *encoding, size_t max_len)
501 return utf8_encoding_concat_len (s, "", encoding, max_len);
504 /* Returns FILENAME converted from UTF-8 to the filename encoding.
505 On Windows the filename encoding is UTF-8; elsewhere it is based on the
508 utf8_to_filename (const char *filename)
510 return recode_string (filename_encoding (), "UTF-8", filename, -1);
513 /* Returns FILENAME converted from the filename encoding to UTF-8.
514 On Windows the filename encoding is UTF-8; elsewhere it is based on the
517 filename_to_utf8 (const char *filename)
519 return recode_string ("UTF-8", filename_encoding (), filename, -1);
523 recode_substring_pool__ (const char *to, const char *from,
524 struct substring text, char fallbackchar,
525 struct pool *pool, struct substring *out)
531 to = default_encoding;
534 from = default_encoding;
536 conv = create_iconv (to, from);
538 if ( (iconv_t) -1 == conv )
542 out->string = pool_malloc (pool, text.length + 1);
543 out->length = text.length;
544 memcpy (out->string, text.string, text.length);
545 out->string[out->length] = '\0';
552 for (bufsize = text.length + 1; bufsize > text.length; bufsize *= 2)
554 char *output = pool_malloc (pool, bufsize);
557 retval = try_recode (conv, fallbackchar, text.string, text.length,
561 *out = ss_buffer (output, retval);
564 pool_free (pool, output);
566 if (retval != -E2BIG)
573 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
574 dynamically allocated string in TO-encoding. Any characters which cannot be
575 converted will be represented by '?'.
577 The returned string will be null-terminated and allocated on POOL with
580 This function's behaviour differs from that of g_convert_with_fallback
581 provided by GLib. The GLib function will fail (returns NULL) if any part of
582 the input string is not valid in the declared input encoding. This function
583 however perseveres even in the presence of badly encoded input. */
585 recode_substring_pool (const char *to, const char *from,
586 struct substring text, struct pool *pool)
588 struct substring out;
590 recode_substring_pool__ (to, from, text, '?', pool, &out);
594 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
595 dynamically allocated string in TO-encoding. On success, returns 0, and the
596 converted null-terminated string, allocated from POOL with pool_malloc(), is
597 stored in *OUT. On failure, returns a positive errno value.
599 The function fails with an error if any part of the input string is not
600 valid in the declared input encoding. */
602 recode_pedantically (const char *to, const char *from,
603 struct substring text, struct pool *pool,
604 struct substring *out)
608 error = recode_substring_pool__ (to, from, text, 0, pool, out);
617 setlocale (LC_ALL, "");
618 bindtextdomain (PACKAGE, relocate(locale_dir));
619 textdomain (PACKAGE);
621 assert (default_encoding == NULL);
622 default_encoding = xstrdup (locale_charset ());
628 get_default_encoding (void)
630 return default_encoding;
634 set_default_encoding (const char *enc)
636 free (default_encoding);
637 default_encoding = xstrdup (enc);
641 /* Attempts to set the encoding from a locale name.
642 Returns true if successful.
643 This function does not (should not!) alter the current locale.
646 set_encoding_from_locale (const char *loc)
651 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
653 setlocale (LC_CTYPE, "C");
654 c_encoding = xstrdup (locale_charset ());
656 setlocale (LC_CTYPE, loc);
657 loc_encoding = xstrdup (locale_charset ());
660 if ( 0 == strcmp (loc_encoding, c_encoding))
665 setlocale (LC_CTYPE, tmp);
671 free (default_encoding);
672 default_encoding = loc_encoding;
685 struct hmapx_node *node;
686 struct converter *cvtr;
688 HMAPX_FOR_EACH (cvtr, node, &map)
691 free (cvtr->fromcode);
692 if (cvtr->conv != (iconv_t) -1)
693 iconv_close (cvtr->conv);
697 hmapx_destroy (&map);
699 free (default_encoding);
700 default_encoding = NULL;
706 valid_encoding (const char *enc)
708 iconv_t conv = iconv_open (UTF8, enc);
710 if ( conv == (iconv_t) -1)
719 /* Returns the system locale's idea of the
720 decimal separator character. */
722 get_system_decimal (void)
727 radix_char = nl_langinfo (RADIXCHAR)[0];
731 snprintf (buf, sizeof buf, "%f", 2.5);
740 uc_name (ucs4_t uc, char buffer[16])
742 if (uc >= 0x20 && uc < 0x7f)
743 snprintf (buffer, 16, "`%c'", uc);
745 snprintf (buffer, 16, "U+%04X", uc);
749 /* UTF-8 functions that deal with uppercase/lowercase distinctions. */
751 /* Returns a hash value for the N bytes of UTF-8 encoded data starting at S,
752 with lowercase and uppercase letters treated as equal, starting from
755 utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis)
757 uint8_t folded_buf[2048];
758 size_t folded_len = sizeof folded_buf;
762 folded_s = u8_casefold (CHAR_CAST (const uint8_t *, s), n,
763 NULL, UNINORM_NFKD, folded_buf, &folded_len);
764 if (folded_s != NULL)
766 hash = hash_bytes (folded_s, folded_len, basis);
767 if (folded_s != folded_buf)
774 hash = hash_bytes (s, n, basis);
780 /* Returns a hash value for null-terminated UTF-8 string S, with lowercase and
781 uppercase letters treated as equal, starting from BASIS. */
783 utf8_hash_case_string (const char *s, unsigned int basis)
785 return utf8_hash_case_bytes (s, strlen (s), basis);
788 /* Compares UTF-8 strings A and B case-insensitively.
789 Returns a negative value if A < B, zero if A == B, positive if A > B. */
791 utf8_strcasecmp (const char *a, const char *b)
793 return utf8_strncasecmp (a, strlen (a), b, strlen (b));
796 /* Compares UTF-8 strings A (with length AN) and B (with length BN)
798 Returns a negative value if A < B, zero if A == B, positive if A > B. */
800 utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn)
804 if (u8_casecmp (CHAR_CAST (const uint8_t *, a), an,
805 CHAR_CAST (const uint8_t *, b), bn,
806 NULL, UNINORM_NFKD, &result))
811 result = memcmp (a, b, MIN (an, bn));
813 result = an < bn ? -1 : an > bn;
820 utf8_casemap (const char *s,
821 uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
822 uint8_t *, size_t *))
827 result = CHAR_CAST (char *,
828 f (CHAR_CAST (const uint8_t *, s), strlen (s) + 1,
829 NULL, NULL, NULL, &size));
835 result = xstrdup (s);
841 utf8_to_upper (const char *s)
843 return utf8_casemap (s, u8_toupper);
847 utf8_to_lower (const char *s)
849 return utf8_casemap (s, u8_tolower);
853 get_encoding_info (struct encoding_info *e, const char *name)
855 const struct substring in = SS_LITERAL_INITIALIZER (
857 "!\"#$%&'()*+,-./0123456789:;<=>?@"
858 "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
859 "abcdefghijklmnopqrstuvwxyz{|}~");
861 struct substring out, cr, lf, space;
864 memset (e, 0, sizeof *e);
866 cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
867 lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
868 space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
870 && cr.length <= MAX_UNIT
871 && cr.length == lf.length
872 && cr.length == space.length);
875 fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
879 ss_alloc_substring (&cr, ss_cstr ("\r"));
880 ss_alloc_substring (&lf, ss_cstr ("\n"));
881 ss_alloc_substring (&space, ss_cstr (" "));
885 memcpy (e->cr, cr.string, e->unit);
886 memcpy (e->lf, lf.string, e->unit);
887 memcpy (e->space, space.string, e->unit);
893 out = recode_substring_pool ("UTF-8", name, in, NULL);
894 e->is_ascii_compatible = ss_equals (in, out);
897 if (!e->is_ascii_compatible && e->unit == 1)
899 out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
900 e->is_ebcdic_compatible = (out.length == 1
901 && (uint8_t) out.string[0] == 0xc1);
905 e->is_ebcdic_compatible = false;
911 is_encoding_ascii_compatible (const char *encoding)
913 struct encoding_info e;
915 get_encoding_info (&e, encoding);
916 return e.is_ascii_compatible;
920 is_encoding_ebcdic_compatible (const char *encoding)
922 struct encoding_info e;
924 get_encoding_info (&e, encoding);
925 return e.is_ebcdic_compatible;
928 /* Returns true if iconv can convert ENCODING to and from UTF-8,
931 is_encoding_supported (const char *encoding)
933 return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
934 && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
937 /* Returns true if E is the name of a UTF-8 encoding.
939 XXX Possibly we should test not E as a string but its properties via
942 is_encoding_utf8 (const char *e)
944 return ((e[0] == 'u' || e[0] == 'U')
945 && (e[1] == 't' || e[1] == 'T')
946 && (e[2] == 'f' || e[2] == 'F')
947 && ((e[3] == '8' && e[4] == '\0')
948 || (e[3] == '-' && e[4] == '8' && e[5] == '\0')));
951 static struct encoding_category *categories;
952 static int n_categories;
954 static void SENTINEL (0)
955 add_category (size_t *allocated_categories, const char *category, ...)
957 struct encoding_category *c;
958 const char *encodings[16];
962 /* Count encoding arguments. */
963 va_start (args, category);
965 while ((encodings[n] = va_arg (args, const char *)) != NULL)
967 const char *encoding = encodings[n];
968 if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding))
971 assert (n < sizeof encodings / sizeof *encodings);
977 if (n_categories >= *allocated_categories)
978 categories = x2nrealloc (categories,
979 allocated_categories, sizeof *categories);
981 c = &categories[n_categories++];
982 c->category = category;
983 c->encodings = xmalloc (n * sizeof *c->encodings);
984 for (i = 0; i < n; i++)
985 c->encodings[i] = encodings[i];
990 init_encoding_categories (void)
1000 add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
1001 "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL);
1002 add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256",
1004 add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL);
1005 add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4",
1006 "Windows-1257", NULL_SENTINEL);
1007 add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL);
1008 add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2",
1009 "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL);
1010 add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK",
1011 "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL);
1012 add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS",
1013 "EUC-TW", NULL_SENTINEL);
1014 add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL);
1015 add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111",
1016 "KOI8-R", "MacCyrillic", NULL_SENTINEL);
1017 add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL);
1018 add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian",
1020 add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL);
1021 add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL);
1022 add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL);
1023 add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL);
1024 add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255",
1026 add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL);
1027 add_category (&alloc, _("Hindi"), "MacDevangari", NULL_SENTINEL);
1028 add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL);
1029 add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS",
1031 add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC",
1033 add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL);
1034 add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian",
1036 add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL);
1037 add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874",
1039 add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254",
1041 add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS",
1042 "Windows-1258", NULL_SENTINEL);
1043 add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15",
1044 "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL);
1047 /* Returns an array of "struct encoding_category" that contains only the
1048 categories and encodings that the system supports. */
1049 struct encoding_category *
1050 get_encoding_categories (void)
1052 init_encoding_categories ();
1056 /* Returns the number of elements in the array returned by
1057 get_encoding_categories(). */
1059 get_n_encoding_categories (void)
1061 init_encoding_categories ();
1062 return n_categories;