1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
32 #include "libpspp/assertion.h"
33 #include "libpspp/compiler.h"
34 #include "libpspp/hmapx.h"
35 #include "libpspp/hash-functions.h"
36 #include "libpspp/pool.h"
37 #include "libpspp/str.h"
38 #include "libpspp/version.h"
40 #include "gl/c-strcase.h"
41 #include "gl/localcharset.h"
42 #include "gl/minmax.h"
43 #include "gl/xalloc.h"
44 #include "gl/relocatable.h"
45 #include "gl/xstrndup.h"
48 #define _(msgid) gettext (msgid)
/* Name of the default encoding.  Elsewhere in this file it is assigned a
   string from xstrdup() and released with free(). */
58 static char *default_encoding;
/* Table of converters used by create_iconv(), hashed on the pair of encoding
   names. */
59 static struct hmapx map;
61 /* A wrapper around iconv_open */
62 static struct converter *
63 create_iconv (const char* tocode, const char* fromcode)
66 struct hmapx_node *node;
67 struct converter *converter;
70 hash = hash_string (tocode, hash_string (fromcode, 0));
71 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
76 if (!strcmp (tocode, converter->tocode)
77 && !strcmp (fromcode, converter->fromcode))
81 converter = xmalloc (sizeof *converter);
82 converter->tocode = xstrdup (tocode);
83 converter->fromcode = xstrdup (fromcode);
84 converter->conv = iconv_open (tocode, fromcode);
85 int error = converter->conv == (iconv_t) ~0 ? errno : 0;
86 /* I don't think it's safe to translate this string or to use messaging
87 as the converters have not yet been set up */
88 if (error && strcmp (tocode, fromcode))
92 "cannot create a converter for `%s' to `%s': %s\n",
93 fromcode, tocode, strerror (error));
95 free (converter->tocode);
96 free (converter->fromcode);
99 hmapx_insert (&map, NULL, hash);
103 /* Find out how many bytes there are in a null char in the target
105 iconv_t bconv = iconv_open (tocode, "ASCII");
106 if (bconv != (iconv_t) -1)
108 ICONV_CONST char *nullstr = strdup ("");
109 ICONV_CONST char *outbuf = strdup ("XXXXXXXX");
110 ICONV_CONST char *snullstr = nullstr;
111 ICONV_CONST char *soutbuf = outbuf;
114 const size_t bytes = 8;
115 size_t outbytes = bytes;
116 if (-1 != iconv (bconv, &nullstr, &inbytes, &outbuf, &outbytes))
117 converter->null_char_width = bytes - outbytes;
123 hmapx_insert (&map, converter, hash);
129 /* Converts the single byte C from encoding FROM to TO, returning the first
132 This function probably shouldn't be used at all, but some code still does
135 recode_byte (const char *to, const char *from, char c)
/* C is treated as a one-byte string and recoded with recode_string(). */
138 char *s = recode_string (to, from, &c, 1);
144 /* Similar to recode_string_pool, but allocates the returned value on the heap
145 instead of in a pool. It is the caller's responsibility to free the
148 recode_string (const char *to, const char *from,
149 const char *text, int length)
/* Same conversion as recode_string_pool(), called with a null pool. */
151 return recode_string_pool (to, from, text, length, NULL);
154 /* Returns the length, in bytes, of the string that a similar recode_string()
155 call would return.

   The length is found by actually performing the conversion with
   recode_string() and measuring the result with strlen(). */
157 recode_string_len (const char *to, const char *from,
158 const char *text, int length)
160 char *s = recode_string (to, from, text, length);
161 size_t len = strlen (s);
166 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
167 at OP, and appends a null terminator to the output.
169 Returns the output length if successful, -1 if the output buffer is too
172 try_recode (struct converter *cvtr, char fallbackchar,
173 const char *in, size_t inbytes,
174 char *out_, size_t outbytes)
179 int null_bytes = cvtr->null_char_width;
181 /* Put the converter into the initial shift state, in case there was any
182 state information left over from its last usage. */
183 iconv (cvtr->conv, NULL, 0, NULL, 0);
185 /* Do two rounds of iconv() calls:
187 - The first round does the bulk of the conversion using the
188 caller-supplied input data..
190 - The second round flushes any leftover output. This has a real effect
191 with input encodings that use combining diacritics, e.g. without the
192 second round the last character tends to gets dropped when converting
193 from windows-1258 to other encodings.
195 for (i = 0; i < 2; i++)
197 ICONV_CONST char **inp = i ? NULL : (ICONV_CONST char **) ∈
198 size_t *inbytesp = i ? NULL : &inbytes;
200 while (iconv (cvtr->conv, inp, inbytesp, &out, &outbytes) == -1)
204 if (outbytes < null_bytes + 1)
208 *out++ = fallbackchar;
209 for (j = 0 ; j < null_bytes ; ++j)
211 return out - 1 - out_;
218 *out++ = fallbackchar;
231 /* should never happen */
232 fprintf (stderr, "Character conversion error: %s\n",
239 if (outbytes <= null_bytes - 1)
242 for (i = 0 ; i < null_bytes ; ++i)
245 return out - 1 - out_;
248 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
249 dynamically allocated string in TO-encoding. Any characters which cannot be
250 converted will be represented by '?'.
252 LENGTH should be the length of the string or -1, if null terminated.
254 The returned string will be allocated on POOL.
256 This function's behaviour differs from that of g_convert_with_fallback
257 provided by GLib. The GLib function will fail (returns NULL) if any part of
258 the input string is not valid in the declared input encoding. This function
259 however perseveres even in the presence of badly encoded input. */
261 recode_string_pool (const char *to, const char *from,
262 const char *text, int length, struct pool *pool)
264 struct substring out;
/* TEXT is measured with strlen() for callers that did not supply a length
   (see LENGTH in the comment above). */
270 length = strlen (text);
/* The conversion itself is done by recode_substring_pool() on a substring
   view of TEXT. */
272 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
276 /* Returns the name of the encoding that should be used for file names.
278 This is meant to be the same encoding used by g_filename_from_uri() and
279 g_filename_to_uri() in GLib.

   The choice is made at compile time: builds that define _WIN32 or __WIN32__
   are distinguished from all others by the preprocessor test below.  One of
   the possible results is the locale's character set, as reported by
   locale_charset(). */
281 filename_encoding (void)
283 #if defined _WIN32 || defined __WIN32__
286 return locale_charset ();
/* Builds a null-terminated string consisting of the A_LEN bytes at A followed
   by the B_LEN bytes at B.  The buffer is obtained from xmalloc() and is one
   byte longer than the two inputs combined, to hold the terminator. */
291 xconcat2 (const char *a, size_t a_len,
292 const char *b, size_t b_len)
294 char *s = xmalloc (a_len + b_len + 1);
295 memcpy (s, a, a_len);
296 memcpy (s + a_len, b, b_len);
297 s[a_len + b_len] = '\0';
301 /* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
302 TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
303 ENCODING. If the re-encoded result is no more than MAX_LEN bytes long, then
304 it returns HEAD_LEN. Otherwise, it drops one character[*] from the end of
305 HEAD and tries again, repeating as necessary until the concatenated result
306 fits or until HEAD_LEN reaches 0.
308 [*] Actually this function drops grapheme clusters instead of characters, so
309 that, e.g. a Unicode character followed by a combining accent character
310 is either completely included or completely excluded from HEAD_LEN. See
311 UAX #29 at http://unicode.org/reports/tr29/ for more information on
314 A null ENCODING is treated as UTF-8.
316 Sometimes this function has to actually construct the concatenated string to
317 measure its length. When this happens, it sets *RESULTP to that
318 null-terminated string, allocated with malloc(), for the caller to use if it
319 needs it. Otherwise, it sets *RESULTP to NULL.
321 Simple examples for encoding="UTF-8", max_len=6:
323 head="abc", tail="xyz" => 3
324 head="abcd", tail="xyz" => 3 ("d" dropped).
325 head="abc", tail="uvwxyz" => 0 ("abc" dropped).
326 head="abc", tail="tuvwxyz" => 0 ("abc" dropped).
328 Examples for encoding="ISO-8859-1", max_len=6:
330 head="éèä", tail="xyz" => 6
331 (each letter in head is only 1 byte in ISO-8859-1 even though they
332 each take 2 bytes in UTF-8 encoding)
335 utf8_encoding_concat__ (const char *head, size_t head_len,
336 const char *tail, size_t tail_len,
337 const char *encoding, size_t max_len,
/* UTF-8 target (a null ENCODING counts as UTF-8): the byte lengths of HEAD
   and TAIL can be compared against MAX_LEN directly, without recoding. */
343 else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))
345 if (head_len + tail_len <= max_len)
347 else if (tail_len >= max_len)
/* Step through HEAD one character at a time for as long as the offset plus
   TAIL still fits in MAX_LEN, testing each position for a grapheme cluster
   break. */
357 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
359 ofs <= max_len - tail_len;
364 mblen = u8_mbtouc (&next,
365 CHAR_CAST (const uint8_t *, head + ofs),
367 if (uc_is_grapheme_break (prev, next))
/* Other target encodings: form the full concatenation (or simply reuse HEAD
   when TAIL is empty) and measure how long it is once recoded. */
380 result = (tail_len > 0
381 ? xconcat2 (head, head_len, tail, tail_len)
382 : CONST_CAST (char *, head));
383 if (recode_string_len (encoding, "UTF-8", result,
384 head_len + tail_len) <= max_len)
/* RESULT is handed to the caller only if it is a fresh allocation; when it
   merely aliases HEAD the caller gets a null pointer instead. */
386 *resultp = result != head ? result : NULL;
391 bool correct_result = false;
/* Walk HEAD again character by character.  At each grapheme cluster break,
   the first OFS bytes of HEAD followed by TAIL are assembled in RESULT and
   the recoded length of that candidate is compared with MAX_LEN. */
398 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
405 mblen = u8_mbtouc (&next,
406 CHAR_CAST (const uint8_t *, head + ofs),
408 if (uc_is_grapheme_break (prev, next))
412 memcpy (result, head, ofs);
413 memcpy (result + ofs, tail, tail_len);
414 result[ofs + tail_len] = '\0';
/* CORRECT_RESULT records whether RESULT currently holds a candidate that
   fits. */
417 if (recode_string_len (encoding, "UTF-8", result,
418 ofs + tail_len) <= max_len)
420 correct_result = true;
424 correct_result = false;
443 /* Concatenates a prefix of HEAD with all of TAIL and returns the result as a
444 null-terminated string owned by the caller. HEAD, TAIL, and the returned
445 string are all encoded in UTF-8. As many characters[*] from the beginning
446 of HEAD are included as will fit within MAX_LEN bytes supposing that the
447 resulting string were to be re-encoded in ENCODING. All of TAIL is always
448 included, even if TAIL by itself is longer than MAX_LEN in ENCODING.
450 [*] Actually this function drops grapheme clusters instead of characters, so
451 that, e.g. a Unicode character followed by a combining accent character
452 is either completely included or completely excluded from the returned
453 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
454 information on grapheme clusters.
456 A null ENCODING is treated as UTF-8.
458 Simple examples for encoding="UTF-8", max_len=6:
460 head="abc", tail="xyz" => "abcxyz"
461 head="abcd", tail="xyz" => "abcxyz"
462 head="abc", tail="uvwxyz" => "uvwxyz"
463 head="abc", tail="tuvwxyz" => "tuvwxyz"
465 Examples for encoding="ISO-8859-1", max_len=6:
467 head="éèä", tail="xyz" => "éèäxyz"
468 (each letter in HEAD is only 1 byte in ISO-8859-1 even though they
469 each take 2 bytes in UTF-8 encoding)
472 utf8_encoding_concat (const char *head, const char *tail,
473 const char *encoding, size_t max_len)
475 size_t tail_len = strlen (tail);
/* The helper reports how many bytes of HEAD to keep, and may also store an
   already-built string in RESULT. */
479 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
480 encoding, max_len, &result);
/* When the helper supplied no string, the kept prefix of HEAD is joined to
   TAIL here. */
481 return (result != NULL
483 : xconcat2 (head, prefix_len, tail, tail_len));
486 /* Returns the length, in bytes, of the string that would be returned by
487 utf8_encoding_concat() if passed the same arguments, but the implementation
488 is often more efficient. */
490 utf8_encoding_concat_len (const char *head, const char *tail,
491 const char *encoding, size_t max_len)
493 size_t tail_len = strlen (tail);
/* Only the prefix length reported by the helper is needed for the answer. */
497 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
498 encoding, max_len, &result);
/* Kept bytes of HEAD plus all of TAIL. */
500 return prefix_len + tail_len;
503 /* Returns an allocated, null-terminated string, owned by the caller,
504 containing as many characters[*] from the beginning of S that would fit
505 within MAX_LEN bytes if the returned string were to be re-encoded in
506 ENCODING. Both S and the returned string are encoded in UTF-8.
508 [*] Actually this function drops grapheme clusters instead of characters, so
509 that, e.g. a Unicode character followed by a combining accent character
510 is either completely included or completely excluded from the returned
511 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
512 information on grapheme clusters.
514 A null ENCODING is treated as UTF-8.
517 utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
/* Truncation is expressed as concatenation with an empty tail. */
519 return utf8_encoding_concat (s, "", encoding, max_len);
522 /* Returns the length, in bytes, of the string that would be returned by
523 utf8_encoding_trunc() if passed the same arguments, but the implementation
524 is often more efficient. */
526 utf8_encoding_trunc_len (const char *s, const char *encoding, size_t max_len)
/* Same as utf8_encoding_concat_len() with an empty tail. */
528 return utf8_encoding_concat_len (s, "", encoding, max_len);
531 /* Returns FILENAME converted from UTF-8 to the filename encoding.
532 On Windows the filename encoding is UTF-8; elsewhere it is based on the
535 utf8_to_filename (const char *filename)
/* A length of -1 marks FILENAME as null-terminated; the result comes from
   recode_string(). */
537 return recode_string (filename_encoding (), "UTF-8", filename, -1);
540 /* Returns FILENAME converted from the filename encoding to UTF-8.
541 On Windows the filename encoding is UTF-8; elsewhere it is based on the
544 filename_to_utf8 (const char *filename)
/* A length of -1 marks FILENAME as null-terminated; the result comes from
   recode_string(). */
546 return recode_string ("UTF-8", filename_encoding (), filename, -1);
/* Shared implementation behind recode_substring_pool() and
   recode_pedantically(): converts TEXT from FROM-encoding to TO-encoding with
   a converter from create_iconv(), passing FALLBACKCHAR through to
   try_recode(), and stores the result in *OUT using memory from POOL. */
550 recode_substring_pool__ (const char *to, const char *from,
551 struct substring text, char fallbackchar,
552 struct pool *pool, struct substring *out)
555 struct converter *conv;
/* DEFAULT_ENCODING can stand in for TO and for FROM.
   NOTE(review): presumably only when the respective argument is null;
   confirm. */
558 to = default_encoding;
561 from = default_encoding;
563 conv = create_iconv (to, from);
/* Byte-for-byte copy of TEXT into a new null-terminated buffer in *OUT. */
569 out->string = pool_malloc (pool, text.length + 1);
570 out->length = text.length;
571 memcpy (out->string, text.string, text.length);
572 out->string[out->length] = '\0';
/* Output buffers start one byte larger than TEXT and double in size on each
   iteration.  The loop condition ends the loop if BUFSIZE stops exceeding
   TEXT's length, which can only happen if the doubling wraps around. */
579 for (bufsize = text.length + 1; bufsize > text.length; bufsize *= 2)
581 char *output = pool_malloc (pool, bufsize);
584 retval = try_recode (conv, fallbackchar, text.string, text.length,
588 *out = ss_buffer (output, retval);
591 pool_free (pool, output);
593 if (retval != -E2BIG)
600 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
601 dynamically allocated string in TO-encoding. Any characters which cannot be
602 converted will be represented by '?'.
604 The returned string will be null-terminated and allocated on POOL with
607 This function's behaviour differs from that of g_convert_with_fallback
608 provided by GLib. The GLib function will fail (returns NULL) if any part of
609 the input string is not valid in the declared input encoding. This function
610 however perseveres even in the presence of badly encoded input. */
612 recode_substring_pool (const char *to, const char *from,
613 struct substring text, struct pool *pool)
615 struct substring out;
/* '?' is passed as the fallback character, matching the comment above. */
617 recode_substring_pool__ (to, from, text, '?', pool, &out);
621 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
622 dynamically allocated string in TO-encoding. On success, returns 0, and the
623 converted null-terminated string, allocated from POOL with pool_malloc(), is
624 stored in *OUT. On failure, returns a positive errno value.
626 The function fails with an error if any part of the input string is not
627 valid in the declared input encoding. */
629 recode_pedantically (const char *to, const char *from,
630 struct substring text, struct pool *pool,
631 struct substring *out)
/* The fallback character passed here is 0, where recode_substring_pool()
   passes '?'. */
635 error = recode_substring_pool__ (to, from, text, 0, pool, out);
/* Adopt the locale specified by the environment for all categories. */
644 setlocale (LC_ALL, "");
/* Point PACKAGE's message catalog at the relocated locale directory and make
   PACKAGE the current text domain. */
645 bindtextdomain (PACKAGE, relocate(locale_dir));
646 textdomain (PACKAGE);
/* The default encoding must not have been set before this point; it starts
   out as a copy of the locale's character set name. */
648 assert (default_encoding == NULL);
649 default_encoding = xstrdup (locale_charset ());
/* Returns the current default encoding name.  The pointer is the file's own
   DEFAULT_ENCODING string, not a copy. */
655 get_default_encoding (void)
657 return default_encoding;
/* Makes a copy of ENC the default encoding, freeing the previous default. */
661 set_default_encoding (const char *enc)
663 free (default_encoding);
664 default_encoding = xstrdup (enc);
668 /* Attempts to set the encoding from a locale name
669 returns true if successful.
670 This function does not (should not!) alter the current locale.
673 set_encoding_from_locale (const char *loc)
/* Remember the current LC_CTYPE locale so that it can be put back later. */
678 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
/* Character set reported under the "C" locale, used for comparison. */
680 setlocale (LC_CTYPE, "C");
681 c_encoding = xstrdup (locale_charset ());
/* Character set reported under LOC. */
683 setlocale (LC_CTYPE, loc);
684 loc_encoding = xstrdup (locale_charset ());
/* LOC reporting the same character set as "C" is singled out here.
   NOTE(review): presumably this is how an unusable LOC is detected;
   confirm. */
687 if ( 0 == strcmp (loc_encoding, c_encoding))
/* Restore the LC_CTYPE locale saved in TMP. */
692 setlocale (LC_CTYPE, tmp);
/* LOC_ENCODING replaces the old default; the pointer itself is stored, so
   DEFAULT_ENCODING now refers to that allocation. */
698 free (default_encoding);
699 default_encoding = loc_encoding;
712 struct hmapx_node *node;
713 struct converter *cvtr;
/* Release every converter cached in MAP.  Only converters that were opened
   successfully have an iconv descriptor to close. */
715 HMAPX_FOR_EACH (cvtr, node, &map)
720 free (cvtr->fromcode);
721 if (cvtr->conv != (iconv_t) -1)
722 iconv_close (cvtr->conv);
/* Then discard the table itself and forget the default encoding. */
726 hmapx_destroy (&map);
728 free (default_encoding);
729 default_encoding = NULL;
735 valid_encoding (const char *enc)
/* The test is whether iconv_open() can create a converter from ENC to
   UTF8. */
737 iconv_t conv = iconv_open (UTF8, enc);
739 if ( conv == (iconv_t) -1)
748 /* Return the system locale's idea of the
749 decimal separator character */
751 get_system_decimal (void)
/* One source for the answer: the first character of the locale's RADIXCHAR
   item. */
756 radix_char = nl_langinfo (RADIXCHAR)[0];
/* Another: format 2.5 with "%f" in the current locale.
   NOTE(review): presumably the separator is then read out of BUF; confirm. */
760 snprintf (buf, sizeof buf, "%f", 2.5);
/* Formats a human-readable name for code point UC into the 16-byte BUFFER:
   the character itself between quotes for UC in the range 0x20 through 0x7e,
   and "U+" followed by at least four hexadecimal digits otherwise. */
769 uc_name (ucs4_t uc, char buffer[16])
771 if (uc >= 0x20 && uc < 0x7f)
772 snprintf (buffer, 16, "`%c'", uc);
774 snprintf (buffer, 16, "U+%04X", uc);
778 /* UTF-8 functions that deal with uppercase/lowercase distinctions. */
780 /* Returns a hash value for the N bytes of UTF-8 encoded data starting at S,
781 with lowercase and uppercase letters treated as equal, starting from
784 utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis)
/* Scratch space offered to u8_casefold() for its result; FOLDED_LEN starts
   out as the size of that space. */
786 uint8_t folded_buf[2048];
787 size_t folded_len = sizeof folded_buf;
/* Case-fold S, requesting NFKD normalization, and hash the folded bytes. */
791 folded_s = u8_casefold (CHAR_CAST (const uint8_t *, s), n,
792 NULL, UNINORM_NFKD, folded_buf, &folded_len);
793 if (folded_s != NULL)
795 hash = hash_bytes (folded_s, folded_len, basis);
/* A result that is not FOLDED_BUF itself is treated differently from one
   that is. */
796 if (folded_s != folded_buf)
/* Hash of the raw, unfolded bytes of S.
   NOTE(review): presumably the path taken when u8_casefold() returns null;
   confirm. */
803 hash = hash_bytes (s, n, basis);
809 /* Returns a hash value for null-terminated UTF-8 string S, with lowercase and
810 uppercase letters treated as equal, starting from BASIS.

   Implemented as utf8_hash_case_bytes() over the strlen() bytes of S. */
812 utf8_hash_case_string (const char *s, unsigned int basis)
814 return utf8_hash_case_bytes (s, strlen (s), basis);
817 /* Compares UTF-8 strings A and B case-insensitively.
818 Returns a negative value if A < B, zero if A == B, positive if A > B.

   Both strings must be null-terminated: their lengths are taken with
   strlen() and passed to utf8_strncasecmp(). */
820 utf8_strcasecmp (const char *a, const char *b)
822 return utf8_strncasecmp (a, strlen (a), b, strlen (b));
825 /* Compares UTF-8 strings A (with length AN) and B (with length BN)
827 Returns a negative value if A < B, zero if A == B, positive if A > B. */
829 utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn)
/* u8_casecmp() stores its comparison result in RESULT.  The code under this
   "if" runs when it returns nonzero. */
833 if (u8_casecmp (CHAR_CAST (const uint8_t *, a), an,
834 CHAR_CAST (const uint8_t *, b), bn,
835 NULL, UNINORM_NFKD, &result))
/* Bytewise comparison over the common length of A and B. */
840 result = memcmp (a, b, MIN (an, bn));
/* Ordering by length alone: -1 if A is shorter, 1 if longer, 0 if equal. */
842 result = an < bn ? -1 : an > bn;
/* Applies case-mapping function F to null-terminated UTF-8 string S.  F has
   the signature shared by u8_toupper() and u8_tolower(), the two functions
   passed in by utf8_to_upper() and utf8_to_lower(). */
849 utf8_casemap (const char *s,
850 uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
851 uint8_t *, size_t *))
/* The length passed to F is strlen (S) + 1, so S's null terminator is part
   of the input.  No result buffer is supplied to F. */
856 result = CHAR_CAST (char *,
857 f (CHAR_CAST (const uint8_t *, s), strlen (s) + 1,
858 NULL, NULL, NULL, &size));
/* A plain copy of S.
   NOTE(review): presumably the fallback for when F fails; confirm. */
864 result = xstrdup (s);
/* Returns the result of utf8_casemap() on S with u8_toupper() as the mapping
   function. */
870 utf8_to_upper (const char *s)
872 return utf8_casemap (s, u8_toupper);
/* Returns the result of utf8_casemap() on S with u8_tolower() as the mapping
   function. */
876 utf8_to_lower (const char *s)
878 return utf8_casemap (s, u8_tolower);
/* Fills *E with properties of the encoding named NAME: the encoded forms of
   CR, LF and space, and whether the encoding is ASCII-compatible or
   EBCDIC-compatible.  *E is zeroed first. */
882 get_encoding_info (struct encoding_info *e, const char *name)
884 const struct substring in = SS_LITERAL_INITIALIZER (
886 "!\"#$%&'()*+,-./0123456789:;<=>?@"
887 "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
888 "abcdefghijklmnopqrstuvwxyz{|}~");
890 struct substring out, cr, lf, space;
893 memset (e, 0, sizeof *e);
/* Encode CR, LF and space from UTF-8 into NAME. */
895 cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
896 lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
897 space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
/* The three encoded forms are expected to be equally long and no longer than
   MAX_UNIT bytes. */
899 && cr.length <= MAX_UNIT
900 && cr.length == lf.length
901 && cr.length == space.length);
/* Unsupported encoding: warn, and use the plain ASCII forms of the three
   characters instead. */
904 fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
908 ss_alloc_substring (&cr, ss_cstr ("\r"));
909 ss_alloc_substring (&lf, ss_cstr ("\n"));
910 ss_alloc_substring (&space, ss_cstr (" "));
/* Record the encoded forms in *E, E->unit bytes each. */
914 memcpy (e->cr, cr.string, e->unit);
915 memcpy (e->lf, lf.string, e->unit);
916 memcpy (e->space, space.string, e->unit);
/* NAME is ASCII-compatible if interpreting the ASCII sample IN as NAME and
   converting it to UTF-8 reproduces IN exactly. */
922 out = recode_substring_pool ("UTF-8", name, in, NULL);
923 e->is_ascii_compatible = ss_equals (in, out);
/* EBCDIC encodes the letter A as the single byte 0xc1, so the probe encodes
   "A" from UTF-8 into NAME.  The opposite direction (decoding the byte 'A'
   from NAME into UTF-8) could never match, because a lone 0xc1 byte is not
   valid UTF-8. */
926 if (!e->is_ascii_compatible && e->unit == 1)
928 out = recode_substring_pool (name, "UTF-8", ss_cstr ("A"), NULL);
929 e->is_ebcdic_compatible = (out.length == 1
930 && (uint8_t) out.string[0] == 0xc1);
934 e->is_ebcdic_compatible = false;
/* Returns the is_ascii_compatible property that get_encoding_info() computes
   for ENCODING. */
940 is_encoding_ascii_compatible (const char *encoding)
942 struct encoding_info e;
944 get_encoding_info (&e, encoding);
945 return e.is_ascii_compatible;
/* Returns the is_ebcdic_compatible property that get_encoding_info() computes
   for ENCODING. */
949 is_encoding_ebcdic_compatible (const char *encoding)
951 struct encoding_info e;
953 get_encoding_info (&e, encoding);
954 return e.is_ebcdic_compatible;
957 /* Returns true if iconv can convert ENCODING to and from UTF-8,
960 is_encoding_supported (const char *encoding)
/* Both directions are tried with create_iconv(): ENCODING to UTF-8 first,
   and UTF-8 to ENCODING only if the first attempt yields a converter. */
962 return (create_iconv ("UTF-8", encoding)
963 && create_iconv (encoding, "UTF-8"));
966 /* Returns true if E is the name of a UTF-8 encoding.
968 XXX Possibly we should test not E as a string but its properties via
971 is_encoding_utf8 (const char *e)
/* Accepts exactly the spellings "utf8" and "utf-8", with the letters in
   either case.  Each character is tested only after the previous ones have
   matched, so the tests never read past E's null terminator. */
973 return ((e[0] == 'u' || e[0] == 'U')
974 && (e[1] == 't' || e[1] == 'T')
975 && (e[2] == 'f' || e[2] == 'F')
976 && ((e[3] == '8' && e[4] == '\0')
977 || (e[3] == '-' && e[4] == '8' && e[5] == '\0')));
/* Array of encoding categories, grown and filled in by add_category(). */
980 static struct encoding_category *categories;
/* Number of elements of CATEGORIES that are in use. */
981 static int n_categories;
/* Adds a category named CATEGORY to the CATEGORIES array.  The arguments
   after CATEGORY are encoding names, terminated by a null pointer.  Each name
   is tested for being "Auto" or acceptable to is_encoding_supported().

   *ALLOCATED_CATEGORIES is the allocation count that x2nrealloc() maintains
   for CATEGORIES.

   Only pointers are stored: neither CATEGORY nor the encoding names are
   copied. */
983 static void SENTINEL (0)
984 add_category (size_t *allocated_categories, const char *category, ...)
986 struct encoding_category *c;
987 const char *encodings[16];
991 /* Count encoding arguments. */
992 va_start (args, category);
994 while ((encodings[n] = va_arg (args, const char *)) != NULL)
996 const char *encoding = encodings[n];
997 if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding))
/* N must stay below the capacity of ENCODINGS. */
1000 assert (n < sizeof encodings / sizeof *encodings);
/* Grow CATEGORIES when it is full, then fill in the next free element with
   the category name and the N collected encoding names. */
1006 if (n_categories >= *allocated_categories)
1007 categories = x2nrealloc (categories,
1008 allocated_categories, sizeof *categories);
1010 c = &categories[n_categories++];
1011 c->category = category;
1012 c->encodings = xmalloc (n * sizeof *c->encodings);
1013 for (i = 0; i < n; i++)
1014 c->encodings[i] = encodings[i];
/* Builds the table of encoding categories with one add_category() call per
   category, each listing that category's candidate encoding names.  Every
   category name except "Unicode" is passed through _() for translation. */
1019 init_encoding_categories (void)
1029 add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
1030 "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL);
1031 add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256",
1033 add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL);
1034 add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4",
1035 "Windows-1257", NULL_SENTINEL);
1036 add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL);
1037 add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2",
1038 "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL);
1039 add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK",
1040 "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL);
1041 add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS",
1042 "EUC-TW", NULL_SENTINEL);
1043 add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL);
1044 add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111",
1045 "KOI8-R", "MacCyrillic", NULL_SENTINEL);
1046 add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL);
1047 add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian",
1049 add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL);
1050 add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL);
1051 add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL);
1052 add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL);
1053 add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255",
1055 add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL);
1056 add_category (&alloc, _("Hindi"), "MacDevangari", NULL_SENTINEL);
1057 add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL);
1058 add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS",
1060 add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC",
1062 add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL);
1063 add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian",
1065 add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL);
1066 add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874",
1068 add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254",
1070 add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS",
1071 "Windows-1258", NULL_SENTINEL);
1072 add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15",
1073 "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL);
1076 /* Returns an array of "struct encoding_category" that contains only the
1077 categories and encodings that the system supports.

   init_encoding_categories() is called first, on every call. */
1078 struct encoding_category *
1079 get_encoding_categories (void)
1081 init_encoding_categories ();
1085 /* Returns the number of elements in the array returned by
1086 get_encoding_categories().

   init_encoding_categories() is called first, on every call. */
1088 get_n_encoding_categories (void)
1090 init_encoding_categories ();
1091 return n_categories;