1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
33 #include "libpspp/assertion.h"
34 #include "libpspp/compiler.h"
35 #include "libpspp/hmapx.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/pool.h"
38 #include "libpspp/str.h"
39 #include "libpspp/version.h"
41 #include "gl/c-strcase.h"
42 #include "gl/localcharset.h"
43 #include "gl/minmax.h"
44 #include "gl/xalloc.h"
45 #include "gl/relocatable.h"
46 #include "gl/xstrndup.h"
49 #define _(msgid) gettext (msgid)
59 static char *default_encoding;
60 static struct hmapx map;
62 /* A wrapper around iconv_open */
63 static struct converter *
64 create_iconv__ (const char* tocode, const char* fromcode)
67 struct hmapx_node *node;
68 struct converter *converter;
71 hash = hash_string (tocode, hash_string (fromcode, 0));
72 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
73 if (!strcmp (tocode, converter->tocode)
74 && !strcmp (fromcode, converter->fromcode))
77 converter = xmalloc (sizeof *converter);
78 converter->tocode = xstrdup (tocode);
79 converter->fromcode = xstrdup (fromcode);
80 converter->conv = iconv_open (tocode, fromcode);
81 converter->error = converter->conv == (iconv_t) -1 ? errno : 0;
82 hmapx_insert (&map, converter, hash);
88 create_iconv (const char* tocode, const char* fromcode)
90 struct converter *converter;
92 converter = create_iconv__ (tocode, fromcode);
94 /* I don't think it's safe to translate this string or to use messaging
95 as the converters have not yet been set up */
96 if (converter->error && strcmp (tocode, fromcode))
100 "cannot create a converter for `%s' to `%s': %s\n",
101 fromcode, tocode, strerror (converter->error));
102 converter->error = 0;
105 return converter->conv;
108 /* Converts the single byte C from encoding FROM to TO, returning the first
111 This function probably shouldn't be used at all, but some code still does
114 recode_byte (const char *to, const char *from, char c)
117 char *s = recode_string (to, from, &c, 1);
123 /* Similar to recode_string_pool, but allocates the returned value on the heap
124 instead of in a pool. It is the caller's responsibility to free the
127 recode_string (const char *to, const char *from,
128 const char *text, int length)
130 return recode_string_pool (to, from, text, length, NULL);
133 /* Returns the length, in bytes, of the string that a similar recode_string()
134 call would return. */
136 recode_string_len (const char *to, const char *from,
137 const char *text, int length)
139 char *s = recode_string (to, from, text, length);
140 size_t len = strlen (s);
145 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
146 at OP, and appends a null terminator to the output.
148 Returns the output length if successful, -1 if the output buffer is too
151 try_recode (iconv_t conv,
152 const char *ip, size_t inbytes,
153 char *op_, size_t outbytes)
155 /* FIXME: Need to ensure that this char is valid in the target encoding */
156 const char fallbackchar = '?';
159 /* Put the converter into the initial shift state, in case there was any
160 state information left over from its last usage. */
161 iconv (conv, NULL, 0, NULL, 0);
163 while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
164 &op, &outbytes) == -1)
170 *op++ = fallbackchar;
177 *op++ = fallbackchar;
187 /* should never happen */
188 fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
200 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
201 dynamically allocated string in TO-encoding. Any characters which cannot be
202 converted will be represented by '?'.
204 LENGTH should be the length of the string, or -1 if it is null-terminated.
206 The returned string will be allocated on POOL.
208 This function's behaviour differs from that of g_convert_with_fallback
209 provided by GLib. The GLib function will fail (returns NULL) if any part of
210 the input string is not valid in the declared input encoding. This function
211 however perseveres even in the presence of badly encoded input. */
213 recode_string_pool (const char *to, const char *from,
214 const char *text, int length, struct pool *pool)
216 struct substring out;
222 length = strlen (text);
224 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
228 /* Returns the name of the encoding that should be used for file names.
230 This is meant to be the same encoding used by g_filename_from_uri() and
231 g_filename_to_uri() in GLib. */
233 filename_encoding (void)
235 #if defined _WIN32 || defined __WIN32__
238 return locale_charset ();
243 xconcat2 (const char *a, size_t a_len,
244 const char *b, size_t b_len)
246 char *s = xmalloc (a_len + b_len + 1);
247 memcpy (s, a, a_len);
248 memcpy (s + a_len, b, b_len);
249 s[a_len + b_len] = '\0';
253 /* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
254 TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
255 ENCODING. If the re-encoded result is no more than MAX_LEN bytes long, then
256 it returns HEAD_LEN. Otherwise, it drops one character[*] from the end of
257 HEAD and tries again, repeating as necessary until the concatenated result
258 fits or until HEAD_LEN reaches 0.
260 [*] Actually this function drops grapheme clusters instead of characters, so
261 that, e.g. a Unicode character followed by a combining accent character
262 is either completely included or completely excluded from HEAD_LEN. See
263 UAX #29 at http://unicode.org/reports/tr29/ for more information on
266 A null ENCODING is treated as UTF-8.
268 Sometimes this function has to actually construct the concatenated string to
269 measure its length. When this happens, it sets *RESULTP to that
270 null-terminated string, allocated with malloc(), for the caller to use if it
271 needs it. Otherwise, it sets *RESULTP to NULL.
273 Simple examples for encoding="UTF-8", max_len=6:
275 head="abc", tail="xyz" => 3
276 head="abcd", tail="xyz" => 3 ("d" dropped).
277 head="abc", tail="uvwxyz" => 0 ("abc" dropped).
278 head="abc", tail="tuvwxyz" => 0 ("abc" dropped).
280 Examples for encoding="ISO-8859-1", max_len=6:
282 head="éèä", tail="xyz" => 6
283 (each letter in head is only 1 byte in ISO-8859-1 even though they
284 each take 2 bytes in UTF-8 encoding)
287 utf8_encoding_concat__ (const char *head, size_t head_len,
288 const char *tail, size_t tail_len,
289 const char *encoding, size_t max_len,
295 else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))
297 if (head_len + tail_len <= max_len)
299 else if (tail_len >= max_len)
309 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
311 ofs <= max_len - tail_len;
316 mblen = u8_mbtouc (&next,
317 CHAR_CAST (const uint8_t *, head + ofs),
319 if (uc_is_grapheme_break (prev, next))
332 result = (tail_len > 0
333 ? xconcat2 (head, head_len, tail, tail_len)
334 : CONST_CAST (char *, head));
335 if (recode_string_len (encoding, "UTF-8", result,
336 head_len + tail_len) <= max_len)
338 *resultp = result != head ? result : NULL;
343 bool correct_result = false;
350 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
357 mblen = u8_mbtouc (&next,
358 CHAR_CAST (const uint8_t *, head + ofs),
360 if (uc_is_grapheme_break (prev, next))
364 memcpy (result, head, ofs);
365 memcpy (result + ofs, tail, tail_len);
366 result[ofs + tail_len] = '\0';
369 if (recode_string_len (encoding, "UTF-8", result,
370 ofs + tail_len) <= max_len)
372 correct_result = true;
376 correct_result = false;
395 /* Concatenates a prefix of HEAD with all of TAIL and returns the result as a
396 null-terminated string owned by the caller. HEAD, TAIL, and the returned
397 string are all encoded in UTF-8. As many characters[*] from the beginning
398 of HEAD are included as will fit within MAX_LEN bytes supposing that the
399 resulting string were to be re-encoded in ENCODING. All of TAIL is always
400 included, even if TAIL by itself is longer than MAX_LEN in ENCODING.
402 [*] Actually this function drops grapheme clusters instead of characters, so
403 that, e.g. a Unicode character followed by a combining accent character
404 is either completely included or completely excluded from the returned
405 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
406 information on grapheme clusters.
408 A null ENCODING is treated as UTF-8.
410 Simple examples for encoding="UTF-8", max_len=6:
412 head="abc", tail="xyz" => "abcxyz"
413 head="abcd", tail="xyz" => "abcxyz"
414 head="abc", tail="uvwxyz" => "uvwxyz"
415 head="abc", tail="tuvwxyz" => "tuvwxyz"
417 Examples for encoding="ISO-8859-1", max_len=6:
419 head="éèä", tail="xyz" => "éèäxyz"
420 (each letter in HEAD is only 1 byte in ISO-8859-1 even though they
421 each take 2 bytes in UTF-8 encoding)
424 utf8_encoding_concat (const char *head, const char *tail,
425 const char *encoding, size_t max_len)
427 size_t tail_len = strlen (tail);
431 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
432 encoding, max_len, &result);
433 return (result != NULL
435 : xconcat2 (head, prefix_len, tail, tail_len));
438 /* Returns the length, in bytes, of the string that would be returned by
439 utf8_encoding_concat() if passed the same arguments, but the implementation
440 is often more efficient. */
442 utf8_encoding_concat_len (const char *head, const char *tail,
443 const char *encoding, size_t max_len)
445 size_t tail_len = strlen (tail);
449 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
450 encoding, max_len, &result);
452 return prefix_len + tail_len;
455 /* Returns an allocated, null-terminated string, owned by the caller,
456 containing as many characters[*] from the beginning of S as would fit
457 within MAX_LEN bytes if the returned string were to be re-encoded in
458 ENCODING. Both S and the returned string are encoded in UTF-8.
460 [*] Actually this function drops grapheme clusters instead of characters, so
461 that, e.g. a Unicode character followed by a combining accent character
462 is either completely included or completely excluded from the returned
463 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
464 information on grapheme clusters.
466 A null ENCODING is treated as UTF-8.
469 utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
471 return utf8_encoding_concat (s, "", encoding, max_len);
474 /* Returns the length, in bytes, of the string that would be returned by
475 utf8_encoding_trunc() if passed the same arguments, but the implementation
476 is often more efficient. */
478 utf8_encoding_trunc_len (const char *s, const char *encoding, size_t max_len)
480 return utf8_encoding_concat_len (s, "", encoding, max_len);
483 /* Returns FILENAME converted from UTF-8 to the filename encoding.
484 On Windows the filename encoding is UTF-8; elsewhere it is based on the
487 utf8_to_filename (const char *filename)
489 return recode_string (filename_encoding (), "UTF-8", filename, -1);
492 /* Returns FILENAME converted from the filename encoding to UTF-8.
493 On Windows the filename encoding is UTF-8; elsewhere it is based on the
496 filename_to_utf8 (const char *filename)
498 return recode_string ("UTF-8", filename_encoding (), filename, -1);
501 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
502 dynamically allocated string in TO-encoding. Any characters which cannot be
503 converted will be represented by '?'.
505 The returned string will be null-terminated and allocated on POOL.
507 This function's behaviour differs from that of g_convert_with_fallback
508 provided by GLib. The GLib function will fail (returns NULL) if any part of
509 the input string is not valid in the declared input encoding. This function
510 however perseveres even in the presence of badly encoded input. */
512 recode_substring_pool (const char *to, const char *from,
513 struct substring text, struct pool *pool)
515 size_t outbufferlength;
519 to = default_encoding;
522 from = default_encoding;
524 conv = create_iconv (to, from);
526 if ( (iconv_t) -1 == conv )
528 struct substring out;
529 ss_alloc_substring_pool (&out, text, pool);
533 for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
534 if ( outbufferlength > text.length)
536 char *output = pool_malloc (pool, outbufferlength);
537 ssize_t output_len = try_recode (conv, text.string, text.length,
538 output, outbufferlength);
540 return ss_buffer (output, output_len);
541 pool_free (pool, output);
550 setlocale (LC_ALL, "");
551 bindtextdomain (PACKAGE, relocate(locale_dir));
552 textdomain (PACKAGE);
554 assert (default_encoding == NULL);
555 default_encoding = xstrdup (locale_charset ());
561 get_default_encoding (void)
563 return default_encoding;
567 set_default_encoding (const char *enc)
569 free (default_encoding);
570 default_encoding = xstrdup (enc);
574 /* Attempts to set the encoding from a locale name.
575 Returns true if successful.
576 This function does not (should not!) alter the current locale.
579 set_encoding_from_locale (const char *loc)
584 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
586 setlocale (LC_CTYPE, "C");
587 c_encoding = xstrdup (locale_charset ());
589 setlocale (LC_CTYPE, loc);
590 loc_encoding = xstrdup (locale_charset ());
593 if ( 0 == strcmp (loc_encoding, c_encoding))
598 setlocale (LC_CTYPE, tmp);
604 free (default_encoding);
605 default_encoding = loc_encoding;
618 struct hmapx_node *node;
619 struct converter *cvtr;
621 HMAPX_FOR_EACH (cvtr, node, &map)
624 free (cvtr->fromcode);
625 if (cvtr->conv != (iconv_t) -1)
626 iconv_close (cvtr->conv);
630 hmapx_destroy (&map);
632 free (default_encoding);
633 default_encoding = NULL;
639 valid_encoding (const char *enc)
641 iconv_t conv = iconv_open (UTF8, enc);
643 if ( conv == (iconv_t) -1)
652 /* Returns the system locale's idea of the
653 decimal separator character. */
655 get_system_decimal (void)
660 radix_char = nl_langinfo (RADIXCHAR)[0];
664 snprintf (buf, sizeof buf, "%f", 2.5);
673 uc_name (ucs4_t uc, char buffer[16])
675 if (uc >= 0x20 && uc < 0x7f)
676 snprintf (buffer, 16, "`%c'", uc);
678 snprintf (buffer, 16, "U+%04X", uc);
682 /* UTF-8 functions that deal with uppercase/lowercase distinctions. */
684 /* Returns a hash value for the N bytes of UTF-8 encoded data starting at S,
685 with lowercase and uppercase letters treated as equal, starting from
688 utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis)
690 uint8_t folded_buf[2048];
691 size_t folded_len = sizeof folded_buf;
695 folded_s = u8_casefold (CHAR_CAST (const uint8_t *, s), n,
696 NULL, UNINORM_NFKD, folded_buf, &folded_len);
697 if (folded_s != NULL)
699 hash = hash_bytes (folded_s, folded_len, basis);
700 if (folded_s != folded_buf)
707 hash = hash_bytes (s, n, basis);
713 /* Returns a hash value for null-terminated UTF-8 string S, with lowercase and
714 uppercase letters treated as equal, starting from BASIS. */
716 utf8_hash_case_string (const char *s, unsigned int basis)
718 return utf8_hash_case_bytes (s, strlen (s), basis);
721 /* Compares UTF-8 strings A and B case-insensitively.
722 Returns a negative value if A < B, zero if A == B, positive if A > B. */
724 utf8_strcasecmp (const char *a, const char *b)
726 return utf8_strncasecmp (a, strlen (a), b, strlen (b));
729 /* Compares UTF-8 strings A (with length AN) and B (with length BN)
731 Returns a negative value if A < B, zero if A == B, positive if A > B. */
733 utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn)
737 if (u8_casecmp (CHAR_CAST (const uint8_t *, a), an,
738 CHAR_CAST (const uint8_t *, b), bn,
739 NULL, UNINORM_NFKD, &result))
744 result = memcmp (a, b, MIN (an, bn));
746 result = an < bn ? -1 : an > bn;
753 utf8_casemap (const char *s,
754 uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
755 uint8_t *, size_t *))
760 result = CHAR_CAST (char *,
761 f (CHAR_CAST (const uint8_t *, s), strlen (s) + 1,
762 NULL, NULL, NULL, &size));
768 result = xstrdup (s);
774 utf8_to_upper (const char *s)
776 return utf8_casemap (s, u8_toupper);
780 utf8_to_lower (const char *s)
782 return utf8_casemap (s, u8_tolower);
786 get_encoding_info (struct encoding_info *e, const char *name)
788 const struct substring in = SS_LITERAL_INITIALIZER (
790 "!\"#$%&'()*+,-./0123456789:;<=>?@"
791 "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
792 "abcdefghijklmnopqrstuvwxyz{|}~");
794 struct substring out, cr, lf, space;
797 memset (e, 0, sizeof *e);
799 cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
800 lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
801 space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
803 && cr.length <= MAX_UNIT
804 && cr.length == lf.length
805 && cr.length == space.length);
808 fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
812 ss_alloc_substring (&cr, ss_cstr ("\r"));
813 ss_alloc_substring (&lf, ss_cstr ("\n"));
814 ss_alloc_substring (&space, ss_cstr (" "));
818 memcpy (e->cr, cr.string, e->unit);
819 memcpy (e->lf, lf.string, e->unit);
820 memcpy (e->space, space.string, e->unit);
826 out = recode_substring_pool ("UTF-8", name, in, NULL);
827 e->is_ascii_compatible = ss_equals (in, out);
830 if (!e->is_ascii_compatible && e->unit == 1)
832 out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
833 e->is_ebcdic_compatible = (out.length == 1
834 && (uint8_t) out.string[0] == 0xc1);
838 e->is_ebcdic_compatible = false;
844 is_encoding_ascii_compatible (const char *encoding)
846 struct encoding_info e;
848 get_encoding_info (&e, encoding);
849 return e.is_ascii_compatible;
853 is_encoding_ebcdic_compatible (const char *encoding)
855 struct encoding_info e;
857 get_encoding_info (&e, encoding);
858 return e.is_ebcdic_compatible;
861 /* Returns true if iconv can convert ENCODING to and from UTF-8,
864 is_encoding_supported (const char *encoding)
866 return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
867 && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
870 /* Returns true if E is the name of a UTF-8 encoding.
872 XXX Possibly we should test not E as a string but its properties via
875 is_encoding_utf8 (const char *e)
877 return ((e[0] == 'u' || e[0] == 'U')
878 && (e[1] == 't' || e[1] == 'T')
879 && (e[2] == 'f' || e[2] == 'F')
880 && ((e[3] == '8' && e[4] == '\0')
881 || (e[3] == '-' && e[4] == '8' && e[5] == '\0')));
884 static struct encoding_category *categories;
885 static int n_categories;
887 static void SENTINEL (0)
888 add_category (size_t *allocated_categories, const char *category, ...)
890 struct encoding_category *c;
891 const char *encodings[16];
895 /* Count encoding arguments. */
896 va_start (args, category);
898 while ((encodings[n] = va_arg (args, const char *)) != NULL)
900 const char *encoding = encodings[n];
901 if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding))
904 assert (n < sizeof encodings / sizeof *encodings);
910 if (n_categories >= *allocated_categories)
911 categories = x2nrealloc (categories,
912 allocated_categories, sizeof *categories);
914 c = &categories[n_categories++];
915 c->category = category;
916 c->encodings = xmalloc (n * sizeof *c->encodings);
917 for (i = 0; i < n; i++)
918 c->encodings[i] = encodings[i];
923 init_encoding_categories (void)
933 add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
934 "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL);
935 add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256",
937 add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL);
938 add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4",
939 "Windows-1257", NULL_SENTINEL);
940 add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL);
941 add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2",
942 "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL);
943 add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK",
944 "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL);
945 add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS",
946 "EUC-TW", NULL_SENTINEL);
947 add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL);
948 add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111",
949 "KOI8-R", "MacCyrillic", NULL_SENTINEL);
950 add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL);
951 add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian",
953 add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL);
954 add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL);
955 add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL);
956 add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL);
957 add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255",
959 add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL);
960 add_category (&alloc, _("Hindi"), "MacDevangari", NULL_SENTINEL);
961 add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL);
962 add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS",
964 add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC",
966 add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL);
967 add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian",
969 add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL);
970 add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874",
972 add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254",
974 add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS",
975 "Windows-1258", NULL_SENTINEL);
976 add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15",
977 "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL);
980 /* Returns an array of "struct encoding_category" that contains only the
981 categories and encodings that the system supports. */
982 struct encoding_category *
983 get_encoding_categories (void)
985 init_encoding_categories ();
989 /* Returns the number of elements in the array returned by
990 get_encoding_categories(). */
992 get_n_encoding_categories (void)
994 init_encoding_categories ();