1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/i18n.h"
33 #include "libpspp/assertion.h"
34 #include "libpspp/compiler.h"
35 #include "libpspp/hmapx.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/pool.h"
38 #include "libpspp/str.h"
39 #include "libpspp/version.h"
41 #include "gl/c-strcase.h"
42 #include "gl/localcharset.h"
43 #include "gl/minmax.h"
44 #include "gl/xalloc.h"
45 #include "gl/relocatable.h"
46 #include "gl/xstrndup.h"
49 #define _(msgid) gettext (msgid)
/* Name of the encoding that recode_substring_pool() can substitute for its
   TO and FROM arguments. */
59 static char *default_encoding;
/* Table of converters filled in by create_iconv__(), keyed on a hash of the
   (tocode, fromcode) name pair. */
60 static struct hmapx map;
62 /* A wrapper around iconv_open */
63 static struct converter *
64 create_iconv__ (const char* tocode, const char* fromcode)
67 struct hmapx_node *node;
68 struct converter *converter;
/* Hash the pair of encoding names, then search MAP for an existing entry
   whose TOCODE and FROMCODE both match. */
71 hash = hash_string (tocode, hash_string (fromcode, 0));
72 HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
73 if (!strcmp (tocode, converter->tocode)
74 && !strcmp (fromcode, converter->fromcode))
/* Build a new entry with private copies of both names.  The entry is
   inserted into MAP even when iconv_open() fails; in that case CONV is
   (iconv_t) -1 and ERROR holds the errno value from the failure. */
77 converter = xmalloc (sizeof *converter);
78 converter->tocode = xstrdup (tocode);
79 converter->fromcode = xstrdup (fromcode);
80 converter->conv = iconv_open (tocode, fromcode);
81 converter->error = converter->conv == (iconv_t) -1 ? errno : 0;
82 hmapx_insert (&map, converter, hash);
/* Returns the iconv descriptor for converting FROMCODE to TOCODE, as obtained
   from create_iconv__().  The value is (iconv_t) -1 if iconv_open() failed
   for this pair. */
88 create_iconv (const char* tocode, const char* fromcode)
90 struct converter *converter;
92 converter = create_iconv__ (tocode, fromcode);
94 /* I don't think it's safe to translate this string or to use messaging
95 as the converters have not yet been set up */
/* A failure is only reported when the two encoding names differ.  ERROR is
   cleared afterward, so this branch is not taken again for the same
   converter entry. */
96 if (converter->error && strcmp (tocode, fromcode))
100 "cannot create a converter for `%s' to `%s': %s\n",
101 fromcode, tocode, strerror (converter->error));
102 converter->error = 0;
105 return converter->conv;
108 /* Converts the single byte C from encoding FROM to TO, returning the first byte of the result.
111 This function probably shouldn't be used at all, but some code still does use it. */
114 recode_byte (const char *to, const char *from, char c)
/* Recode C as a one-byte string; the explicit length of 1 is needed because
   &c is not null-terminated. */
117 char *s = recode_string (to, from, &c, 1);
123 /* Similar to recode_string_pool, but allocates the returned value on the heap
124 instead of in a pool. It is the caller's responsibility to free the returned string. */
127 recode_string (const char *to, const char *from,
128 const char *text, int length)
/* Same conversion as recode_string_pool(), with a null pool argument. */
130 return recode_string_pool (to, from, text, length, NULL);
133 /* Returns the length, in bytes, of the string that a similar recode_string()
134 call would return. */
136 recode_string_len (const char *to, const char *from,
137 const char *text, int length)
/* The conversion is actually performed and the result measured with
   strlen(), so the count stops at the first null byte in the output. */
139 char *s = recode_string (to, from, text, length);
140 size_t len = strlen (s);
145 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
146 at OP, and appends a null terminator to the output.
148 Returns the output length if successful, -1 if the output buffer is too small. */
151 try_recode (iconv_t conv,
152 const char *ip, size_t inbytes,
153 char *op_, size_t outbytes)
155 /* FIXME: Need to ensure that this char is valid in the target encoding */
156 const char fallbackchar = '?';
159 /* Put the converter into the initial shift state, in case there was any
160 state information left over from its last usage. */
161 iconv (conv, NULL, 0, NULL, 0);
/* Each pass through this loop handles one iconv() failure; the loop ends
   once iconv() returns something other than -1. */
163 while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
164 &op, &outbytes) == -1)
/* Two of the failure branches write FALLBACKCHAR to the output in place of
   the offending input.  NOTE(review): the conditions selecting these
   branches are not visible in this excerpt. */
170 *op++ = fallbackchar;
177 *op++ = fallbackchar;
187 /* should never happen */
188 fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
200 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
201 dynamically allocated string in TO-encoding. Any characters which cannot be
202 converted will be represented by '?'.
204 LENGTH should be the length of the string or -1, if null terminated.
206 The returned string will be allocated on POOL.
208 This function's behaviour differs from that of g_convert_with_fallback
209 provided by GLib. The GLib function will fail (returns NULL) if any part of
210 the input string is not valid in the declared input encoding. This function
211 however perseveres even in the presence of badly encoded input. */
213 recode_string_pool (const char *to, const char *from,
214 const char *text, int length, struct pool *pool)
216 struct substring out;
/* Measure TEXT with strlen().  Going by the contract in the header comment
   this is for LENGTH == -1; the guarding condition is not visible in this
   excerpt. */
222 length = strlen (text);
/* Wrap TEXT in a substring and let recode_substring_pool() do the work. */
224 out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
228 /* Returns the name of the encoding that should be used for file names.
230 This is meant to be the same encoding used by g_filename_from_uri() and
231 g_filename_to_uri() in GLib. */
233 filename_encoding (void)
/* Windows builds take a separate branch (its body is not visible in this
   excerpt). */
235 #if defined _WIN32 || defined __WIN32__
/* Use the character set of the current locale. */
238 return locale_charset ();
/* Allocates, with xmalloc(), a buffer of A_LEN + B_LEN + 1 bytes and fills it
   with the A_LEN bytes at A followed by the B_LEN bytes at B and a null
   terminator. */
243 xconcat2 (const char *a, size_t a_len,
244 const char *b, size_t b_len)
246 char *s = xmalloc (a_len + b_len + 1);
247 memcpy (s, a, a_len);
248 memcpy (s + a_len, b, b_len);
249 s[a_len + b_len] = '\0';
253 /* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
254 TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
255 ENCODING. If the re-encoded result is no more than MAX_LEN bytes long, then
256 it returns HEAD_LEN. Otherwise, it drops one character[*] from the end of
257 HEAD and tries again, repeating as necessary until the concatenated result
258 fits or until HEAD_LEN reaches 0.
260 [*] Actually this function drops grapheme clusters instead of characters, so
261 that, e.g. a Unicode character followed by a combining accent character
262 is either completely included or completely excluded from HEAD_LEN. See
263 UAX #29 at http://unicode.org/reports/tr29/ for more information on grapheme clusters.
266 A null ENCODING is treated as UTF-8.
268 Sometimes this function has to actually construct the concatenated string to
269 measure its length. When this happens, it sets *RESULTP to that
270 null-terminated string, allocated with malloc(), for the caller to use if it
271 needs it. Otherwise, it sets *RESULTP to NULL.
273 Simple examples for encoding="UTF-8", max_len=6:
275 head="abc", tail="xyz" => 3
276 head="abcd", tail="xyz" => 3 ("d" dropped).
277 head="abc", tail="uvwxyz" => 0 ("abc" dropped).
278 head="abc", tail="tuvwxyz" => 0 ("abc" dropped).
280 Examples for encoding="ISO-8859-1", max_len=6:
282 head="éèä", tail="xyz" => 6
283 (each letter in head is only 1 byte in ISO-8859-1 even though they
284 each take 2 bytes in UTF-8 encoding) */
287 utf8_encoding_concat__ (const char *head, size_t head_len,
288 const char *tail, size_t tail_len,
289 const char *encoding, size_t max_len,
/* UTF-8 target (or null ENCODING): the byte lengths of HEAD and TAIL can be
   compared against MAX_LEN directly, without recoding. */
295 else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))
/* Everything fits as is. */
297 if (head_len + tail_len <= max_len)
/* TAIL alone already reaches MAX_LEN. */
299 else if (tail_len >= max_len)
/* Step through HEAD one character at a time while the offset still leaves
   room for TAIL, checking for grapheme cluster boundaries.  The subtraction
   cannot wrap because the preceding branch handled tail_len >= max_len. */
309 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
311 ofs <= max_len - tail_len;
316 mblen = u8_mbtouc (&next,
317 CHAR_CAST (const uint8_t *, head + ofs),
319 if (uc_is_grapheme_break (prev, next))
/* Other encodings: build the full concatenation and measure its length
   after recoding.  With an empty TAIL, HEAD itself is used instead of a
   copy. */
332 result = (tail_len > 0
333 ? xconcat2 (head, head_len, tail, tail_len)
334 : CONST_CAST (char *, head));
335 if (recode_string_len (encoding, "UTF-8", result,
336 head_len + tail_len) <= max_len)
/* Hand RESULT to the caller only if it is a separate allocation. */
338 *resultp = result != head ? result : NULL;
/* Tracks whether RESULT currently holds a concatenation that fits. */
343 bool correct_result = false;
/* Step through HEAD again; at grapheme cluster boundaries, rebuild RESULT
   from the first OFS bytes of HEAD plus TAIL and measure its recoded
   length.  NOTE(review): the loop condition and increment are not visible
   in this excerpt. */
350 for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
357 mblen = u8_mbtouc (&next,
358 CHAR_CAST (const uint8_t *, head + ofs),
360 if (uc_is_grapheme_break (prev, next))
364 memcpy (result, head, ofs);
365 memcpy (result + ofs, tail, tail_len);
366 result[ofs + tail_len] = '\0';
369 if (recode_string_len (encoding, "UTF-8", result,
370 ofs + tail_len) <= max_len)
372 correct_result = true;
376 correct_result = false;
395 /* Concatenates a prefix of HEAD with all of TAIL and returns the result as a
396 null-terminated string owned by the caller. HEAD, TAIL, and the returned
397 string are all encoded in UTF-8. As many characters[*] from the beginning
398 of HEAD are included as will fit within MAX_LEN bytes supposing that the
399 resulting string were to be re-encoded in ENCODING. All of TAIL is always
400 included, even if TAIL by itself is longer than MAX_LEN in ENCODING.
402 [*] Actually this function drops grapheme clusters instead of characters, so
403 that, e.g. a Unicode character followed by a combining accent character
404 is either completely included or completely excluded from the returned
405 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
406 information on grapheme clusters.
408 A null ENCODING is treated as UTF-8.
410 Simple examples for encoding="UTF-8", max_len=6:
412 head="abc", tail="xyz" => "abcxyz"
413 head="abcd", tail="xyz" => "abcxyz"
414 head="abc", tail="uvwxyz" => "uvwxyz"
415 head="abc", tail="tuvwxyz" => "tuvwxyz"
417 Examples for encoding="ISO-8859-1", max_len=6:
419 head="éèä", tail="xyz" => "éèäxyz"
420 (each letter in HEAD is only 1 byte in ISO-8859-1 even though they
421 each take 2 bytes in UTF-8 encoding) */
424 utf8_encoding_concat (const char *head, const char *tail,
425 const char *encoding, size_t max_len)
427 size_t tail_len = strlen (tail);
/* PREFIX_LEN is how many bytes of HEAD to keep; RESULT is set by the helper
   to a string it built along the way, or to null. */
431 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
432 encoding, max_len, &result);
/* When the helper produced no string, join the first PREFIX_LEN bytes of
   HEAD with all of TAIL. */
433 return (result != NULL
435 : xconcat2 (head, prefix_len, tail, tail_len));
438 /* Returns the length, in bytes, of the string that would be returned by
439 utf8_encoding_concat() if passed the same arguments, but the implementation
440 is often more efficient. */
442 utf8_encoding_concat_len (const char *head, const char *tail,
443 const char *encoding, size_t max_len)
445 size_t tail_len = strlen (tail);
/* Only the prefix length is needed here; RESULT is passed because the
   helper's interface requires somewhere to store it. */
449 prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
450 encoding, max_len, &result);
/* Length of the kept part of HEAD plus all of TAIL, in UTF-8 bytes. */
452 return prefix_len + tail_len;
455 /* Returns an allocated, null-terminated string, owned by the caller,
456 containing as many characters[*] from the beginning of S that would fit
457 within MAX_LEN bytes if the returned string were to be re-encoded in
458 ENCODING. Both S and the returned string are encoded in UTF-8.
460 [*] Actually this function drops grapheme clusters instead of characters, so
461 that, e.g. a Unicode character followed by a combining accent character
462 is either completely included or completely excluded from the returned
463 string. See UAX #29 at http://unicode.org/reports/tr29/ for more
464 information on grapheme clusters.
466 A null ENCODING is treated as UTF-8. */
469 utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
/* Truncation is concatenation with an empty tail. */
471 return utf8_encoding_concat (s, "", encoding, max_len);
474 /* Returns the length, in bytes, of the string that would be returned by
475 utf8_encoding_trunc() if passed the same arguments, but the implementation
476 is often more efficient. */
478 utf8_encoding_trunc_len (const char *s, const char *encoding, size_t max_len)
/* Same as utf8_encoding_concat_len() with an empty tail. */
480 return utf8_encoding_concat_len (s, "", encoding, max_len);
483 /* Returns FILENAME converted from UTF-8 to the filename encoding.
484 On Windows the filename encoding is UTF-8; elsewhere it is based on the current locale's character set. */
487 utf8_to_filename (const char *filename)
/* A length of -1 tells recode_string() that FILENAME is null-terminated. */
489 return recode_string (filename_encoding (), "UTF-8", filename, -1);
492 /* Returns FILENAME converted from the filename encoding to UTF-8.
493 On Windows the filename encoding is UTF-8; elsewhere it is based on the current locale's character set. */
496 filename_to_utf8 (const char *filename)
/* A length of -1 tells recode_string() that FILENAME is null-terminated. */
498 return recode_string ("UTF-8", filename_encoding (), filename, -1);
501 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
502 dynamically allocated string in TO-encoding. Any characters which cannot be
503 converted will be represented by '?'.
505 The returned string will be null-terminated and allocated on POOL with pool_malloc().
508 This function's behaviour differs from that of g_convert_with_fallback
509 provided by GLib. The GLib function will fail (returns NULL) if any part of
510 the input string is not valid in the declared input encoding. This function
511 however perseveres even in the presence of badly encoded input. */
513 recode_substring_pool (const char *to, const char *from,
514 struct substring text, struct pool *pool)
516 size_t outbufferlength;
/* Substitute the default encoding for TO and FROM.  NOTE(review): the
   conditions guarding these two assignments are not visible in this
   excerpt. */
520 to = default_encoding;
523 from = default_encoding;
525 conv = create_iconv (to, from);
/* No converter is available for this pair: produce a null-terminated,
   byte-for-byte copy of TEXT instead. */
527 if ( (iconv_t) -1 == conv )
529 struct substring out;
531 out.string = pool_malloc (pool, text.length + 1);
532 out.length = text.length;
533 memcpy (out.string, text.string, text.length);
534 out.string[out.length] = '\0';
/* Try output buffers whose sizes are successive powers of two, skipping
   sizes that are not larger than the input.  The loop also ends if the
   size wraps around to 0. */
538 for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
539 if ( outbufferlength > text.length)
541 char *output = pool_malloc (pool, outbufferlength);
542 ssize_t output_len = try_recode (conv, text.string, text.length,
543 output, outbufferlength);
/* The buffer of an attempt that is not returned is freed before the next
   size is tried. */
545 return ss_buffer (output, output_len);
546 pool_free (pool, output);
555 setlocale (LC_ALL, "");
556 bindtextdomain (PACKAGE, relocate(locale_dir));
557 textdomain (PACKAGE);
559 assert (default_encoding == NULL);
560 default_encoding = xstrdup (locale_charset ());
/* Returns the current default encoding name.  The pointer returned is the
   file-level DEFAULT_ENCODING itself, not a copy. */
566 get_default_encoding (void)
568 return default_encoding;
/* Replaces the default encoding with a private copy of ENC, freeing the
   previous value. */
572 set_default_encoding (const char *enc)
574 free (default_encoding);
575 default_encoding = xstrdup (enc);
579 /* Attempts to set the encoding from a locale name;
580 returns true if successful.
581 This function does not (should not!) alter the current locale. */
584 set_encoding_from_locale (const char *loc)
/* Save a copy of the current LC_CTYPE setting so that it can be restored
   below. */
589 char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
/* Record the character set reported under the "C" locale as a baseline. */
591 setlocale (LC_CTYPE, "C");
592 c_encoding = xstrdup (locale_charset ());
/* Switch to LOC and record its character set. */
594 setlocale (LC_CTYPE, loc);
595 loc_encoding = xstrdup (locale_charset ());
/* LOC reporting the same character set as the "C" locale is treated as a
   special case (the body of this branch is not visible in this excerpt). */
598 if ( 0 == strcmp (loc_encoding, c_encoding))
/* Put the saved LC_CTYPE setting back. */
603 setlocale (LC_CTYPE, tmp);
/* Adopt LOC_ENCODING as the default; ownership of the string moves to
   DEFAULT_ENCODING. */
609 free (default_encoding);
610 default_encoding = loc_encoding;
623 struct hmapx_node *node;
624 struct converter *cvtr;
626 HMAPX_FOR_EACH (cvtr, node, &map)
629 free (cvtr->fromcode);
630 if (cvtr->conv != (iconv_t) -1)
631 iconv_close (cvtr->conv);
635 hmapx_destroy (&map);
637 free (default_encoding);
638 default_encoding = NULL;
/* Tests ENC by trying to open an iconv converter from ENC to UTF8. */
644 valid_encoding (const char *enc)
646 iconv_t conv = iconv_open (UTF8, enc);
/* (iconv_t) -1 is iconv_open()'s failure result. */
648 if ( conv == (iconv_t) -1)
657 /* Returns the system locale's idea of the
658 decimal separator character. */
660 get_system_decimal (void)
/* Take the first byte of the locale's RADIXCHAR string. */
665 radix_char = nl_langinfo (RADIXCHAR)[0];
/* Alternative path: format 2.5 with "%f" into BUF.  NOTE(review): what
   selects between the two paths, and how BUF is read afterward, is not
   visible in this excerpt. */
669 snprintf (buf, sizeof buf, "%f", 2.5);
/* Formats a short name for code point UC into the 16-byte BUFFER: code
   points from 0x20 through 0x7e are written as the character itself between
   ` and ', any other value as "U+" followed by at least four uppercase
   hexadecimal digits. */
678 uc_name (ucs4_t uc, char buffer[16])
680 if (uc >= 0x20 && uc < 0x7f)
681 snprintf (buffer, 16, "`%c'", uc);
683 snprintf (buffer, 16, "U+%04X", uc);
687 /* UTF-8 functions that deal with uppercase/lowercase distinctions. */
689 /* Returns a hash value for the N bytes of UTF-8 encoded data starting at S,
690 with lowercase and uppercase letters treated as equal, starting from BASIS. */
693 utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis)
/* Stack buffer offered to u8_casefold() for its result; FOLDED_LEN passes in
   the buffer's size. */
695 uint8_t folded_buf[2048];
696 size_t folded_len = sizeof folded_buf;
/* Case-fold S with NFKD normalization so that strings differing only in
   case produce the same bytes to hash. */
700 folded_s = u8_casefold (CHAR_CAST (const uint8_t *, s), n,
701 NULL, UNINORM_NFKD, folded_buf, &folded_len);
702 if (folded_s != NULL)
704 hash = hash_bytes (folded_s, folded_len, basis);
/* The result may live somewhere other than FOLDED_BUF (what is done with it
   in that case is not visible in this excerpt). */
705 if (folded_s != folded_buf)
/* Fallback: hash the bytes of S unchanged. */
712 hash = hash_bytes (s, n, basis);
718 /* Returns a hash value for null-terminated UTF-8 string S, with lowercase and
719 uppercase letters treated as equal, starting from BASIS. */
721 utf8_hash_case_string (const char *s, unsigned int basis)
/* Measure S with strlen() and defer to the byte-counted variant. */
723 return utf8_hash_case_bytes (s, strlen (s), basis);
726 /* Compares UTF-8 strings A and B case-insensitively.
727 Returns a negative value if A < B, zero if A == B, positive if A > B. */
729 utf8_strcasecmp (const char *a, const char *b)
/* Measure both strings with strlen() and defer to the length-counted
   variant. */
731 return utf8_strncasecmp (a, strlen (a), b, strlen (b));
734 /* Compares UTF-8 strings A (with length AN) and B (with length BN) case-insensitively.
736 Returns a negative value if A < B, zero if A == B, positive if A > B. */
738 utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn)
/* u8_casecmp() stores the comparison outcome in RESULT; a nonzero return
   value from the call itself leads to the bytewise fallback below. */
742 if (u8_casecmp (CHAR_CAST (const uint8_t *, a), an,
743 CHAR_CAST (const uint8_t *, b), bn,
744 NULL, UNINORM_NFKD, &result))
/* Fallback: compare the common-length prefix bytewise, then order by
   length.  NOTE(review): the condition between these two statements is not
   visible in this excerpt. */
749 result = memcmp (a, b, MIN (an, bn));
751 result = an < bn ? -1 : an > bn;
/* Applies the case-mapping function F to the null-terminated UTF-8 string S
   and returns the outcome.  F has the signature shared by u8_toupper and
   u8_tolower. */
758 utf8_casemap (const char *s,
759 uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
760 uint8_t *, size_t *))
/* The length passed is strlen (s) + 1, so the null terminator is mapped
   along with the text.  No result buffer is supplied (the fifth argument is
   null). */
765 result = CHAR_CAST (char *,
766 f (CHAR_CAST (const uint8_t *, s), strlen (s) + 1,
767 NULL, NULL, NULL, &size));
/* Fallback that starts from a plain copy of S (the condition selecting it is
   not visible in this excerpt). */
773 result = xstrdup (s);
/* Maps the UTF-8 string S through u8_toupper by way of utf8_casemap(). */
779 utf8_to_upper (const char *s)
781 return utf8_casemap (s, u8_toupper);
/* Maps the UTF-8 string S through u8_tolower by way of utf8_casemap(). */
785 utf8_to_lower (const char *s)
787 return utf8_casemap (s, u8_tolower);
/* Fills in *E with properties of the encoding NAME, found by recoding small
   samples of text between NAME and UTF-8. */
791 get_encoding_info (struct encoding_info *e, const char *name)
/* Sample of printable ASCII characters used for the ASCII-compatibility
   probe below. */
793 const struct substring in = SS_LITERAL_INITIALIZER (
795 "!\"#$%&'()*+,-./0123456789:;<=>?@"
796 "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
797 "abcdefghijklmnopqrstuvwxyz{|}~");
799 struct substring out, cr, lf, space;
802 memset (e, 0, sizeof *e);
/* Find out how carriage return, line feed, and space are represented in
   NAME. */
804 cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
805 lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
806 space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
/* The three representations must be equally long and no longer than
   MAX_UNIT (the start of this condition is not visible in this excerpt). */
808 && cr.length <= MAX_UNIT
809 && cr.length == lf.length
810 && cr.length == space.length);
/* Unsupported encoding: warn on stderr and use the plain one-byte strings
   instead. */
813 fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
817 ss_alloc_substring (&cr, ss_cstr ("\r"));
818 ss_alloc_substring (&lf, ss_cstr ("\n"));
819 ss_alloc_substring (&space, ss_cstr (" "));
/* Record E->unit bytes of each representation. */
823 memcpy (e->cr, cr.string, e->unit);
824 memcpy (e->lf, lf.string, e->unit);
825 memcpy (e->space, space.string, e->unit);
/* NAME counts as ASCII compatible if the sample text, read as NAME-encoded,
   is unchanged by conversion to UTF-8. */
831 out = recode_substring_pool ("UTF-8", name, in, NULL);
832 e->is_ascii_compatible = ss_equals (in, out);
/* The EBCDIC probe only runs for single-byte units that are not ASCII
   compatible.  NOTE(review): this converts "A" from NAME to UTF-8 and then
   tests for the byte 0xc1.  0xc1 is the EBCDIC code for 'A' and cannot occur
   as a one-byte UTF-8 sequence, so the conversion direction looks reversed
   -- confirm. */
835 if (!e->is_ascii_compatible && e->unit == 1)
837 out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
838 e->is_ebcdic_compatible = (out.length == 1
839 && (uint8_t) out.string[0] == 0xc1);
843 e->is_ebcdic_compatible = false;
/* Returns the is_ascii_compatible flag that get_encoding_info() computes for
   ENCODING. */
849 is_encoding_ascii_compatible (const char *encoding)
851 struct encoding_info e;
853 get_encoding_info (&e, encoding);
854 return e.is_ascii_compatible;
/* Returns the is_ebcdic_compatible flag that get_encoding_info() computes for
   ENCODING. */
858 is_encoding_ebcdic_compatible (const char *encoding)
860 struct encoding_info e;
862 get_encoding_info (&e, encoding);
863 return e.is_ebcdic_compatible;
866 /* Returns true if iconv can convert ENCODING to and from UTF-8, otherwise false. */
869 is_encoding_supported (const char *encoding)
/* Both directions must open successfully.  create_iconv__() is called
   directly rather than create_iconv(), so the failure report that
   create_iconv() makes is not triggered here. */
871 return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
872 && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
/* Returns true if E is the name of a UTF-8 encoding, that is, if E is
   "UTF-8" or "UTF8" with each letter in either case; otherwise returns
   false.

   XXX Possibly this should test properties of the encoding rather than
   comparing the name E as a string. */
bool
is_encoding_utf8 (const char *e)
{
  static const char lower[] = "utf";
  static const char upper[] = "UTF";
  const char *rest;
  int i;

  /* Check the three-letter prefix one byte at a time, stopping at the first
     mismatch so that nothing past a null terminator is ever read. */
  for (i = 0; i < 3; i++)
    if (e[i] != lower[i] && e[i] != upper[i])
      return false;

  /* A single optional hyphen may separate the prefix from the digit. */
  rest = e + 3;
  if (*rest == '-')
    rest++;

  return rest[0] == '8' && rest[1] == '\0';
}
/* Array of encoding categories, grown by add_category(). */
889 static struct encoding_category *categories;
/* Number of elements of CATEGORIES currently in use. */
890 static int n_categories;
/* Appends a category named CATEGORY to CATEGORIES.  The variable arguments
   are encoding names, ended by a null pointer.  *ALLOCATED_CATEGORIES is the
   allocated capacity of CATEGORIES and is updated when the array grows. */
892 static void SENTINEL (0)
893 add_category (size_t *allocated_categories, const char *category, ...)
895 struct encoding_category *c;
896 const char *encodings[16];
900 /* Count encoding arguments. */
901 va_start (args, category);
/* Each argument is stored at encodings[n] before being examined.  Only the
   name "Auto" and encodings that is_encoding_supported() accepts are
   considered further. */
903 while ((encodings[n] = va_arg (args, const char *)) != NULL)
905 const char *encoding = encodings[n];
906 if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding))
/* NOTE(review): this bound check appears to run only after encodings[n] has
   already been written by the loop condition above, so it would not prevent
   a write past the 16-element array -- confirm. */
909 assert (n < sizeof encodings / sizeof *encodings);
/* Grow the array when it is full. */
915 if (n_categories >= *allocated_categories)
916 categories = x2nrealloc (categories,
917 allocated_categories, sizeof *categories);
/* Fill in the new element.  The category name and the encoding names are
   stored as pointers, not copied; only the array of pointers is
   allocated. */
919 c = &categories[n_categories++];
920 c->category = category;
921 c->encodings = xmalloc (n * sizeof *c->encodings);
922 for (i = 0; i < n; i++)
923 c->encodings[i] = encodings[i];
/* Registers each group of encodings with add_category().  Every category
   name except "Unicode" is passed through the _() translation macro. */
928 init_encoding_categories (void)
938 add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
939 "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL);
940 add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256",
942 add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL);
943 add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4",
944 "Windows-1257", NULL_SENTINEL);
945 add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL);
946 add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2",
947 "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL);
948 add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK",
949 "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL);
950 add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS",
951 "EUC-TW", NULL_SENTINEL);
952 add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL);
953 add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111",
954 "KOI8-R", "MacCyrillic", NULL_SENTINEL);
955 add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL);
956 add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian",
958 add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL);
959 add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL);
960 add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL);
961 add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL);
962 add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255",
964 add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL);
965 add_category (&alloc, _("Hindi"), "MacDevangari", NULL_SENTINEL);
966 add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL);
967 add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS",
969 add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC",
971 add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL);
972 add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian",
974 add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL);
975 add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874",
977 add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254",
979 add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS",
980 "Windows-1258", NULL_SENTINEL);
981 add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15",
982 "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL);
985 /* Returns an array of "struct encoding_category" that contains only the
986 categories and encodings that the system supports. */
987 struct encoding_category *
988 get_encoding_categories (void)
/* Run the category setup before the table is handed out. */
990 init_encoding_categories ();
994 /* Returns the number of elements in the array returned by
995 get_encoding_categories(). */
997 get_n_encoding_categories (void)
/* Run the category setup before the count is read. */
999 init_encoding_categories ();
1000 return n_categories;