pintos-os.org Git - pspp/blob - src/libpspp/i18n.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include "libpspp/i18n.h"
  20
  21 #include <assert.h>
  22 #include <errno.h>
  23 #include <iconv.h>
  24 #include <langinfo.h>
  25 #include <libintl.h>
  26 #include <locale.h>
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <unigbrk.h>
  31
  32 #include "libpspp/assertion.h"
  33 #include "libpspp/hmapx.h"
  34 #include "libpspp/hash-functions.h"
  35 #include "libpspp/pool.h"
  36 #include "libpspp/str.h"
  37 #include "libpspp/version.h"
  38
  39 #include "gl/c-strcase.h"
  40 #include "gl/localcharset.h"
  41 #include "gl/xalloc.h"
  42 #include "gl/relocatable.h"
  43 #include "gl/xstrndup.h"
  44
/* One cached iconv conversion descriptor for converting text from FROMCODE
   to TOCODE. */
struct converter
 {
    char *tocode;       /* Name of the target encoding (allocated copy). */
    char *fromcode;     /* Name of the source encoding (allocated copy). */
    iconv_t conv;       /* Result of iconv_open(); (iconv_t) -1 on failure. */
    int error;          /* errno saved from a failed iconv_open(), else 0.
                           Reset to 0 once the failure has been reported. */
  };

/* Encoding used in place of a null encoding name. */
static char *default_encoding;

/* Every `struct converter' created so far, hashed on both encoding names. */
static struct hmapx map;
  55
  56 /* A wrapper around iconv_open */
  57 static struct converter *
  58 create_iconv__ (const char* tocode, const char* fromcode)
  59 {
  60   size_t hash;
  61   struct hmapx_node *node;
  62   struct converter *converter;
  63   assert (fromcode);
  64
  65   hash = hash_string (tocode, hash_string (fromcode, 0));
  66   HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
  67     if (!strcmp (tocode, converter->tocode)
  68         && !strcmp (fromcode, converter->fromcode))
  69       return converter;
  70
  71   converter = xmalloc (sizeof *converter);
  72   converter->tocode = xstrdup (tocode);
  73   converter->fromcode = xstrdup (fromcode);
  74   converter->conv = iconv_open (tocode, fromcode);
  75   converter->error = converter->conv == (iconv_t) -1 ? errno : 0;
  76   hmapx_insert (&map, converter, hash);
  77
  78   return converter;
  79 }
  80
  81 static iconv_t
  82 create_iconv (const char* tocode, const char* fromcode)
  83 {
  84   struct converter *converter;
  85
  86   converter = create_iconv__ (tocode, fromcode);
  87
  88   /* I don't think it's safe to translate this string or to use messaging
  89      as the converters have not yet been set up */
  90   if (converter->error && strcmp (tocode, fromcode))
  91     {
  92       fprintf (stderr,
  93                "Warning: "
  94                "cannot create a converter for `%s' to `%s': %s\n",
  95                fromcode, tocode, strerror (converter->error));
  96       converter->error = 0;
  97     }
  98
  99   return converter->conv;
 100 }
 101
 102 /* Converts the single byte C from encoding FROM to TO, returning the first
 103    byte of the result.
 104
 105    This function probably shouldn't be used at all, but some code still does
 106    use it. */
 107 char
 108 recode_byte (const char *to, const char *from, char c)
 109 {
 110   char x;
 111   char *s = recode_string (to, from, &c, 1);
 112   x = s[0];
 113   free (s);
 114   return x;
 115 }
 116
 117 /* Similar to recode_string_pool, but allocates the returned value on the heap
 118    instead of in a pool.  It is the caller's responsibility to free the
 119    returned value. */
 120 char *
 121 recode_string (const char *to, const char *from,
 122                const char *text, int length)
 123 {
 124   return recode_string_pool (to, from, text, length, NULL);
 125 }
 126
 127 /* Returns the length, in bytes, of the string that a similar recode_string()
 128    call would return. */
 129 size_t
 130 recode_string_len (const char *to, const char *from,
 131                    const char *text, int length)
 132 {
 133   char *s = recode_string (to, from, text, length);
 134   size_t len = strlen (s);
 135   free (s);
 136   return len;
 137 }
 138
 139 /* Uses CONV to convert the INBYTES starting at IP into the OUTBYTES starting
 140    at OP, and appends a null terminator to the output.
 141
 142    Returns the output length if successful, -1 if the output buffer is too
 143    small. */
 144 static ssize_t
 145 try_recode (iconv_t conv,
 146             const char *ip, size_t inbytes,
 147             char *op_, size_t outbytes)
 148 {
 149   /* FIXME: Need to ensure that this char is valid in the target encoding */
 150   const char fallbackchar = '?';
 151   char *op = op_;
 152
 153   /* Put the converter into the initial shift state, in case there was any
 154      state information left over from its last usage. */
 155   iconv (conv, NULL, 0, NULL, 0);
 156
 157   while (iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
 158                 &op, &outbytes) == -1)
 159     switch (errno)
 160       {
 161       case EINVAL:
 162         if (outbytes < 2)
 163           return -1;
 164         *op++ = fallbackchar;
 165         *op = '\0';
 166         return op - op_;
 167
 168       case EILSEQ:
 169         if (outbytes == 0)
 170           return -1;
 171         *op++ = fallbackchar;
 172         outbytes--;
 173         ip++;
 174         inbytes--;
 175         break;
 176
 177       case E2BIG:
 178         return -1;
 179
 180       default:
 181         /* should never happen */
 182         fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
 183         NOT_REACHED ();
 184         break;
 185       }
 186
 187   if (outbytes == 0)
 188     return -1;
 189
 190   *op = '\0';
 191   return op - op_;
 192 }
 193
 194 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
 195    dynamically allocated string in TO-encoding.  Any characters which cannot be
 196    converted will be represented by '?'.
 197
 198    LENGTH should be the length of the string or -1, if null terminated.
 199
 200    The returned string will be allocated on POOL.
 201
 202    This function's behaviour differs from that of g_convert_with_fallback
 203    provided by GLib.  The GLib function will fail (returns NULL) if any part of
 204    the input string is not valid in the declared input encoding.  This function
 205    however perseveres even in the presence of badly encoded input. */
 206 char *
 207 recode_string_pool (const char *to, const char *from,
 208                     const char *text, int length, struct pool *pool)
 209 {
 210   struct substring out;
 211
 212   if ( text == NULL )
 213     return NULL;
 214
 215   if ( length == -1 )
 216      length = strlen (text);
 217
 218   out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
 219   return out.string;
 220 }
 221
 222 /* Returns the name of the encoding that should be used for file names.
 223
 224    This is meant to be the same encoding used by g_filename_from_uri() and
 225    g_filename_to_uri() in GLib. */
 226 static const char *
 227 filename_encoding (void)
 228 {
 229 #if defined _WIN32 || defined __WIN32__
 230   return "UTF-8";
 231 #else
 232   return locale_charset ();
 233 #endif
 234 }
 235
 236 static char *
 237 xconcat2 (const char *a, size_t a_len,
 238           const char *b, size_t b_len)
 239 {
 240   char *s = xmalloc (a_len + b_len + 1);
 241   memcpy (s, a, a_len);
 242   memcpy (s + a_len, b, b_len);
 243   s[a_len + b_len] = '\0';
 244   return s;
 245 }
 246
 247 /* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
 248    TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
 249    ENCODING.  If the re-encoded result is no more than MAX_LEN bytes long, then
 250    it returns HEAD_LEN.  Otherwise, it drops one character[*] from the end of
 251    HEAD and tries again, repeating as necessary until the concatenated result
 252    fits or until HEAD_LEN reaches 0.
 253
 254    [*] Actually this function drops grapheme clusters instead of characters, so
 255        that, e.g. a Unicode character followed by a combining accent character
 256        is either completely included or completely excluded from HEAD_LEN.  See
 257        UAX #29 at http://unicode.org/reports/tr29/ for more information on
 258        grapheme clusters.
 259
 260    A null ENCODING is treated as UTF-8.
 261
 262    Sometimes this function has to actually construct the concatenated string to
 263    measure its length.  When this happens, it sets *RESULTP to that
 264    null-terminated string, allocated with malloc(), for the caller to use if it
 265    needs it.  Otherwise, it sets *RESULTP to NULL.
 266
 267    Simple examples for encoding="UTF-8", max_len=6:
 268
 269        head="abc",  tail="xyz"     => 3
 270        head="abcd", tail="xyz"     => 3 ("d" dropped).
 271        head="abc",  tail="uvwxyz"  => 0 ("abc" dropped).
 272        head="abc",  tail="tuvwxyz" => 0 ("abc" dropped).
 273
 274    Examples for encoding="ISO-8859-1", max_len=6:
 275
 276        head="éèä",  tail="xyz"     => 6
 277          (each letter in head is only 1 byte in ISO-8859-1 even though they
 278           each take 2 bytes in UTF-8 encoding)
 279 */
 280 static size_t
 281 utf8_encoding_concat__ (const char *head, size_t head_len,
 282                         const char *tail, size_t tail_len,
 283                         const char *encoding, size_t max_len,
 284                         char **resultp)
 285 {
 286   *resultp = NULL;
 287   if (head_len == 0)
 288     return 0;
 289   else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))
 290     {
 291       if (head_len + tail_len <= max_len)
 292         return head_len;
 293       else if (tail_len >= max_len)
 294         return 0;
 295       else
 296         {
 297           size_t copy_len;
 298           ucs4_t prev;
 299           size_t ofs;
 300           int mblen;
 301
 302           copy_len = 0;
 303           for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
 304                                 head_len);
 305                ofs <= max_len - tail_len;
 306                ofs += mblen)
 307             {
 308               ucs4_t next;
 309
 310               mblen = u8_mbtouc (&next,
 311                                  CHAR_CAST (const uint8_t *, head + ofs),
 312                                  head_len - ofs);
 313               if (uc_is_grapheme_break (prev, next))
 314                 copy_len = ofs;
 315
 316               prev = next;
 317             }
 318
 319           return copy_len;
 320         }
 321     }
 322   else
 323     {
 324       char *result;
 325
 326       result = (tail_len > 0
 327                 ? xconcat2 (head, head_len, tail, tail_len)
 328                 : CONST_CAST (char *, head));
 329       if (recode_string_len (encoding, "UTF-8", result,
 330                              head_len + tail_len) <= max_len)
 331         {
 332           *resultp = result != head ? result : NULL;
 333           return head_len;
 334         }
 335       else
 336         {
 337           bool correct_result = false;
 338           size_t copy_len;
 339           ucs4_t prev;
 340           size_t ofs;
 341           int mblen;
 342
 343           copy_len = 0;
 344           for (ofs = u8_mbtouc (&prev, CHAR_CAST (const uint8_t *, head),
 345                                 head_len);
 346                ofs <= head_len;
 347                ofs += mblen)
 348             {
 349               ucs4_t next;
 350
 351               mblen = u8_mbtouc (&next,
 352                                  CHAR_CAST (const uint8_t *, head + ofs),
 353                                  head_len - ofs);
 354               if (uc_is_grapheme_break (prev, next))
 355                 {
 356                   if (result != head)
 357                     {
 358                       memcpy (result, head, ofs);
 359                       memcpy (result + ofs, tail, tail_len);
 360                       result[ofs + tail_len] = '\0';
 361                     }
 362
 363                   if (recode_string_len (encoding, "UTF-8", result,
 364                                          ofs + tail_len) <= max_len)
 365                     {
 366                       correct_result = true;
 367                       copy_len = ofs;
 368                     }
 369                   else
 370                     correct_result = false;
 371                 }
 372
 373               prev = next;
 374             }
 375
 376           if (result != head)
 377             {
 378               if (correct_result)
 379                 *resultp = result;
 380               else
 381                 free (result);
 382             }
 383
 384           return copy_len;
 385         }
 386     }
 387 }
 388
 389 /* Concatenates a prefix of HEAD with all of TAIL and returns the result as a
 390    null-terminated string owned by the caller.  HEAD, TAIL, and the returned
 391    string are all encoded in UTF-8.  As many characters[*] from the beginning
 392    of HEAD are included as will fit within MAX_LEN bytes supposing that the
 393    resulting string were to be re-encoded in ENCODING.  All of TAIL is always
 394    included, even if TAIL by itself is longer than MAX_LEN in ENCODING.
 395
 396    [*] Actually this function drops grapheme clusters instead of characters, so
 397        that, e.g. a Unicode character followed by a combining accent character
 398        is either completely included or completely excluded from the returned
 399        string.  See UAX #29 at http://unicode.org/reports/tr29/ for more
 400        information on grapheme clusters.
 401
 402    A null ENCODING is treated as UTF-8.
 403
 404    Simple examples for encoding="UTF-8", max_len=6:
 405
 406        head="abc",  tail="xyz"     => "abcxyz"
 407        head="abcd", tail="xyz"     => "abcxyz"
 408        head="abc",  tail="uvwxyz"  => "uvwxyz"
 409        head="abc",  tail="tuvwxyz" => "tuvwxyz"
 410
 411    Examples for encoding="ISO-8859-1", max_len=6:
 412
 413        head="éèä",  tail="xyz"    => "éèäxyz"
 414          (each letter in HEAD is only 1 byte in ISO-8859-1 even though they
 415           each take 2 bytes in UTF-8 encoding)
 416 */
 417 char *
 418 utf8_encoding_concat (const char *head, const char *tail,
 419                       const char *encoding, size_t max_len)
 420 {
 421   size_t tail_len = strlen (tail);
 422   size_t prefix_len;
 423   char *result;
 424
 425   prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
 426                                        encoding, max_len, &result);
 427   return (result != NULL
 428           ? result
 429           : xconcat2 (head, prefix_len, tail, tail_len));
 430 }
 431
 432 /* Returns the length, in bytes, of the string that would be returned by
 433    utf8_encoding_concat() if passed the same arguments, but the implementation
 434    is often more efficient. */
 435 size_t
 436 utf8_encoding_concat_len (const char *head, const char *tail,
 437                           const char *encoding, size_t max_len)
 438 {
 439   size_t tail_len = strlen (tail);
 440   size_t prefix_len;
 441   char *result;
 442
 443   prefix_len = utf8_encoding_concat__ (head, strlen (head), tail, tail_len,
 444                                        encoding, max_len, &result);
 445   free (result);
 446   return prefix_len + tail_len;
 447 }
 448
 449 /* Returns an allocated, null-terminated string, owned by the caller,
 450    containing as many characters[*] from the beginning of S that would fit
 451    within MAX_LEN bytes if the returned string were to be re-encoded in
 452    ENCODING.  Both S and the returned string are encoded in UTF-8.
 453
 454    [*] Actually this function drops grapheme clusters instead of characters, so
 455        that, e.g. a Unicode character followed by a combining accent character
 456        is either completely included or completely excluded from the returned
 457        string.  See UAX #29 at http://unicode.org/reports/tr29/ for more
 458        information on grapheme clusters.
 459
 460    A null ENCODING is treated as UTF-8.
 461 */
 462 char *
 463 utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
 464 {
 465   return utf8_encoding_concat (s, "", encoding, max_len);
 466 }
 467
 468 /* Returns the length, in bytes, of the string that would be returned by
 469    utf8_encoding_trunc() if passed the same arguments, but the implementation
 470    is often more efficient. */
 471 size_t
 472 utf8_encoding_trunc_len (const char *s, const char *encoding, size_t max_len)
 473 {
 474   return utf8_encoding_concat_len (s, "", encoding, max_len);
 475 }
 476
 477 /* Returns FILENAME converted from UTF-8 to the filename encoding.
 478    On Windows the filename encoding is UTF-8; elsewhere it is based on the
 479    current locale. */
 480 char *
 481 utf8_to_filename (const char *filename)
 482 {
 483   return recode_string (filename_encoding (), "UTF-8", filename, -1);
 484 }
 485
 486 /* Returns FILENAME converted from the filename encoding to UTF-8.
 487    On Windows the filename encoding is UTF-8; elsewhere it is based on the
 488    current locale. */
 489 char *
 490 filename_to_utf8 (const char *filename)
 491 {
 492   return recode_string ("UTF-8", filename_encoding (), filename, -1);
 493 }
 494
 495 /* Converts the string TEXT, which should be encoded in FROM-encoding, to a
 496    dynamically allocated string in TO-encoding.  Any characters which cannot be
 497    converted will be represented by '?'.
 498
 499    The returned string will be null-terminated and allocated on POOL.
 500
 501    This function's behaviour differs from that of g_convert_with_fallback
 502    provided by GLib.  The GLib function will fail (returns NULL) if any part of
 503    the input string is not valid in the declared input encoding.  This function
 504    however perseveres even in the presence of badly encoded input. */
 505 struct substring
 506 recode_substring_pool (const char *to, const char *from,
 507                        struct substring text, struct pool *pool)
 508 {
 509   size_t outbufferlength;
 510   iconv_t conv ;
 511
 512   if (to == NULL)
 513     to = default_encoding;
 514
 515   if (from == NULL)
 516     from = default_encoding;
 517
 518   conv = create_iconv (to, from);
 519
 520   if ( (iconv_t) -1 == conv )
 521     {
 522       struct substring out;
 523       ss_alloc_substring_pool (&out, text, pool);
 524       return out;
 525     }
 526
 527   for ( outbufferlength = 1 ; outbufferlength != 0; outbufferlength <<= 1 )
 528     if ( outbufferlength > text.length)
 529       {
 530         char *output = pool_malloc (pool, outbufferlength);
 531         ssize_t output_len = try_recode (conv, text.string, text.length,
 532                                          output, outbufferlength);
 533         if (output_len >= 0)
 534           return ss_buffer (output, output_len);
 535         pool_free (pool, output);
 536       }
 537
 538   NOT_REACHED ();
 539 }
 540
 541 void
 542 i18n_init (void)
 543 {
 544   setlocale (LC_CTYPE, "");
 545   setlocale (LC_MESSAGES, "");
 546 #if HAVE_LC_PAPER
 547   setlocale (LC_PAPER, "");
 548 #endif
 549   bindtextdomain (PACKAGE, relocate(locale_dir));
 550   textdomain (PACKAGE);
 551
 552   assert (default_encoding == NULL);
 553   default_encoding = xstrdup (locale_charset ());
 554
 555   hmapx_init (&map);
 556 }
 557
 558 const char *
 559 get_default_encoding (void)
 560 {
 561   return default_encoding;
 562 }
 563
 564 void
 565 set_default_encoding (const char *enc)
 566 {
 567   free (default_encoding);
 568   default_encoding = xstrdup (enc);
 569 }
 570
 571
 572 /* Attempts to set the encoding from a locale name
 573    returns true if successfull.
 574    This function does not (should not!) alter the current locale.
 575 */
 576 bool
 577 set_encoding_from_locale (const char *loc)
 578 {
 579   bool ok = true;
 580   char *c_encoding;
 581   char *loc_encoding;
 582   char *tmp = xstrdup (setlocale (LC_CTYPE, NULL));
 583
 584   setlocale (LC_CTYPE, "C");
 585   c_encoding = xstrdup (locale_charset ());
 586
 587   setlocale (LC_CTYPE, loc);
 588   loc_encoding = xstrdup (locale_charset ());
 589
 590
 591   if ( 0 == strcmp (loc_encoding, c_encoding))
 592     {
 593       ok = false;
 594     }
 595
 596
 597   setlocale (LC_CTYPE, tmp);
 598
 599   free (tmp);
 600
 601   if (ok)
 602     {
 603       free (default_encoding);
 604       default_encoding = loc_encoding;
 605     }
 606   else
 607     free (loc_encoding);
 608
 609   free (c_encoding);
 610
 611   return ok;
 612 }
 613
 614 void
 615 i18n_done (void)
 616 {
 617   struct hmapx_node *node;
 618   struct converter *cvtr;
 619
 620   HMAPX_FOR_EACH (cvtr, node, &map)
 621     {
 622       free (cvtr->tocode);
 623       free (cvtr->fromcode);
 624       if (cvtr->conv != (iconv_t) -1)
 625         iconv_close (cvtr->conv);
 626       free (cvtr);
 627     }
 628
 629   hmapx_destroy (&map);
 630
 631   free (default_encoding);
 632   default_encoding = NULL;
 633 }
 634
 635
 636
 637 bool
 638 valid_encoding (const char *enc)
 639 {
 640   iconv_t conv = iconv_open (UTF8, enc);
 641
 642   if ( conv == (iconv_t) -1)
 643     return false;
 644
 645   iconv_close (conv);
 646
 647   return true;
 648 }
 649
 650
 651 /* Return the system local's idea of the
 652    decimal seperator character */
 653 char
 654 get_system_decimal (void)
 655 {
 656   char radix_char;
 657
 658   char *ol = xstrdup (setlocale (LC_NUMERIC, NULL));
 659   setlocale (LC_NUMERIC, "");
 660
 661 #if HAVE_NL_LANGINFO
 662   radix_char = nl_langinfo (RADIXCHAR)[0];
 663 #else
 664   {
 665     char buf[10];
 666     snprintf (buf, sizeof buf, "%f", 2.5);
 667     radix_char = buf[1];
 668   }
 669 #endif
 670
 671   /* We MUST leave LC_NUMERIC untouched, since it would
 672      otherwise interfere with data_{in,out} */
 673   setlocale (LC_NUMERIC, ol);
 674   free (ol);
 675   return radix_char;
 676 }
 677
 678 const char *
 679 uc_name (ucs4_t uc, char buffer[16])
 680 {
 681   if (uc >= 0x20 && uc < 0x7f)
 682     snprintf (buffer, 16, "`%c'", uc);
 683   else
 684     snprintf (buffer, 16, "U+%04X", uc);
 685   return buffer;
 686 }
 687 \f
 688 bool
 689 get_encoding_info (struct encoding_info *e, const char *name)
 690 {
 691   const struct substring in = SS_LITERAL_INITIALIZER (
 692     "\t\n\v\f\r "
 693     "!\"#$%&'()*+,-./0123456789:;<=>?@"
 694     "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
 695     "abcdefghijklmnopqrstuvwxyz{|}~");
 696
 697   struct substring out, cr, lf;
 698   bool ok;
 699
 700   memset (e, 0, sizeof *e);
 701
 702   cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
 703   lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
 704   ok = cr.length >= 1 && cr.length <= MAX_UNIT && cr.length == lf.length;
 705   if (!ok)
 706     {
 707       fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
 708       ss_dealloc (&cr);
 709       ss_dealloc (&lf);
 710       ss_alloc_substring (&cr, ss_cstr ("\r"));
 711       ss_alloc_substring (&lf, ss_cstr ("\n"));
 712     }
 713
 714   e->unit = cr.length;
 715   memcpy (e->cr, cr.string, e->unit);
 716   memcpy (e->lf, lf.string, e->unit);
 717
 718   ss_dealloc (&cr);
 719   ss_dealloc (&lf);
 720
 721   out = recode_substring_pool ("UTF-8", name, in, NULL);
 722   e->is_ascii_compatible = ss_equals (in, out);
 723   ss_dealloc (&out);
 724
 725   return ok;
 726 }
 727
 728 bool
 729 is_encoding_ascii_compatible (const char *encoding)
 730 {
 731   struct encoding_info e;
 732
 733   get_encoding_info (&e, encoding);
 734   return e.is_ascii_compatible;
 735 }
 736
 737 /* Returns true if iconv can convert ENCODING to and from UTF-8,
 738    otherwise false. */
 739 bool
 740 is_encoding_supported (const char *encoding)
 741 {
 742   return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
 743           && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
 744 }