1 /* Unicode character output to streams with locale dependent encoding.
3 Copyright (C) 2000 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU Library General Public License as published
7 by the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 /* Written by Bruno Haible <haible@clisp.cons.org>. */
46 # define _(Text) gettext (Text)
51 #include "unicodeio.h"
53 /* When we pass a Unicode character to iconv(), we must pass it in a
54 suitable encoding. The standardized Unicode encodings are
55 UTF-8, UCS-2, UCS-4, UTF-16, UTF-16BE, UTF-16LE, UTF-7.
56 UCS-2 supports only characters up to \U0000FFFF.
57 UTF-16 and variants support only characters up to \U0010FFFF.
58 UTF-7 is way too complex and not supported by glibc-2.1.
59 The UCS-4 specification leaves doubts about endianness and the byte order
60 mark. glibc currently interprets it as big endian without a byte order
61 mark, but this is not backed by an RFC.
62 So we use UTF-8. It supports characters up to \U7FFFFFFF and is
63 unambiguously defined. */
65 /* Stores the UTF-8 representation of the Unicode character wc in r[0..5].
66 Returns the number of bytes stored, or -1 if wc is out of range.
   The caller must provide room for six bytes at r: the longest form
   handled below writes as far as r[5]. */
68 utf8_wctomb (unsigned char *r, unsigned int wc)
/* The comparisons below classify wc by magnitude, with 0x7fffffff as the
   largest value tested for.
   NOTE(review): the statements selected by each branch are not visible in
   this excerpt; presumably they choose the byte count that the case
   labels below dispatch on -- confirm against the full file. */
76 else if (wc < 0x10000)
78 else if (wc < 0x200000)
80 else if (wc < 0x4000000)
82 else if (wc <= 0x7fffffff)
89 /* Note: code falls through cases!
   Each case stores one trailing byte as 0x80 | (low six bits of wc) and
   then shifts those six bits out of wc.  The constant OR-ed in afterwards
   is a marker that the remaining shifts move down into the high bits of
   what is left in wc; e.g. entering at case 3, 0x800 becomes 0x20 after
   the next shift and combines with 0xc0 to give the prefix 0xe0.
   NOTE(review): the store of that remaining value into r[0] is not
   visible in this excerpt -- confirm against the full file. */
90 case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
91 case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
92 case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
93 case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
94 case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
101 /* Luckily, the encoding's name is platform independent. */
102 #define UTF8_NAME "UTF-8"
104 /* Outputs the Unicode character CODE to the output stream STREAM.
105 Assumes that the locale doesn't change between two calls, because the
   iconv conversion descriptor is kept in a static variable across calls. */
107 print_unicode_char (FILE *stream, unsigned int code)
109 static int initialized;
112 static iconv_t utf8_to_local;
120 extern const char *locale_charset (void);
121 const char *charset = locale_charset ();
123 is_utf8 = (charset != NULL && !strcmp (charset, UTF8_NAME));
127 utf8_to_local = (charset != NULL
128 ? iconv_open (charset, UTF8_NAME)
130 if (utf8_to_local == (iconv_t)(-1))
132 /* For an unknown encoding, assume ASCII. */
133 utf8_to_local = iconv_open ("ASCII", UTF8_NAME);
134 if (utf8_to_local == (iconv_t)(-1))
136 _("cannot output U+%04X: iconv function not usable"),
144 /* Convert the character to UTF-8. */
145 count = utf8_wctomb ((unsigned char *) inbuf, code);
147 error (1, 0, _("U+%04X: character out of range"), code);
151 fwrite (inbuf, 1, count, stream);
166 outbytesleft = sizeof (outbuf);
168 /* Convert the character from UTF-8 to the locale's charset. */
169 res = iconv (utf8_to_local, &inptr, &inbytesleft, &outptr, &outbytesleft);
170 if (inbytesleft > 0 || res == (size_t)(-1))
171 error (1, res == (size_t)(-1) ? errno : 0,
172 _("cannot convert U+%04X to local character set"), code);
174 /* Avoid glibc-2.1 bug. */
175 # if defined _LIBICONV_VERSION || !(__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1)
177 /* Get back to the initial shift state. */
178 res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft);
179 if (res == (size_t)(-1))
180 error (1, errno, _("cannot convert U+%04X to local character set"),
184 fwrite (outbuf, 1, outptr - outbuf, stream);
186 error (1, 0, _("cannot output U+%04X: iconv function not available"),