1 /* Test of character set conversion with error handling and autodetection.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
24 #include "striconveha.h"
/* Element count of ARRAY: its total size divided by the size of its first
   element.  Only meaningful when ARRAY is an actual array object, since
   sizeof applied to a pointer yields the pointer's size instead.  */
35 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
36 #define ASSERT(expr) \
41 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
47 /* Magic number for detecting bounds violations.  The tests below expect
   the slot one past the last offset to still hold this value after a
   conversion (e.g. offsets[37] == MAGIC for a 37-byte input).
   NOTE(review): presumably new_offsets stores it in that extra slot of
   its (n + 1)-element allocation -- confirm against the full source.  */
48 #define MAGIC 0x1983EFF1
51 new_offsets (size_t n)
53 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* Handler modes that every test loop below iterates over, via
   h < SIZEOF (handlers).  In the EILSEQ tests, iconveh_question_mark is
   expected to yield a '?' in place of the unconvertible character, and
   iconveh_escape_sequence a \uXXXX escape.  */
61 static enum iconv_ilseq_handler handlers[] =
62 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
68 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
69 ISO-8859-2, and UTF-8. */
71 /* ------------------------- Test mem_iconveha() ------------------------- */
73 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
74 for (h = 0; h < SIZEOF (handlers); h++)
76 enum iconv_ilseq_handler handler = handlers[h];
77 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
78 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
79 for (o = 0; o < 2; o++)
81 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
84 int retval = mem_iconveha (input, strlen (input),
85 "ISO-8859-2", "ISO-8859-1",
90 ASSERT (length == strlen (expected));
91 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
94 for (i = 0; i < 37; i++)
95 ASSERT (offsets[i] == i);
96 ASSERT (offsets[37] == MAGIC);
103 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
104 for (h = 0; h < SIZEOF (handlers); h++)
106 enum iconv_ilseq_handler handler = handlers[h];
107 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
108 for (o = 0; o < 2; o++)
110 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
113 int retval = mem_iconveha (input, strlen (input),
114 "ISO-8859-2", "ISO-8859-1",
121 ASSERT (retval == -1 && errno == EILSEQ);
122 ASSERT (result == NULL);
126 case iconveh_question_mark:
128 static const char expected[] = "Rafa? Maszkowski";
129 ASSERT (retval == 0);
130 ASSERT (length == strlen (expected));
131 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
134 for (i = 0; i < 16; i++)
135 ASSERT (offsets[i] == i);
136 ASSERT (offsets[16] == MAGIC);
142 case iconveh_escape_sequence:
144 static const char expected[] = "Rafa\\u0142 Maszkowski";
145 ASSERT (retval == 0);
146 ASSERT (length == strlen (expected));
147 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
150 for (i = 0; i < 16; i++)
151 ASSERT (offsets[i] == (i < 5 ? i :
153 ASSERT (offsets[16] == MAGIC);
163 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
164 for (h = 0; h < SIZEOF (handlers); h++)
166 enum iconv_ilseq_handler handler = handlers[h];
167 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
168 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
169 for (o = 0; o < 2; o++)
171 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
174 int retval = mem_iconveha (input, strlen (input),
175 "ISO-8859-1", "UTF-8",
179 ASSERT (retval == 0);
180 ASSERT (length == strlen (expected));
181 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
184 for (i = 0; i < 37; i++)
185 ASSERT (offsets[i] == (i < 1 ? i :
189 ASSERT (offsets[37] == MAGIC);
196 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
197 for (h = 0; h < SIZEOF (handlers); h++)
199 enum iconv_ilseq_handler handler = handlers[h];
200 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
201 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
202 for (o = 0; o < 2; o++)
204 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
207 int retval = mem_iconveha (input, strlen (input),
208 "UTF-8", "ISO-8859-1",
212 ASSERT (retval == 0);
213 ASSERT (length == strlen (expected));
214 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
217 for (i = 0; i < 41; i++)
218 ASSERT (offsets[i] == (i < 1 ? i :
219 i == 1 ? (size_t)(-1) :
221 i == 13 ? (size_t)(-1) :
223 i == 20 ? (size_t)(-1) :
226 ASSERT (offsets[41] == MAGIC);
233 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
234 for (h = 0; h < SIZEOF (handlers); h++)
236 enum iconv_ilseq_handler handler = handlers[h];
237 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
238 for (o = 0; o < 2; o++)
240 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
243 int retval = mem_iconveha (input, strlen (input),
244 "UTF-8", "ISO-8859-1",
251 ASSERT (retval == -1 && errno == EILSEQ);
252 ASSERT (result == NULL);
256 case iconveh_question_mark:
258 static const char expected[] = "Rafa? Maszkowski";
259 ASSERT (retval == 0);
260 ASSERT (length == strlen (expected));
261 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
264 for (i = 0; i < 17; i++)
265 ASSERT (offsets[i] == (i < 5 ? i :
266 i == 5 ? (size_t)(-1) :
268 ASSERT (offsets[17] == MAGIC);
274 case iconveh_escape_sequence:
276 static const char expected[] = "Rafa\\u0142 Maszkowski";
277 ASSERT (retval == 0);
278 ASSERT (length == strlen (expected));
279 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
282 for (i = 0; i < 17; i++)
283 ASSERT (offsets[i] == (i < 5 ? i :
284 i == 5 ? (size_t)(-1) :
286 ASSERT (offsets[17] == MAGIC);
296 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
297 for (h = 0; h < SIZEOF (handlers); h++)
299 enum iconv_ilseq_handler handler = handlers[h];
300 static const char input[] = "\342";
301 for (o = 0; o < 2; o++)
303 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
306 int retval = mem_iconveha (input, strlen (input),
307 "UTF-8", "ISO-8859-1",
311 ASSERT (retval == 0);
312 ASSERT (length == 0);
315 ASSERT (offsets[0] == 0);
316 ASSERT (offsets[1] == MAGIC);
324 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
325 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
326 /* Test conversions from autodetect_jp to UTF-8. */
327 for (h = 0; h < SIZEOF (handlers); h++)
329 enum iconv_ilseq_handler handler = handlers[h];
330 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
331 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
332 for (o = 0; o < 2; o++)
334 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
337 int retval = mem_iconveha (input, strlen (input),
338 "autodetect_jp", "UTF-8",
342 ASSERT (retval == 0);
343 ASSERT (length == strlen (expected));
344 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
347 for (i = 0; i < 10; i++)
348 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
349 ASSERT (offsets[10] == MAGIC);
355 for (h = 0; h < SIZEOF (handlers); h++)
357 enum iconv_ilseq_handler handler = handlers[h];
358 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
359 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
360 for (o = 0; o < 2; o++)
362 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
365 int retval = mem_iconveha (input, strlen (input),
366 "autodetect_jp", "UTF-8",
370 ASSERT (retval == 0);
371 ASSERT (length == strlen (expected));
372 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
375 for (i = 0; i < 10; i++)
376 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
377 ASSERT (offsets[10] == MAGIC);
383 for (h = 0; h < SIZEOF (handlers); h++)
385 enum iconv_ilseq_handler handler = handlers[h];
386 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
387 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
388 for (o = 0; o < 2; o++)
390 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
393 int retval = mem_iconveha (input, strlen (input),
394 "autodetect_jp", "UTF-8",
398 ASSERT (retval == 0);
399 ASSERT (length == strlen (expected));
400 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
403 for (i = 0; i < 16; i++)
404 ASSERT (offsets[i] == (i == 0 ? 0 :
411 ASSERT (offsets[16] == MAGIC);
419 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
420 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
421 for (h = 0; h < SIZEOF (handlers); h++)
423 enum iconv_ilseq_handler handler = handlers[h];
424 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
425 static const char expected[] = "Costs: 27 EUR";
426 for (o = 0; o < 2; o++)
428 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
431 int retval = mem_iconveha (input, strlen (input),
432 "UTF-8", "ISO-8859-1",
436 ASSERT (retval == 0);
437 ASSERT (length == strlen (expected));
438 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
441 for (i = 0; i < 13; i++)
442 ASSERT (offsets[i] == (i < 11 ? i : (size_t)(-1)));
443 ASSERT (offsets[13] == MAGIC);
451 /* ------------------------- Test str_iconveha() ------------------------- */
453 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
454 for (h = 0; h < SIZEOF (handlers); h++)
456 enum iconv_ilseq_handler handler = handlers[h];
457 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
458 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
459 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
460 ASSERT (result != NULL);
461 ASSERT (strcmp (result, expected) == 0);
465 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
466 for (h = 0; h < SIZEOF (handlers); h++)
468 enum iconv_ilseq_handler handler = handlers[h];
469 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
470 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
474 ASSERT (result == NULL && errno == EILSEQ);
476 case iconveh_question_mark:
478 static const char expected[] = "Rafa? Maszkowski";
479 ASSERT (result != NULL);
480 ASSERT (strcmp (result, expected) == 0);
484 case iconveh_escape_sequence:
486 static const char expected[] = "Rafa\\u0142 Maszkowski";
487 ASSERT (result != NULL);
488 ASSERT (strcmp (result, expected) == 0);
495 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
496 for (h = 0; h < SIZEOF (handlers); h++)
498 enum iconv_ilseq_handler handler = handlers[h];
499 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
500 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
501 char *result = str_iconveha (input, "ISO-8859-1", "UTF-8", false, handler);
502 ASSERT (result != NULL);
503 ASSERT (strcmp (result, expected) == 0);
507 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
508 for (h = 0; h < SIZEOF (handlers); h++)
510 enum iconv_ilseq_handler handler = handlers[h];
511 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
512 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
513 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
514 ASSERT (result != NULL);
515 ASSERT (strcmp (result, expected) == 0);
519 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
520 for (h = 0; h < SIZEOF (handlers); h++)
522 enum iconv_ilseq_handler handler = handlers[h];
523 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
524 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
528 ASSERT (result == NULL && errno == EILSEQ);
530 case iconveh_question_mark:
532 static const char expected[] = "Costs: 27 ?";
533 ASSERT (result != NULL);
534 ASSERT (strcmp (result, expected) == 0);
538 case iconveh_escape_sequence:
540 static const char expected[] = "Costs: 27 \\u20AC";
541 ASSERT (result != NULL);
542 ASSERT (strcmp (result, expected) == 0);
549 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
550 for (h = 0; h < SIZEOF (handlers); h++)
552 enum iconv_ilseq_handler handler = handlers[h];
553 static const char input[] = "\342";
554 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
555 ASSERT (result != NULL);
556 ASSERT (strcmp (result, "") == 0);
560 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
561 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
562 /* Test conversions from autodetect_jp to UTF-8. */
563 for (h = 0; h < SIZEOF (handlers); h++)
565 enum iconv_ilseq_handler handler = handlers[h];
566 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
567 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
568 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
569 ASSERT (result != NULL);
570 ASSERT (strcmp (result, expected) == 0);
573 for (h = 0; h < SIZEOF (handlers); h++)
575 enum iconv_ilseq_handler handler = handlers[h];
576 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
577 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
578 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
579 ASSERT (result != NULL);
580 ASSERT (strcmp (result, expected) == 0);
583 for (h = 0; h < SIZEOF (handlers); h++)
585 enum iconv_ilseq_handler handler = handlers[h];
586 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
587 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
588 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
589 ASSERT (result != NULL);
590 ASSERT (strcmp (result, expected) == 0);
595 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
596 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
597 for (h = 0; h < SIZEOF (handlers); h++)
599 enum iconv_ilseq_handler handler = handlers[h];
600 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
601 static const char expected[] = "Costs: 27 EUR";
602 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", true, handler);
603 ASSERT (result != NULL);
604 ASSERT (strcmp (result, expected) == 0);