1 /* Test of character set conversion with error handling and autodetection.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
22 #include "striconveha.h"
/* Number of elements in ARRAY.  ARRAY must be a genuine array object (or
   array-valued expression), not a pointer: for a pointer the quotient is
   meaningless.  The parameter is parenthesized at every use so that the
   expansion does not depend on the precedence of operators inside the
   argument.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
34 #define ASSERT(expr) \
39 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
45 /* Magic number for detecting bounds violations.  The tests expect to find
   it unchanged in the slot just past the last real offset, e.g.
   ASSERT (offsets[37] == MAGIC) after offsets 0..36 have been checked.
   Presumably new_offsets stores it there; the store itself is not visible
   in this excerpt.  */
46 #define MAGIC 0x1983EFF1
49 new_offsets (size_t n)
51 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* The invalid-sequence handlers exercised by the tests.  Each test loop
   walks this array (h = 0 .. SIZEOF (handlers) - 1) and repeats the same
   conversion once per handler.  In the EILSEQ tests the expected result
   differs per handler: "Rafa? Maszkowski" under iconveh_question_mark and
   an escaped form of the unconvertible character under
   iconveh_escape_sequence.  */
59 static enum iconv_ilseq_handler handlers[] =
60 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
66 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
67 ISO-8859-2, and UTF-8. */
69 /* ------------------------- Test mem_iconveha() ------------------------- */
71 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
72 for (h = 0; h < SIZEOF (handlers); h++)
74 enum iconv_ilseq_handler handler = handlers[h];
75 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
76 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
77 for (o = 0; o < 2; o++)
79 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
82 int retval = mem_iconveha (input, strlen (input),
83 "ISO-8859-2", "ISO-8859-1",
88 ASSERT (length == strlen (expected));
89 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
92 for (i = 0; i < 37; i++)
93 ASSERT (offsets[i] == i);
94 ASSERT (offsets[37] == MAGIC);
101 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
102 for (h = 0; h < SIZEOF (handlers); h++)
104 enum iconv_ilseq_handler handler = handlers[h];
105 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
106 for (o = 0; o < 2; o++)
108 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
111 int retval = mem_iconveha (input, strlen (input),
112 "ISO-8859-2", "ISO-8859-1",
119 ASSERT (retval == -1 && errno == EILSEQ);
120 ASSERT (result == NULL);
124 case iconveh_question_mark:
126 static const char expected[] = "Rafa? Maszkowski";
127 ASSERT (retval == 0);
128 ASSERT (length == strlen (expected));
129 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
132 for (i = 0; i < 16; i++)
133 ASSERT (offsets[i] == i);
134 ASSERT (offsets[16] == MAGIC);
140 case iconveh_escape_sequence:
142 static const char expected[] = "Rafa\\u0142 Maszkowski";
143 ASSERT (retval == 0);
144 ASSERT (length == strlen (expected));
145 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
148 for (i = 0; i < 16; i++)
149 ASSERT (offsets[i] == (i < 5 ? i :
151 ASSERT (offsets[16] == MAGIC);
161 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
162 for (h = 0; h < SIZEOF (handlers); h++)
164 enum iconv_ilseq_handler handler = handlers[h];
165 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
166 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
167 for (o = 0; o < 2; o++)
169 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
172 int retval = mem_iconveha (input, strlen (input),
173 "ISO-8859-1", "UTF-8",
177 ASSERT (retval == 0);
178 ASSERT (length == strlen (expected));
179 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
182 for (i = 0; i < 37; i++)
183 ASSERT (offsets[i] == (i < 1 ? i :
187 ASSERT (offsets[37] == MAGIC);
194 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
195 for (h = 0; h < SIZEOF (handlers); h++)
197 enum iconv_ilseq_handler handler = handlers[h];
198 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
199 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
200 for (o = 0; o < 2; o++)
202 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
205 int retval = mem_iconveha (input, strlen (input),
206 "UTF-8", "ISO-8859-1",
210 ASSERT (retval == 0);
211 ASSERT (length == strlen (expected));
212 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
215 for (i = 0; i < 41; i++)
216 ASSERT (offsets[i] == (i < 1 ? i :
217 i == 1 ? (size_t)(-1) :
219 i == 13 ? (size_t)(-1) :
221 i == 20 ? (size_t)(-1) :
224 ASSERT (offsets[41] == MAGIC);
231 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
232 for (h = 0; h < SIZEOF (handlers); h++)
234 enum iconv_ilseq_handler handler = handlers[h];
235 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
236 for (o = 0; o < 2; o++)
238 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
241 int retval = mem_iconveha (input, strlen (input),
242 "UTF-8", "ISO-8859-1",
249 ASSERT (retval == -1 && errno == EILSEQ);
250 ASSERT (result == NULL);
254 case iconveh_question_mark:
256 static const char expected[] = "Rafa? Maszkowski";
257 ASSERT (retval == 0);
258 ASSERT (length == strlen (expected));
259 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
262 for (i = 0; i < 17; i++)
263 ASSERT (offsets[i] == (i < 5 ? i :
264 i == 5 ? (size_t)(-1) :
266 ASSERT (offsets[17] == MAGIC);
272 case iconveh_escape_sequence:
274 static const char expected[] = "Rafa\\u0142 Maszkowski";
275 ASSERT (retval == 0);
276 ASSERT (length == strlen (expected));
277 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
280 for (i = 0; i < 17; i++)
281 ASSERT (offsets[i] == (i < 5 ? i :
282 i == 5 ? (size_t)(-1) :
284 ASSERT (offsets[17] == MAGIC);
294 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
295 for (h = 0; h < SIZEOF (handlers); h++)
297 enum iconv_ilseq_handler handler = handlers[h];
298 static const char input[] = "\342";
299 for (o = 0; o < 2; o++)
301 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
304 int retval = mem_iconveha (input, strlen (input),
305 "UTF-8", "ISO-8859-1",
309 ASSERT (retval == 0);
310 ASSERT (length == 0);
313 ASSERT (offsets[0] == 0);
314 ASSERT (offsets[1] == MAGIC);
322 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
323 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
324 /* Test conversions from autodetect_jp to UTF-8. */
325 for (h = 0; h < SIZEOF (handlers); h++)
327 enum iconv_ilseq_handler handler = handlers[h];
328 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
329 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
330 for (o = 0; o < 2; o++)
332 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
335 int retval = mem_iconveha (input, strlen (input),
336 "autodetect_jp", "UTF-8",
340 ASSERT (retval == 0);
341 ASSERT (length == strlen (expected));
342 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
345 for (i = 0; i < 10; i++)
346 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
347 ASSERT (offsets[10] == MAGIC);
353 for (h = 0; h < SIZEOF (handlers); h++)
355 enum iconv_ilseq_handler handler = handlers[h];
356 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
357 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
358 for (o = 0; o < 2; o++)
360 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
363 int retval = mem_iconveha (input, strlen (input),
364 "autodetect_jp", "UTF-8",
368 ASSERT (retval == 0);
369 ASSERT (length == strlen (expected));
370 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
373 for (i = 0; i < 10; i++)
374 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
375 ASSERT (offsets[10] == MAGIC);
381 for (h = 0; h < SIZEOF (handlers); h++)
383 enum iconv_ilseq_handler handler = handlers[h];
384 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
385 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
386 for (o = 0; o < 2; o++)
388 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
391 int retval = mem_iconveha (input, strlen (input),
392 "autodetect_jp", "UTF-8",
396 ASSERT (retval == 0);
397 ASSERT (length == strlen (expected));
398 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
401 for (i = 0; i < 16; i++)
402 ASSERT (offsets[i] == (i == 0 ? 0 :
409 ASSERT (offsets[16] == MAGIC);
417 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
418 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
419 for (h = 0; h < SIZEOF (handlers); h++)
421 enum iconv_ilseq_handler handler = handlers[h];
422 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
423 static const char expected[] = "Costs: 27 EUR";
424 for (o = 0; o < 2; o++)
426 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
429 int retval = mem_iconveha (input, strlen (input),
430 "UTF-8", "ISO-8859-1",
434 ASSERT (retval == 0);
435 ASSERT (length == strlen (expected));
436 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
439 for (i = 0; i < 13; i++)
440 ASSERT (offsets[i] == (i < 11 ? i : (size_t)(-1)));
441 ASSERT (offsets[13] == MAGIC);
449 /* ------------------------- Test str_iconveha() ------------------------- */
451 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
452 for (h = 0; h < SIZEOF (handlers); h++)
454 enum iconv_ilseq_handler handler = handlers[h];
455 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
456 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
457 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
458 ASSERT (result != NULL);
459 ASSERT (strcmp (result, expected) == 0);
463 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
464 for (h = 0; h < SIZEOF (handlers); h++)
466 enum iconv_ilseq_handler handler = handlers[h];
467 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
468 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
472 ASSERT (result == NULL && errno == EILSEQ);
474 case iconveh_question_mark:
476 static const char expected[] = "Rafa? Maszkowski";
477 ASSERT (result != NULL);
478 ASSERT (strcmp (result, expected) == 0);
482 case iconveh_escape_sequence:
484 static const char expected[] = "Rafa\\u0142 Maszkowski";
485 ASSERT (result != NULL);
486 ASSERT (strcmp (result, expected) == 0);
493 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
494 for (h = 0; h < SIZEOF (handlers); h++)
496 enum iconv_ilseq_handler handler = handlers[h];
497 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
498 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
499 char *result = str_iconveha (input, "ISO-8859-1", "UTF-8", false, handler);
500 ASSERT (result != NULL);
501 ASSERT (strcmp (result, expected) == 0);
505 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
506 for (h = 0; h < SIZEOF (handlers); h++)
508 enum iconv_ilseq_handler handler = handlers[h];
509 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
510 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
511 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
512 ASSERT (result != NULL);
513 ASSERT (strcmp (result, expected) == 0);
517 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
518 for (h = 0; h < SIZEOF (handlers); h++)
520 enum iconv_ilseq_handler handler = handlers[h];
521 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
522 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
526 ASSERT (result == NULL && errno == EILSEQ);
528 case iconveh_question_mark:
530 static const char expected[] = "Costs: 27 ?";
531 ASSERT (result != NULL);
532 ASSERT (strcmp (result, expected) == 0);
536 case iconveh_escape_sequence:
538 static const char expected[] = "Costs: 27 \\u20AC";
539 ASSERT (result != NULL);
540 ASSERT (strcmp (result, expected) == 0);
547 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
548 for (h = 0; h < SIZEOF (handlers); h++)
550 enum iconv_ilseq_handler handler = handlers[h];
551 static const char input[] = "\342";
552 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
553 ASSERT (result != NULL);
554 ASSERT (strcmp (result, "") == 0);
558 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
559 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
560 /* Test conversions from autodetect_jp to UTF-8. */
561 for (h = 0; h < SIZEOF (handlers); h++)
563 enum iconv_ilseq_handler handler = handlers[h];
564 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
565 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
566 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
567 ASSERT (result != NULL);
568 ASSERT (strcmp (result, expected) == 0);
571 for (h = 0; h < SIZEOF (handlers); h++)
573 enum iconv_ilseq_handler handler = handlers[h];
574 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
575 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
576 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
577 ASSERT (result != NULL);
578 ASSERT (strcmp (result, expected) == 0);
581 for (h = 0; h < SIZEOF (handlers); h++)
583 enum iconv_ilseq_handler handler = handlers[h];
584 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
585 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
586 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
587 ASSERT (result != NULL);
588 ASSERT (strcmp (result, expected) == 0);
593 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
594 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
595 for (h = 0; h < SIZEOF (handlers); h++)
597 enum iconv_ilseq_handler handler = handlers[h];
598 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
599 static const char expected[] = "Costs: 27 EUR";
600 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", true, handler);
601 ASSERT (result != NULL);
602 ASSERT (strcmp (result, expected) == 0);