1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
24 #include "striconveh.h"
/* Number of elements in ARRAY.  ARRAY must be an expression of true array
   type: a pointer (including an array-typed function parameter, which
   decays to a pointer) yields a meaningless result.  The parameter is
   parenthesized before it is indexed so that the expansion does not depend
   on the precedence of operators inside the argument.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
36 #define ASSERT(expr) \
41 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
/* Sentinel value for detecting bounds violations.  The tests expect to find
   it in the extra element just past the last real entry of every offsets
   array once a conversion has run; a different value there means something
   wrote beyond the end of the array.  */
#define MAGIC 0x1983EFF1
51 new_offsets (size_t n)
53 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
61 static enum iconv_ilseq_handler handlers[] =
62 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
68 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
69 ISO-8859-2, and UTF-8. */
70 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
71 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
72 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
73 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
74 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
75 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
77 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
78 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
79 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
80 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
82 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
84 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
85 for (h = 0; h < SIZEOF (handlers); h++)
87 enum iconv_ilseq_handler handler = handlers[h];
88 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
89 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
90 for (o = 0; o < 2; o++)
92 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
95 int retval = mem_cd_iconveh (input, strlen (input),
97 cd_88592_to_utf8, cd_utf8_to_88591,
101 ASSERT (retval == 0);
102 ASSERT (length == strlen (expected));
103 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
106 for (i = 0; i < 37; i++)
107 ASSERT (offsets[i] == i);
108 ASSERT (offsets[37] == MAGIC);
115 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
116 for (h = 0; h < SIZEOF (handlers); h++)
118 enum iconv_ilseq_handler handler = handlers[h];
119 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
120 for (o = 0; o < 2; o++)
122 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
125 int retval = mem_cd_iconveh (input, strlen (input),
127 cd_88592_to_utf8, cd_utf8_to_88591,
134 ASSERT (retval == -1 && errno == EILSEQ);
135 ASSERT (result == NULL);
139 case iconveh_question_mark:
141 static const char expected[] = "Rafa? Maszkowski";
142 ASSERT (retval == 0);
143 ASSERT (length == strlen (expected));
144 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
147 for (i = 0; i < 16; i++)
148 ASSERT (offsets[i] == i);
149 ASSERT (offsets[16] == MAGIC);
155 case iconveh_escape_sequence:
157 static const char expected[] = "Rafa\\u0142 Maszkowski";
158 ASSERT (retval == 0);
159 ASSERT (length == strlen (expected));
160 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
163 for (i = 0; i < 16; i++)
164 ASSERT (offsets[i] == (i < 5 ? i :
166 ASSERT (offsets[16] == MAGIC);
176 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
177 for (h = 0; h < SIZEOF (handlers); h++)
179 enum iconv_ilseq_handler handler = handlers[h];
180 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
181 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
182 for (o = 0; o < 2; o++)
184 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
187 int retval = mem_cd_iconveh (input, strlen (input),
189 cd_88591_to_utf8, (iconv_t)(-1),
193 ASSERT (retval == 0);
194 ASSERT (length == strlen (expected));
195 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
198 for (i = 0; i < 37; i++)
199 ASSERT (offsets[i] == (i < 1 ? i :
203 ASSERT (offsets[37] == MAGIC);
210 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
211 for (h = 0; h < SIZEOF (handlers); h++)
213 enum iconv_ilseq_handler handler = handlers[h];
214 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
215 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
216 for (o = 0; o < 2; o++)
218 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
221 int retval = mem_cd_iconveh (input, strlen (input),
223 (iconv_t)(-1), cd_utf8_to_88591,
227 ASSERT (retval == 0);
228 ASSERT (length == strlen (expected));
229 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
232 for (i = 0; i < 41; i++)
233 ASSERT (offsets[i] == (i < 1 ? i :
234 i == 1 ? (size_t)(-1) :
236 i == 13 ? (size_t)(-1) :
238 i == 20 ? (size_t)(-1) :
241 ASSERT (offsets[41] == MAGIC);
248 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
249 for (h = 0; h < SIZEOF (handlers); h++)
251 enum iconv_ilseq_handler handler = handlers[h];
252 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
253 for (o = 0; o < 2; o++)
255 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
258 int retval = mem_cd_iconveh (input, strlen (input),
260 (iconv_t)(-1), cd_utf8_to_88591,
267 ASSERT (retval == -1 && errno == EILSEQ);
268 ASSERT (result == NULL);
272 case iconveh_question_mark:
274 static const char expected[] = "Rafa? Maszkowski";
275 ASSERT (retval == 0);
276 ASSERT (length == strlen (expected));
277 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
280 for (i = 0; i < 17; i++)
281 ASSERT (offsets[i] == (i < 5 ? i :
282 i == 5 ? (size_t)(-1) :
284 ASSERT (offsets[17] == MAGIC);
290 case iconveh_escape_sequence:
292 static const char expected[] = "Rafa\\u0142 Maszkowski";
293 ASSERT (retval == 0);
294 ASSERT (length == strlen (expected));
295 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
298 for (i = 0; i < 17; i++)
299 ASSERT (offsets[i] == (i < 5 ? i :
300 i == 5 ? (size_t)(-1) :
302 ASSERT (offsets[17] == MAGIC);
312 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
313 for (h = 0; h < SIZEOF (handlers); h++)
315 enum iconv_ilseq_handler handler = handlers[h];
316 static const char input[] = "\342";
317 for (o = 0; o < 2; o++)
319 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
322 int retval = mem_cd_iconveh (input, strlen (input),
324 (iconv_t)(-1), cd_utf8_to_88591,
328 ASSERT (retval == 0);
329 ASSERT (length == 0);
332 ASSERT (offsets[0] == 0);
333 ASSERT (offsets[1] == MAGIC);
341 /* ------------------------ Test str_cd_iconveh() ------------------------ */
343 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
344 for (h = 0; h < SIZEOF (handlers); h++)
346 enum iconv_ilseq_handler handler = handlers[h];
347 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
348 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
349 char *result = str_cd_iconveh (input,
351 cd_88592_to_utf8, cd_utf8_to_88591,
353 ASSERT (result != NULL);
354 ASSERT (strcmp (result, expected) == 0);
358 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
359 for (h = 0; h < SIZEOF (handlers); h++)
361 enum iconv_ilseq_handler handler = handlers[h];
362 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
363 char *result = str_cd_iconveh (input,
365 cd_88592_to_utf8, cd_utf8_to_88591,
370 ASSERT (result == NULL && errno == EILSEQ);
372 case iconveh_question_mark:
374 static const char expected[] = "Rafa? Maszkowski";
375 ASSERT (result != NULL);
376 ASSERT (strcmp (result, expected) == 0);
380 case iconveh_escape_sequence:
382 static const char expected[] = "Rafa\\u0142 Maszkowski";
383 ASSERT (result != NULL);
384 ASSERT (strcmp (result, expected) == 0);
391 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
392 for (h = 0; h < SIZEOF (handlers); h++)
394 enum iconv_ilseq_handler handler = handlers[h];
395 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
396 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
397 char *result = str_cd_iconveh (input,
399 cd_88591_to_utf8, (iconv_t)(-1),
401 ASSERT (result != NULL);
402 ASSERT (strcmp (result, expected) == 0);
406 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
407 for (h = 0; h < SIZEOF (handlers); h++)
409 enum iconv_ilseq_handler handler = handlers[h];
410 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
411 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
412 char *result = str_cd_iconveh (input,
414 (iconv_t)(-1), cd_utf8_to_88591,
416 ASSERT (result != NULL);
417 ASSERT (strcmp (result, expected) == 0);
421 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
422 for (h = 0; h < SIZEOF (handlers); h++)
424 enum iconv_ilseq_handler handler = handlers[h];
425 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
426 char *result = str_cd_iconveh (input,
428 (iconv_t)(-1), cd_utf8_to_88591,
433 ASSERT (result == NULL && errno == EILSEQ);
435 case iconveh_question_mark:
437 static const char expected[] = "Costs: 27 ?";
438 ASSERT (result != NULL);
439 ASSERT (strcmp (result, expected) == 0);
443 case iconveh_escape_sequence:
445 static const char expected[] = "Costs: 27 \\u20AC";
446 ASSERT (result != NULL);
447 ASSERT (strcmp (result, expected) == 0);
454 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
455 for (h = 0; h < SIZEOF (handlers); h++)
457 enum iconv_ilseq_handler handler = handlers[h];
458 static const char input[] = "\342";
459 char *result = str_cd_iconveh (input,
461 (iconv_t)(-1), cd_utf8_to_88591,
463 ASSERT (result != NULL);
464 ASSERT (strcmp (result, "") == 0);
468 if (cd_88591_to_88592 != (iconv_t)(-1))
469 iconv_close (cd_88591_to_88592);
470 if (cd_88592_to_88591 != (iconv_t)(-1))
471 iconv_close (cd_88592_to_88591);
472 iconv_close (cd_88591_to_utf8);
473 iconv_close (cd_utf8_to_88591);
474 iconv_close (cd_88592_to_utf8);
475 iconv_close (cd_utf8_to_88592);
477 /* ------------------------- Test mem_iconveh() ------------------------- */
479 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
480 for (h = 0; h < SIZEOF (handlers); h++)
482 enum iconv_ilseq_handler handler = handlers[h];
483 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
484 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
485 for (o = 0; o < 2; o++)
487 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
490 int retval = mem_iconveh (input, strlen (input),
491 "ISO-8859-2", "ISO-8859-1",
495 ASSERT (retval == 0);
496 ASSERT (length == strlen (expected));
497 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
500 for (i = 0; i < 37; i++)
501 ASSERT (offsets[i] == i);
502 ASSERT (offsets[37] == MAGIC);
509 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
510 for (h = 0; h < SIZEOF (handlers); h++)
512 enum iconv_ilseq_handler handler = handlers[h];
513 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
514 for (o = 0; o < 2; o++)
516 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
519 int retval = mem_iconveh (input, strlen (input),
520 "ISO-8859-2", "ISO-8859-1",
527 ASSERT (retval == -1 && errno == EILSEQ);
528 ASSERT (result == NULL);
532 case iconveh_question_mark:
534 static const char expected[] = "Rafa? Maszkowski";
535 ASSERT (retval == 0);
536 ASSERT (length == strlen (expected));
537 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
540 for (i = 0; i < 16; i++)
541 ASSERT (offsets[i] == i);
542 ASSERT (offsets[16] == MAGIC);
548 case iconveh_escape_sequence:
550 static const char expected[] = "Rafa\\u0142 Maszkowski";
551 ASSERT (retval == 0);
552 ASSERT (length == strlen (expected));
553 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
556 for (i = 0; i < 16; i++)
557 ASSERT (offsets[i] == (i < 5 ? i :
559 ASSERT (offsets[16] == MAGIC);
569 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
570 for (h = 0; h < SIZEOF (handlers); h++)
572 enum iconv_ilseq_handler handler = handlers[h];
573 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
574 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
575 for (o = 0; o < 2; o++)
577 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
580 int retval = mem_iconveh (input, strlen (input),
581 "ISO-8859-1", "UTF-8",
585 ASSERT (retval == 0);
586 ASSERT (length == strlen (expected));
587 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
590 for (i = 0; i < 37; i++)
591 ASSERT (offsets[i] == (i < 1 ? i :
595 ASSERT (offsets[37] == MAGIC);
602 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
603 for (h = 0; h < SIZEOF (handlers); h++)
605 enum iconv_ilseq_handler handler = handlers[h];
606 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
607 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
608 for (o = 0; o < 2; o++)
610 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
613 int retval = mem_iconveh (input, strlen (input),
614 "UTF-8", "ISO-8859-1",
618 ASSERT (retval == 0);
619 ASSERT (length == strlen (expected));
620 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
623 for (i = 0; i < 41; i++)
624 ASSERT (offsets[i] == (i < 1 ? i :
625 i == 1 ? (size_t)(-1) :
627 i == 13 ? (size_t)(-1) :
629 i == 20 ? (size_t)(-1) :
632 ASSERT (offsets[41] == MAGIC);
639 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
640 for (h = 0; h < SIZEOF (handlers); h++)
642 enum iconv_ilseq_handler handler = handlers[h];
643 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
644 for (o = 0; o < 2; o++)
646 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
649 int retval = mem_iconveh (input, strlen (input),
650 "UTF-8", "ISO-8859-1",
657 ASSERT (retval == -1 && errno == EILSEQ);
658 ASSERT (result == NULL);
662 case iconveh_question_mark:
664 static const char expected[] = "Rafa? Maszkowski";
665 ASSERT (retval == 0);
666 ASSERT (length == strlen (expected));
667 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
670 for (i = 0; i < 17; i++)
671 ASSERT (offsets[i] == (i < 5 ? i :
672 i == 5 ? (size_t)(-1) :
674 ASSERT (offsets[17] == MAGIC);
680 case iconveh_escape_sequence:
682 static const char expected[] = "Rafa\\u0142 Maszkowski";
683 ASSERT (retval == 0);
684 ASSERT (length == strlen (expected));
685 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
688 for (i = 0; i < 17; i++)
689 ASSERT (offsets[i] == (i < 5 ? i :
690 i == 5 ? (size_t)(-1) :
692 ASSERT (offsets[17] == MAGIC);
702 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
703 for (h = 0; h < SIZEOF (handlers); h++)
705 enum iconv_ilseq_handler handler = handlers[h];
706 static const char input[] = "\342";
707 for (o = 0; o < 2; o++)
709 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
712 int retval = mem_iconveh (input, strlen (input),
713 "UTF-8", "ISO-8859-1",
717 ASSERT (retval == 0);
718 ASSERT (length == 0);
721 ASSERT (offsets[0] == 0);
722 ASSERT (offsets[1] == MAGIC);
730 /* ------------------------- Test str_iconveh() ------------------------- */
732 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
733 for (h = 0; h < SIZEOF (handlers); h++)
735 enum iconv_ilseq_handler handler = handlers[h];
736 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
737 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
738 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
739 ASSERT (result != NULL);
740 ASSERT (strcmp (result, expected) == 0);
744 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
745 for (h = 0; h < SIZEOF (handlers); h++)
747 enum iconv_ilseq_handler handler = handlers[h];
748 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
749 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
753 ASSERT (result == NULL && errno == EILSEQ);
755 case iconveh_question_mark:
757 static const char expected[] = "Rafa? Maszkowski";
758 ASSERT (result != NULL);
759 ASSERT (strcmp (result, expected) == 0);
763 case iconveh_escape_sequence:
765 static const char expected[] = "Rafa\\u0142 Maszkowski";
766 ASSERT (result != NULL);
767 ASSERT (strcmp (result, expected) == 0);
774 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
775 for (h = 0; h < SIZEOF (handlers); h++)
777 enum iconv_ilseq_handler handler = handlers[h];
778 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
779 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
780 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
781 ASSERT (result != NULL);
782 ASSERT (strcmp (result, expected) == 0);
786 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
787 for (h = 0; h < SIZEOF (handlers); h++)
789 enum iconv_ilseq_handler handler = handlers[h];
790 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
791 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
792 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
793 ASSERT (result != NULL);
794 ASSERT (strcmp (result, expected) == 0);
798 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
799 for (h = 0; h < SIZEOF (handlers); h++)
801 enum iconv_ilseq_handler handler = handlers[h];
802 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
803 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
807 ASSERT (result == NULL && errno == EILSEQ);
809 case iconveh_question_mark:
811 static const char expected[] = "Costs: 27 ?";
812 ASSERT (result != NULL);
813 ASSERT (strcmp (result, expected) == 0);
817 case iconveh_escape_sequence:
819 static const char expected[] = "Costs: 27 \\u20AC";
820 ASSERT (result != NULL);
821 ASSERT (strcmp (result, expected) == 0);
828 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
829 for (h = 0; h < SIZEOF (handlers); h++)
831 enum iconv_ilseq_handler handler = handlers[h];
832 static const char input[] = "\342";
833 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
834 ASSERT (result != NULL);
835 ASSERT (strcmp (result, "") == 0);