1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007-2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveh.h"
/* Number of elements in ARRAY.  ARRAY must be an actual array object, not
   a pointer.  The argument is parenthesized in both places so that any
   array-valued expression can be passed safely.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
45 /* Magic number for detecting bounds violations. */
46 #define MAGIC 0x1983EFF1
49 new_offsets (size_t n)
51 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* Error-handling strategies exercised by the test loops below; the loop
   variable h indexes into this array.  */
59 static enum iconv_ilseq_handler handlers[] =
60 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
67 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
68 ISO-8859-2, and UTF-8. */
/* iconv_open takes the target encoding first and the source encoding
   second, hence the reversed order relative to the variable names.  */
69 iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII");
70 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
71 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
72 iconv_t cd_ascii_to_utf8 = iconv_open ("UTF-8", "ASCII");
73 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
74 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
75 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
76 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
77 iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
/* Descriptor triples passed by address to the *_cd_iconveh calls below.  */
78 iconveh_t cdeh_ascii_to_88591;
79 iconveh_t cdeh_ascii_to_88591_indirectly;
80 iconveh_t cdeh_88592_to_88591;
81 iconveh_t cdeh_88592_to_88591_indirectly;
82 iconveh_t cdeh_ascii_to_utf8;
83 iconveh_t cdeh_88591_to_utf8;
84 iconveh_t cdeh_utf8_to_88591;
85 iconveh_t cdeh_utf7_to_utf8;
/* Only the five descriptors between ASCII/ISO-8859-x and UTF-8 are
   asserted to exist.  The remaining four (cd_ascii_to_88591,
   cd_88591_to_88592, cd_88592_to_88591, cd_utf7_to_utf8) are allowed to
   be (iconv_t)(-1).  */
87 ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1));
88 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
89 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
90 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
91 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
/* Judging by the values assigned below, each iconveh_t holds three
   descriptors: cd converts source to target directly, cd1 converts source
   to UTF-8, and cd2 converts UTF-8 to target.  */
93 cdeh_ascii_to_88591.cd = cd_ascii_to_88591;
94 cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8;
95 cdeh_ascii_to_88591.cd2 = cd_utf8_to_88591;
/* Same cd1/cd2 pair as above but with cd set to (iconv_t)(-1), leaving
   only the two-step route through UTF-8.  Presumably this forces the
   indirect code path -- TODO confirm against striconveh.h.  */
97 cdeh_ascii_to_88591_indirectly.cd = (iconv_t)(-1);
98 cdeh_ascii_to_88591_indirectly.cd1 = cd_ascii_to_utf8;
99 cdeh_ascii_to_88591_indirectly.cd2 = cd_utf8_to_88591;
101 cdeh_88592_to_88591.cd = cd_88592_to_88591;
102 cdeh_88592_to_88591.cd1 = cd_88592_to_utf8;
103 cdeh_88592_to_88591.cd2 = cd_utf8_to_88591;
105 cdeh_88592_to_88591_indirectly.cd = (iconv_t)(-1);
106 cdeh_88592_to_88591_indirectly.cd1 = cd_88592_to_utf8;
107 cdeh_88592_to_88591_indirectly.cd2 = cd_utf8_to_88591;
/* When the target is UTF-8, cd1 is the same descriptor as cd and cd2 is
   (iconv_t)(-1).  */
109 cdeh_ascii_to_utf8.cd = cd_ascii_to_utf8;
110 cdeh_ascii_to_utf8.cd1 = cd_ascii_to_utf8;
111 cdeh_ascii_to_utf8.cd2 = (iconv_t)(-1);
113 cdeh_88591_to_utf8.cd = cd_88591_to_utf8;
114 cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8;
115 cdeh_88591_to_utf8.cd2 = (iconv_t)(-1);
/* When the source is UTF-8, cd1 is (iconv_t)(-1) and cd2 equals cd.  */
117 cdeh_utf8_to_88591.cd = cd_utf8_to_88591;
118 cdeh_utf8_to_88591.cd1 = (iconv_t)(-1);
119 cdeh_utf8_to_88591.cd2 = cd_utf8_to_88591;
/* cd_utf7_to_utf8 was not asserted above, so this triple may hold
   (iconv_t)(-1) in cd and cd1.  */
121 cdeh_utf7_to_utf8.cd = cd_utf7_to_utf8;
122 cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;
123 cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1);
125 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
/* Every test in this group loops over all handlers (index h).  The inner
   loop over o runs once with offsets == NULL and once with an offsets
   array from new_offsets sized for the input.  The first three tests also
   loop over indirect, and their ternary picks between a direct descriptor
   triple and its _indirectly twin.  */
127 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
128 for (indirect = 0; indirect <= 1; indirect++)
130 for (h = 0; h < SIZEOF (handlers); h++)
132 enum iconv_ilseq_handler handler = handlers[h];
133 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
134 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
135 for (o = 0; o < 2; o++)
137 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
140 int retval = mem_cd_iconveh (input, strlen (input),
142 ? &cdeh_88592_to_88591_indirectly
143 : &cdeh_88592_to_88591),
147 ASSERT (retval == 0);
148 ASSERT (length == strlen (expected));
149 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* Input and output have the same length here, so every offset equals its
   index.  The slot just past the last input byte is expected to equal
   MAGIC (presumably placed there by new_offsets as an overrun guard).  */
152 for (i = 0; i < 37; i++)
153 ASSERT (offsets[i] == i);
154 ASSERT (offsets[37] == MAGIC);
162 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
163 for (indirect = 0; indirect <= 1; indirect++)
165 for (h = 0; h < SIZEOF (handlers); h++)
167 enum iconv_ilseq_handler handler = handlers[h];
168 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
169 for (o = 0; o < 2; o++)
171 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
174 int retval = mem_cd_iconveh (input, strlen (input),
176 ? &cdeh_ascii_to_88591_indirectly
177 : &cdeh_ascii_to_88591),
184 ASSERT (retval == -1 && errno == EILSEQ);
185 ASSERT (result == NULL);
/* Both replacing handlers are expected to produce the same output for
   this input.  */
189 case iconveh_question_mark:
190 case iconveh_escape_sequence:
192 static const char expected[] = "Rafa? Maszkowski";
193 ASSERT (retval == 0);
194 ASSERT (length == strlen (expected));
195 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
198 for (i = 0; i < 16; i++)
199 ASSERT (offsets[i] == i);
200 ASSERT (offsets[16] == MAGIC);
211 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
212 for (indirect = 0; indirect <= 1; indirect++)
214 for (h = 0; h < SIZEOF (handlers); h++)
216 enum iconv_ilseq_handler handler = handlers[h];
217 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
218 for (o = 0; o < 2; o++)
220 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
223 int retval = mem_cd_iconveh (input, strlen (input),
225 ? &cdeh_88592_to_88591_indirectly
226 : &cdeh_88592_to_88591),
233 ASSERT (retval == -1 && errno == EILSEQ);
234 ASSERT (result == NULL);
238 case iconveh_question_mark:
240 static const char expected[] = "Rafa? Maszkowski";
241 ASSERT (retval == 0);
242 ASSERT (length == strlen (expected));
243 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
246 for (i = 0; i < 16; i++)
247 ASSERT (offsets[i] == i);
248 ASSERT (offsets[16] == MAGIC);
/* Here the unconvertible character is written as a six-character escape,
   so the output is longer than the input and offsets after index 4 no
   longer equal the index.  */
254 case iconveh_escape_sequence:
256 static const char expected[] = "Rafa\\u0142 Maszkowski";
257 ASSERT (retval == 0);
258 ASSERT (length == strlen (expected));
259 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
262 for (i = 0; i < 16; i++)
263 ASSERT (offsets[i] == (i < 5 ? i :
265 ASSERT (offsets[16] == MAGIC);
276 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
277 for (h = 0; h < SIZEOF (handlers); h++)
279 enum iconv_ilseq_handler handler = handlers[h];
280 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
281 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
282 for (o = 0; o < 2; o++)
284 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
287 int retval = mem_cd_iconveh (input, strlen (input),
292 ASSERT (retval == 0);
293 ASSERT (length == strlen (expected));
294 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* Each of the four non-ASCII input bytes becomes two bytes in the
   expected output, so offsets run ahead of the index after the first
   character.  */
297 for (i = 0; i < 37; i++)
298 ASSERT (offsets[i] == (i < 1 ? i :
302 ASSERT (offsets[37] == MAGIC);
309 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
310 for (h = 0; h < SIZEOF (handlers); h++)
312 enum iconv_ilseq_handler handler = handlers[h];
313 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
314 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
315 for (o = 0; o < 2; o++)
317 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
320 int retval = mem_cd_iconveh (input, strlen (input),
325 ASSERT (retval == 0);
326 ASSERT (length == strlen (expected));
327 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* Indices 1, 13 and 20 are the second bytes of two-byte input characters;
   for those the expected offset is (size_t)(-1).  */
330 for (i = 0; i < 41; i++)
331 ASSERT (offsets[i] == (i < 1 ? i :
332 i == 1 ? (size_t)(-1) :
334 i == 13 ? (size_t)(-1) :
336 i == 20 ? (size_t)(-1) :
339 ASSERT (offsets[41] == MAGIC);
346 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
347 for (h = 0; h < SIZEOF (handlers); h++)
349 enum iconv_ilseq_handler handler = handlers[h];
350 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
351 for (o = 0; o < 2; o++)
353 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
356 int retval = mem_cd_iconveh (input, strlen (input),
364 ASSERT (retval == -1 && errno == EILSEQ);
365 ASSERT (result == NULL);
369 case iconveh_question_mark:
370 case iconveh_escape_sequence:
372 static const char expected[] = "Rafa? Maszkowski";
373 ASSERT (retval == 0);
374 ASSERT (length == strlen (expected));
375 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
378 for (i = 0; i < 16; i++)
379 ASSERT (offsets[i] == i);
380 ASSERT (offsets[16] == MAGIC);
390 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
391 for (h = 0; h < SIZEOF (handlers); h++)
393 enum iconv_ilseq_handler handler = handlers[h];
394 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
395 for (o = 0; o < 2; o++)
397 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
400 int retval = mem_cd_iconveh (input, strlen (input),
408 ASSERT (retval == -1 && errno == EILSEQ);
409 ASSERT (result == NULL);
413 case iconveh_question_mark:
415 static const char expected[] = "Rafa? Maszkowski";
416 ASSERT (retval == 0);
417 ASSERT (length == strlen (expected));
418 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The input is 17 bytes long because the unconvertible character takes
   two bytes; index 5 is its second byte.  */
421 for (i = 0; i < 17; i++)
422 ASSERT (offsets[i] == (i < 5 ? i :
423 i == 5 ? (size_t)(-1) :
425 ASSERT (offsets[17] == MAGIC);
431 case iconveh_escape_sequence:
433 static const char expected[] = "Rafa\\u0142 Maszkowski";
434 ASSERT (retval == 0);
435 ASSERT (length == strlen (expected));
436 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
439 for (i = 0; i < 17; i++)
440 ASSERT (offsets[i] == (i < 5 ? i :
441 i == 5 ? (size_t)(-1) :
443 ASSERT (offsets[17] == MAGIC);
453 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
454 for (h = 0; h < SIZEOF (handlers); h++)
456 enum iconv_ilseq_handler handler = handlers[h];
/* A single byte that only starts a multibyte UTF-8 character (the same
   byte opens the EURO SIGN input used in the str_cd_iconveh tests).  The
   asserts below expect success with an empty result for every handler.  */
457 static const char input[] = "\342";
458 for (o = 0; o < 2; o++)
460 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
463 int retval = mem_cd_iconveh (input, strlen (input),
468 ASSERT (retval == 0);
469 ASSERT (length == 0);
472 ASSERT (offsets[0] == 0);
473 ASSERT (offsets[1] == MAGIC);
/* The UTF-7 tests run only when iconv_open was able to create the UTF-7
   to UTF-8 descriptor.  */
480 if (cd_utf7_to_utf8 != (iconv_t)(-1))
482 /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns
483 -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA". */
484 # if !(defined __sun && !defined _LIBICONV_VERSION)
485 /* Test conversion from UTF-7 to UTF-8 with EINVAL. */
486 for (h = 0; h < SIZEOF (handlers); h++)
488 enum iconv_ilseq_handler handler = handlers[h];
489 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
490 convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */
491 static const char input[] = "+VDLYP9hA";
492 static const char expected1[] = "\345\220\262"; /* 吲 glibc */
493 static const char expected2[] = ""; /* libiconv */
/* Only the first 7 of the 9 input bytes are passed, so the base64 run is
   cut short; that truncation is the incomplete-input case under test.  */
496 int retval = mem_cd_iconveh (input, 7,
501 ASSERT (retval == 0);
/* Two results are accepted, one per iconv implementation named in the
   comments on expected1 and expected2.  */
502 ASSERT (length == strlen (expected1) || length == strlen (expected2));
503 ASSERT (result != NULL);
504 if (length == strlen (expected1))
505 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
507 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
511 /* Test conversion from UTF-7 to UTF-8 with EILSEQ. */
512 for (h = 0; h < SIZEOF (handlers); h++)
514 enum iconv_ilseq_handler handler = handlers[h];
515 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
516 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
517 static const char input[] = "+2D/YQNhB";
520 int retval = mem_cd_iconveh (input, strlen (input),
528 ASSERT (retval == -1 && errno == EILSEQ);
529 ASSERT (result == NULL);
/* For the replacing handlers three different outputs are accepted; the
   result length decides which expected string is compared.  */
531 case iconveh_question_mark:
532 case iconveh_escape_sequence:
535 static const char expected1[] = "?????";
536 /* libiconv <= 1.12 result */
537 static const char expected2[] = "?2D/YQNhB";
538 /* libiconv behaviour changed in version 1.13: the result is
539 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left
541 static const char expected3[] = "?\340\277\266\341\200\266";
542 ASSERT (retval == 0);
543 ASSERT (length == strlen (expected1)
544 || length == strlen (expected2)
545 || length == strlen (expected3));
546 ASSERT (result != NULL);
547 if (length == strlen (expected1))
548 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
549 else if (length == strlen (expected2))
550 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
552 ASSERT (memcmp (result, expected3, strlen (expected3)) == 0);
561 /* ------------------------ Test str_cd_iconveh() ------------------------ */
/* These tests pass the input as a string without a length and receive the
   converted string as the return value, so they inspect only result (and
   errno when result is NULL); no offsets array is involved.  */
563 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
564 for (indirect = 0; indirect <= 1; indirect++)
566 for (h = 0; h < SIZEOF (handlers); h++)
568 enum iconv_ilseq_handler handler = handlers[h];
569 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
570 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
571 char *result = str_cd_iconveh (input,
573 ? &cdeh_88592_to_88591_indirectly
574 : &cdeh_88592_to_88591),
576 ASSERT (result != NULL);
577 ASSERT (strcmp (result, expected) == 0);
582 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
583 for (indirect = 0; indirect <= 1; indirect++)
585 for (h = 0; h < SIZEOF (handlers); h++)
587 enum iconv_ilseq_handler handler = handlers[h];
588 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
589 char *result = str_cd_iconveh (input,
591 ? &cdeh_ascii_to_88591_indirectly
592 : &cdeh_ascii_to_88591),
597 ASSERT (result == NULL && errno == EILSEQ);
/* Both replacing handlers are expected to produce the same output for
   this input.  */
599 case iconveh_question_mark:
600 case iconveh_escape_sequence:
602 static const char expected[] = "Rafa? Maszkowski";
603 ASSERT (result != NULL);
604 ASSERT (strcmp (result, expected) == 0);
612 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
613 for (indirect = 0; indirect <= 1; indirect++)
615 for (h = 0; h < SIZEOF (handlers); h++)
617 enum iconv_ilseq_handler handler = handlers[h];
618 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
619 char *result = str_cd_iconveh (input,
621 ? &cdeh_88592_to_88591_indirectly
622 : &cdeh_88592_to_88591),
627 ASSERT (result == NULL && errno == EILSEQ);
629 case iconveh_question_mark:
631 static const char expected[] = "Rafa? Maszkowski";
632 ASSERT (result != NULL);
633 ASSERT (strcmp (result, expected) == 0);
637 case iconveh_escape_sequence:
639 static const char expected[] = "Rafa\\u0142 Maszkowski";
640 ASSERT (result != NULL);
641 ASSERT (strcmp (result, expected) == 0);
649 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
650 for (h = 0; h < SIZEOF (handlers); h++)
652 enum iconv_ilseq_handler handler = handlers[h];
653 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
654 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
655 char *result = str_cd_iconveh (input,
658 ASSERT (result != NULL);
659 ASSERT (strcmp (result, expected) == 0);
663 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
664 for (h = 0; h < SIZEOF (handlers); h++)
666 enum iconv_ilseq_handler handler = handlers[h];
667 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
668 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
669 char *result = str_cd_iconveh (input,
672 ASSERT (result != NULL);
673 ASSERT (strcmp (result, expected) == 0);
677 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
678 for (h = 0; h < SIZEOF (handlers); h++)
680 enum iconv_ilseq_handler handler = handlers[h];
681 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
682 char *result = str_cd_iconveh (input,
688 ASSERT (result == NULL && errno == EILSEQ);
690 case iconveh_question_mark:
691 case iconveh_escape_sequence:
693 static const char expected[] = "Rafa? Maszkowski";
694 ASSERT (result != NULL);
695 ASSERT (strcmp (result, expected) == 0);
702 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
703 for (h = 0; h < SIZEOF (handlers); h++)
705 enum iconv_ilseq_handler handler = handlers[h];
/* Unlike the mem_cd_iconveh variant of this test, the unconvertible
   character here is the three-byte EURO SIGN at the end of the string.  */
706 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
707 char *result = str_cd_iconveh (input,
713 ASSERT (result == NULL && errno == EILSEQ);
715 case iconveh_question_mark:
717 static const char expected[] = "Costs: 27 ?";
718 ASSERT (result != NULL);
719 ASSERT (strcmp (result, expected) == 0);
723 case iconveh_escape_sequence:
725 static const char expected[] = "Costs: 27 \\u20AC";
726 ASSERT (result != NULL);
727 ASSERT (strcmp (result, expected) == 0);
734 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
735 for (h = 0; h < SIZEOF (handlers); h++)
737 enum iconv_ilseq_handler handler = handlers[h];
/* Only the first byte of a multibyte character; the asserts expect a
   non-NULL empty string for every handler.  */
738 static const char input[] = "\342";
739 char *result = str_cd_iconveh (input,
742 ASSERT (result != NULL);
743 ASSERT (strcmp (result, "") == 0);
747 if (cd_88591_to_88592 != (iconv_t)(-1))
748 iconv_close (cd_88591_to_88592);
749 if (cd_88592_to_88591 != (iconv_t)(-1))
750 iconv_close (cd_88592_to_88591);
751 iconv_close (cd_88591_to_utf8);
752 iconv_close (cd_utf8_to_88591);
753 iconv_close (cd_88592_to_utf8);
754 iconv_close (cd_utf8_to_88592);
756 /* ------------------------- Test mem_iconveh() ------------------------- */
/* These tests pass encoding names instead of a descriptor triple.  Note
   the argument order: the source encoding comes first and the target
   second, the opposite of the iconv_open calls at the top.  Inputs and
   expectations repeat those of the mem_cd_iconveh tests.  */
758 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
759 for (h = 0; h < SIZEOF (handlers); h++)
761 enum iconv_ilseq_handler handler = handlers[h];
762 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
763 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
764 for (o = 0; o < 2; o++)
766 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
769 int retval = mem_iconveh (input, strlen (input),
770 "ISO-8859-2", "ISO-8859-1",
774 ASSERT (retval == 0);
775 ASSERT (length == strlen (expected));
776 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* Same-length conversion: every offset equals its index, and the slot
   past the last input byte is expected to equal MAGIC.  */
779 for (i = 0; i < 37; i++)
780 ASSERT (offsets[i] == i);
781 ASSERT (offsets[37] == MAGIC);
788 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
789 for (h = 0; h < SIZEOF (handlers); h++)
791 enum iconv_ilseq_handler handler = handlers[h];
792 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
793 for (o = 0; o < 2; o++)
795 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
798 int retval = mem_iconveh (input, strlen (input),
799 "ISO-8859-2", "ISO-8859-1",
806 ASSERT (retval == -1 && errno == EILSEQ);
807 ASSERT (result == NULL);
811 case iconveh_question_mark:
813 static const char expected[] = "Rafa? Maszkowski";
814 ASSERT (retval == 0);
815 ASSERT (length == strlen (expected));
816 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
819 for (i = 0; i < 16; i++)
820 ASSERT (offsets[i] == i);
821 ASSERT (offsets[16] == MAGIC);
/* The escape makes the output longer than the input, so offsets after
   index 4 no longer equal the index.  */
827 case iconveh_escape_sequence:
829 static const char expected[] = "Rafa\\u0142 Maszkowski";
830 ASSERT (retval == 0);
831 ASSERT (length == strlen (expected));
832 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
835 for (i = 0; i < 16; i++)
836 ASSERT (offsets[i] == (i < 5 ? i :
838 ASSERT (offsets[16] == MAGIC);
848 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
849 for (h = 0; h < SIZEOF (handlers); h++)
851 enum iconv_ilseq_handler handler = handlers[h];
852 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
853 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
854 for (o = 0; o < 2; o++)
856 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
859 int retval = mem_iconveh (input, strlen (input),
860 "ISO-8859-1", "UTF-8",
864 ASSERT (retval == 0);
865 ASSERT (length == strlen (expected));
866 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
869 for (i = 0; i < 37; i++)
870 ASSERT (offsets[i] == (i < 1 ? i :
874 ASSERT (offsets[37] == MAGIC);
881 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
882 for (h = 0; h < SIZEOF (handlers); h++)
884 enum iconv_ilseq_handler handler = handlers[h];
885 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
886 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
887 for (o = 0; o < 2; o++)
889 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
892 int retval = mem_iconveh (input, strlen (input),
893 "UTF-8", "ISO-8859-1",
897 ASSERT (retval == 0);
898 ASSERT (length == strlen (expected));
899 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* Indices 1, 13 and 20 are the second bytes of two-byte input characters;
   for those the expected offset is (size_t)(-1).  */
902 for (i = 0; i < 41; i++)
903 ASSERT (offsets[i] == (i < 1 ? i :
904 i == 1 ? (size_t)(-1) :
906 i == 13 ? (size_t)(-1) :
908 i == 20 ? (size_t)(-1) :
911 ASSERT (offsets[41] == MAGIC);
918 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
919 for (h = 0; h < SIZEOF (handlers); h++)
921 enum iconv_ilseq_handler handler = handlers[h];
922 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
923 for (o = 0; o < 2; o++)
925 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
928 int retval = mem_iconveh (input, strlen (input),
929 "UTF-8", "ISO-8859-1",
936 ASSERT (retval == -1 && errno == EILSEQ);
937 ASSERT (result == NULL);
941 case iconveh_question_mark:
943 static const char expected[] = "Rafa? Maszkowski";
944 ASSERT (retval == 0);
945 ASSERT (length == strlen (expected));
946 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
949 for (i = 0; i < 17; i++)
950 ASSERT (offsets[i] == (i < 5 ? i :
951 i == 5 ? (size_t)(-1) :
953 ASSERT (offsets[17] == MAGIC);
959 case iconveh_escape_sequence:
961 static const char expected[] = "Rafa\\u0142 Maszkowski";
962 ASSERT (retval == 0);
963 ASSERT (length == strlen (expected));
964 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
967 for (i = 0; i < 17; i++)
968 ASSERT (offsets[i] == (i < 5 ? i :
969 i == 5 ? (size_t)(-1) :
971 ASSERT (offsets[17] == MAGIC);
981 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
982 for (h = 0; h < SIZEOF (handlers); h++)
984 enum iconv_ilseq_handler handler = handlers[h];
/* Only the first byte of a multibyte character; the asserts expect
   success with zero output bytes for every handler.  */
985 static const char input[] = "\342";
986 for (o = 0; o < 2; o++)
988 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
991 int retval = mem_iconveh (input, strlen (input),
992 "UTF-8", "ISO-8859-1",
996 ASSERT (retval == 0);
997 ASSERT (length == 0);
1000 ASSERT (offsets[0] == 0);
1001 ASSERT (offsets[1] == MAGIC);
1008 /* ------------------------- Test str_iconveh() ------------------------- */
/* str_iconveh is called with the input string, the source encoding name,
   the target encoding name and the handler, in that order, and its return
   value is the converted string.  The failure paths below expect a NULL
   result with errno set to EILSEQ.  */
1010 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
1011 for (h = 0; h < SIZEOF (handlers); h++)
1013 enum iconv_ilseq_handler handler = handlers[h];
1014 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1015 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1016 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1017 ASSERT (result != NULL);
1018 ASSERT (strcmp (result, expected) == 0);
1022 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1023 for (h = 0; h < SIZEOF (handlers); h++)
1025 enum iconv_ilseq_handler handler = handlers[h];
1026 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1027 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1031 ASSERT (result == NULL && errno == EILSEQ);
1033 case iconveh_question_mark:
1035 static const char expected[] = "Rafa? Maszkowski";
1036 ASSERT (result != NULL);
1037 ASSERT (strcmp (result, expected) == 0);
1041 case iconveh_escape_sequence:
1043 static const char expected[] = "Rafa\\u0142 Maszkowski";
1044 ASSERT (result != NULL);
1045 ASSERT (strcmp (result, expected) == 0);
1052 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1053 for (h = 0; h < SIZEOF (handlers); h++)
1055 enum iconv_ilseq_handler handler = handlers[h];
1056 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1057 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1058 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
1059 ASSERT (result != NULL);
1060 ASSERT (strcmp (result, expected) == 0);
1064 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1065 for (h = 0; h < SIZEOF (handlers); h++)
1067 enum iconv_ilseq_handler handler = handlers[h];
1068 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1069 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1070 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1071 ASSERT (result != NULL);
1072 ASSERT (strcmp (result, expected) == 0);
1076 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1077 for (h = 0; h < SIZEOF (handlers); h++)
1079 enum iconv_ilseq_handler handler = handlers[h];
1080 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1081 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1085 ASSERT (result == NULL && errno == EILSEQ);
1087 case iconveh_question_mark:
1089 static const char expected[] = "Costs: 27 ?";
1090 ASSERT (result != NULL);
1091 ASSERT (strcmp (result, expected) == 0);
1095 case iconveh_escape_sequence:
1097 static const char expected[] = "Costs: 27 \\u20AC";
1098 ASSERT (result != NULL);
1099 ASSERT (strcmp (result, expected) == 0);
1106 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
1107 for (h = 0; h < SIZEOF (handlers); h++)
1109 enum iconv_ilseq_handler handler = handlers[h];
/* Only the first byte of a multibyte character; the asserts expect a
   non-NULL empty string for every handler.  */
1110 static const char input[] = "\342";
1111 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1112 ASSERT (result != NULL);
1113 ASSERT (strcmp (result, "") == 0);