1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveh.h"
/* Number of elements of ARRAY.  ARRAY must be an actual array object (not a
   pointer), because the count is derived from sizeof.
   NOTE(review): the parameter is not parenthesized in `array[0]`; that is
   harmless for the use visible in this file, SIZEOF (handlers).  */
32 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
/* Test assertion macro.  The visible part reports a failure on stderr as
   "<file>:<line>: assertion failed".  The test of EXPR and whatever follows
   the message are on macro lines that are not visible in this listing.  */
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
44 /* Magic number for detecting bounds violations. */
/* The tests check the element just past the last expected offset against
   this value, e.g. ASSERT (offsets[37] == MAGIC) for a 37-byte input.
   Presumably new_offsets () stores it there; that store is not visible in
   this listing.  */
45 #define MAGIC 0x1983EFF1
/* Allocate an offsets array for an input of N bytes.  Room is reserved for
   N + 1 elements; the tests later compare element N against MAGIC, so the
   extra slot presumably holds that sentinel (the store and the return
   statement are not visible in this listing).
   NOTE(review): no NULL check on the malloc result is visible here.  */
48 new_offsets (size_t n)
50 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* The three illegal-sequence handlers under test.  Every test case below
   iterates over all of them with `for (h = 0; h < SIZEOF (handlers); h++)`.  */
58 static enum iconv_ilseq_handler handlers[] =
59 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
65 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
66 ISO-8859-2, and UTF-8. */
/* Conversion descriptors used by the mem_cd_iconveh () and str_cd_iconveh ()
   tests.  Note the iconv_open (TOCODE, FROMCODE) argument order: the target
   encoding comes first.  */
67 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
68 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
69 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
70 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
71 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
72 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
/* Only the four descriptors that involve UTF-8 are asserted to be valid.
   The two direct ISO-8859-1 <-> ISO-8859-2 descriptors are not asserted in
   the visible lines and are closed conditionally further down, so they may
   legitimately be (iconv_t)(-1).  */
74 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
75 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
76 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
77 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
79 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
81 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
/* INPUT and EXPECTED are byte-for-byte the same string, so the result must
   equal the input for every handler.  */
82 for (h = 0; h < SIZEOF (handlers); h++)
84 enum iconv_ilseq_handler handler = handlers[h];
85 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
86 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
87 for (o = 0; o < 2; o++)
89 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
92 int retval = mem_cd_iconveh (input, strlen (input),
94 cd_88592_to_utf8, cd_utf8_to_88591,
99 ASSERT (length == strlen (expected));
100 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* 37 is strlen (input).  Each of the 37 entries must equal its own index,
   and the element after them must still be MAGIC.  */
103 for (i = 0; i < 37; i++)
104 ASSERT (offsets[i] == i);
105 ASSERT (offsets[37] == MAGIC);
112 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
/* Byte \263 at index 4 of INPUT is the character that triggers the handler;
   the expected outcome differs per handler below.  */
113 for (h = 0; h < SIZEOF (handlers); h++)
115 enum iconv_ilseq_handler handler = handlers[h];
116 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
117 for (o = 0; o < 2; o++)
119 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
122 int retval = mem_cd_iconveh (input, strlen (input),
124 cd_88592_to_utf8, cd_utf8_to_88591,
/* First outcome: failure with errno == EILSEQ and no result.  The label
   selecting this branch is not visible in this listing; presumably it is
   the iconveh_error case -- confirm against the full source.  */
131 ASSERT (retval == -1 && errno == EILSEQ);
132 ASSERT (result == NULL);
/* Question-mark handler: one '?' replaces the one offending byte, so all 16
   (== strlen (input)) entries still equal their index.  */
136 case iconveh_question_mark:
138 static const char expected[] = "Rafa? Maszkowski";
139 ASSERT (retval == 0);
140 ASSERT (length == strlen (expected));
141 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
144 for (i = 0; i < 16; i++)
145 ASSERT (offsets[i] == i);
146 ASSERT (offsets[16] == MAGIC);
/* Escape-sequence handler: the offending byte is replaced by the six
   characters backslash, u, 0, 1, 4, 2.  Entries from index 5 on therefore no
   longer equal the index; the rest of that expression is not visible in
   this listing.  */
152 case iconveh_escape_sequence:
154 static const char expected[] = "Rafa\\u0142 Maszkowski";
155 ASSERT (retval == 0);
156 ASSERT (length == strlen (expected));
157 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
160 for (i = 0; i < 16; i++)
161 ASSERT (offsets[i] == (i < 5 ? i :
163 ASSERT (offsets[16] == MAGIC);
173 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
/* Each of the four non-ASCII bytes of INPUT corresponds to a two-byte
   sequence in EXPECTED: 37 input bytes, 41 expected output bytes.  */
174 for (h = 0; h < SIZEOF (handlers); h++)
176 enum iconv_ilseq_handler handler = handlers[h];
177 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
178 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
179 for (o = 0; o < 2; o++)
181 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
/* Only cd_88591_to_utf8 is supplied; the neighbouring descriptor argument is
   (iconv_t)(-1).  */
184 int retval = mem_cd_iconveh (input, strlen (input),
186 cd_88591_to_utf8, (iconv_t)(-1),
190 ASSERT (retval == 0);
191 ASSERT (length == strlen (expected));
192 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* 37 is strlen (input).  Only entry 0 is required to equal its index here;
   the remaining arms of the expression are not visible in this listing.  */
195 for (i = 0; i < 37; i++)
196 ASSERT (offsets[i] == (i < 1 ? i :
200 ASSERT (offsets[37] == MAGIC);
207 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
/* INPUT has 41 bytes, four of its characters being two-byte sequences;
   EXPECTED has 37 bytes.  */
208 for (h = 0; h < SIZEOF (handlers); h++)
210 enum iconv_ilseq_handler handler = handlers[h];
211 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
212 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
213 for (o = 0; o < 2; o++)
215 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
/* Only cd_utf8_to_88591 is supplied; the neighbouring descriptor argument is
   (iconv_t)(-1).  */
218 int retval = mem_cd_iconveh (input, strlen (input),
220 (iconv_t)(-1), cd_utf8_to_88591,
224 ASSERT (retval == 0);
225 ASSERT (length == strlen (expected));
226 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* 41 is strlen (input).  Entries 1, 13 and 20 are the second bytes of the
   two-byte sequences that start at 0, 12 and 19; they must be (size_t)(-1).
   The other arms of the expression are not visible in this listing.  */
229 for (i = 0; i < 41; i++)
230 ASSERT (offsets[i] == (i < 1 ? i :
231 i == 1 ? (size_t)(-1) :
233 i == 13 ? (size_t)(-1) :
235 i == 20 ? (size_t)(-1) :
238 ASSERT (offsets[41] == MAGIC);
245 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
/* The two bytes \305\202 at indices 4 and 5 of INPUT form the character that
   triggers the handler; the expected outcome differs per handler below.  */
246 for (h = 0; h < SIZEOF (handlers); h++)
248 enum iconv_ilseq_handler handler = handlers[h];
249 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
250 for (o = 0; o < 2; o++)
252 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
255 int retval = mem_cd_iconveh (input, strlen (input),
257 (iconv_t)(-1), cd_utf8_to_88591,
/* First outcome: failure with errno == EILSEQ and no result.  The label
   selecting this branch is not visible in this listing; presumably it is
   the iconveh_error case -- confirm against the full source.  */
264 ASSERT (retval == -1 && errno == EILSEQ);
265 ASSERT (result == NULL);
/* Question-mark handler.  17 is strlen (input); entry 5, the second byte of
   the two-byte sequence, must be (size_t)(-1).  The arm for i > 5 is not
   visible in this listing.  */
269 case iconveh_question_mark:
271 static const char expected[] = "Rafa? Maszkowski";
272 ASSERT (retval == 0);
273 ASSERT (length == strlen (expected));
274 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
277 for (i = 0; i < 17; i++)
278 ASSERT (offsets[i] == (i < 5 ? i :
279 i == 5 ? (size_t)(-1) :
281 ASSERT (offsets[17] == MAGIC);
/* Escape-sequence handler: the character is replaced by the six characters
   backslash, u, 0, 1, 4, 2.  As above, entry 5 must be (size_t)(-1) and the
   arm for i > 5 is not visible in this listing.  */
287 case iconveh_escape_sequence:
289 static const char expected[] = "Rafa\\u0142 Maszkowski";
290 ASSERT (retval == 0);
291 ASSERT (length == strlen (expected));
292 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
295 for (i = 0; i < 17; i++)
296 ASSERT (offsets[i] == (i < 5 ? i :
297 i == 5 ? (size_t)(-1) :
299 ASSERT (offsets[17] == MAGIC);
309 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
/* INPUT is the single byte \342 (0xE2), a UTF-8 lead byte with no
   continuation bytes after it, i.e. an incomplete character at the end of
   the input.  For every handler the call must succeed with length 0.  */
310 for (h = 0; h < SIZEOF (handlers); h++)
312 enum iconv_ilseq_handler handler = handlers[h];
313 static const char input[] = "\342";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
314 for (o = 0; o < 2; o++)
316 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
319 int retval = mem_cd_iconveh (input, strlen (input),
321 (iconv_t)(-1), cd_utf8_to_88591,
325 ASSERT (retval == 0);
326 ASSERT (length == 0);
/* One input byte: entry 0 must be 0, and entry 1 must still be MAGIC.  */
329 ASSERT (offsets[0] == 0);
330 ASSERT (offsets[1] == MAGIC);
337 /* ------------------------ Test str_cd_iconveh() ------------------------ */
339 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
/* String variant: INPUT and EXPECTED are identical, and for every handler
   the result must be non-NULL and compare equal to EXPECTED.  */
340 for (h = 0; h < SIZEOF (handlers); h++)
342 enum iconv_ilseq_handler handler = handlers[h];
343 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
344 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
345 char *result = str_cd_iconveh (input,
347 cd_88592_to_utf8, cd_utf8_to_88591,
349 ASSERT (result != NULL);
350 ASSERT (strcmp (result, expected) == 0);
354 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
/* Byte \263 of INPUT is the character that triggers the handler; the
   expected outcome differs per handler below.  */
355 for (h = 0; h < SIZEOF (handlers); h++)
357 enum iconv_ilseq_handler handler = handlers[h];
358 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
359 char *result = str_cd_iconveh (input,
361 cd_88592_to_utf8, cd_utf8_to_88591,
/* First outcome: NULL result with errno == EILSEQ.  The label selecting this
   branch is not visible in this listing; presumably it is the iconveh_error
   case -- confirm against the full source.  */
366 ASSERT (result == NULL && errno == EILSEQ);
/* Question-mark handler: the offending character becomes '?'.  */
368 case iconveh_question_mark:
370 static const char expected[] = "Rafa? Maszkowski";
371 ASSERT (result != NULL);
372 ASSERT (strcmp (result, expected) == 0);
/* Escape-sequence handler: the offending character becomes the six
   characters backslash, u, 0, 1, 4, 2.  */
376 case iconveh_escape_sequence:
378 static const char expected[] = "Rafa\\u0142 Maszkowski";
379 ASSERT (result != NULL);
380 ASSERT (strcmp (result, expected) == 0);
387 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
/* String variant: each non-ASCII byte of INPUT corresponds to a two-byte
   sequence in EXPECTED.  Only cd_88591_to_utf8 is supplied; the neighbouring
   descriptor argument is (iconv_t)(-1).  */
388 for (h = 0; h < SIZEOF (handlers); h++)
390 enum iconv_ilseq_handler handler = handlers[h];
391 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
392 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
393 char *result = str_cd_iconveh (input,
395 cd_88591_to_utf8, (iconv_t)(-1),
397 ASSERT (result != NULL);
398 ASSERT (strcmp (result, expected) == 0);
402 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
/* String variant: each two-byte sequence of INPUT corresponds to a single
   byte in EXPECTED.  Only cd_utf8_to_88591 is supplied; the neighbouring
   descriptor argument is (iconv_t)(-1).  */
403 for (h = 0; h < SIZEOF (handlers); h++)
405 enum iconv_ilseq_handler handler = handlers[h];
406 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
407 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
408 char *result = str_cd_iconveh (input,
410 (iconv_t)(-1), cd_utf8_to_88591,
412 ASSERT (result != NULL);
413 ASSERT (strcmp (result, expected) == 0);
417 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
/* The three bytes \342\202\254 at the end of INPUT (the EURO SIGN, per the
   trailing comment on INPUT) form the character that triggers the handler.  */
418 for (h = 0; h < SIZEOF (handlers); h++)
420 enum iconv_ilseq_handler handler = handlers[h];
421 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
422 char *result = str_cd_iconveh (input,
424 (iconv_t)(-1), cd_utf8_to_88591,
/* First outcome: NULL result with errno == EILSEQ.  The label selecting this
   branch is not visible in this listing; presumably it is the iconveh_error
   case -- confirm against the full source.  */
429 ASSERT (result == NULL && errno == EILSEQ);
/* Question-mark handler: the three-byte character becomes a single '?'.  */
431 case iconveh_question_mark:
433 static const char expected[] = "Costs: 27 ?";
434 ASSERT (result != NULL);
435 ASSERT (strcmp (result, expected) == 0);
/* Escape-sequence handler: the character becomes the six characters
   backslash, u, 2, 0, A, C.  */
439 case iconveh_escape_sequence:
441 static const char expected[] = "Costs: 27 \\u20AC";
442 ASSERT (result != NULL);
443 ASSERT (strcmp (result, expected) == 0);
450 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
/* INPUT is the single byte \342 (0xE2), a UTF-8 lead byte with no
   continuation bytes after it, i.e. an incomplete character at the end of
   the string.  For every handler the result must be a non-NULL empty
   string.  */
451 for (h = 0; h < SIZEOF (handlers); h++)
453 enum iconv_ilseq_handler handler = handlers[h];
454 static const char input[] = "\342";
455 char *result = str_cd_iconveh (input,
457 (iconv_t)(-1), cd_utf8_to_88591,
459 ASSERT (result != NULL);
460 ASSERT (strcmp (result, "") == 0);
/* Release the conversion descriptors.  The two direct ISO-8859-1 <->
   ISO-8859-2 descriptors are closed only if they were opened successfully;
   the four UTF-8 descriptors were asserted valid right after being opened,
   so they are closed unconditionally.  */
464 if (cd_88591_to_88592 != (iconv_t)(-1))
465 iconv_close (cd_88591_to_88592);
466 if (cd_88592_to_88591 != (iconv_t)(-1))
467 iconv_close (cd_88592_to_88591);
468 iconv_close (cd_88591_to_utf8);
469 iconv_close (cd_utf8_to_88591);
470 iconv_close (cd_88592_to_utf8);
471 iconv_close (cd_utf8_to_88592);
473 /* ------------------------- Test mem_iconveh() ------------------------- */
475 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
/* Same data as the mem_cd_iconveh () case, but the encodings are passed by
   name instead of as conversion descriptors.  INPUT and EXPECTED are
   identical.  */
476 for (h = 0; h < SIZEOF (handlers); h++)
478 enum iconv_ilseq_handler handler = handlers[h];
479 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
480 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
481 for (o = 0; o < 2; o++)
483 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
486 int retval = mem_iconveh (input, strlen (input),
487 "ISO-8859-2", "ISO-8859-1",
491 ASSERT (retval == 0);
492 ASSERT (length == strlen (expected));
493 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* 37 is strlen (input).  Each of the 37 entries must equal its own index,
   and the element after them must still be MAGIC.  */
496 for (i = 0; i < 37; i++)
497 ASSERT (offsets[i] == i);
498 ASSERT (offsets[37] == MAGIC);
505 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
/* Byte \263 at index 4 of INPUT is the character that triggers the handler;
   the expected outcome differs per handler below.  */
506 for (h = 0; h < SIZEOF (handlers); h++)
508 enum iconv_ilseq_handler handler = handlers[h];
509 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
510 for (o = 0; o < 2; o++)
512 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
515 int retval = mem_iconveh (input, strlen (input),
516 "ISO-8859-2", "ISO-8859-1",
/* First outcome: failure with errno == EILSEQ and no result.  The label
   selecting this branch is not visible in this listing; presumably it is
   the iconveh_error case -- confirm against the full source.  */
523 ASSERT (retval == -1 && errno == EILSEQ);
524 ASSERT (result == NULL);
/* Question-mark handler: one '?' replaces the one offending byte, so all 16
   (== strlen (input)) entries still equal their index.  */
528 case iconveh_question_mark:
530 static const char expected[] = "Rafa? Maszkowski";
531 ASSERT (retval == 0);
532 ASSERT (length == strlen (expected));
533 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
536 for (i = 0; i < 16; i++)
537 ASSERT (offsets[i] == i);
538 ASSERT (offsets[16] == MAGIC);
/* Escape-sequence handler: the offending byte is replaced by the six
   characters backslash, u, 0, 1, 4, 2.  Entries from index 5 on therefore no
   longer equal the index; the rest of that expression is not visible in
   this listing.  */
544 case iconveh_escape_sequence:
546 static const char expected[] = "Rafa\\u0142 Maszkowski";
547 ASSERT (retval == 0);
548 ASSERT (length == strlen (expected));
549 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
552 for (i = 0; i < 16; i++)
553 ASSERT (offsets[i] == (i < 5 ? i :
555 ASSERT (offsets[16] == MAGIC);
565 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
/* Each of the four non-ASCII bytes of INPUT corresponds to a two-byte
   sequence in EXPECTED: 37 input bytes, 41 expected output bytes.  The
   encodings are passed by name.  */
566 for (h = 0; h < SIZEOF (handlers); h++)
568 enum iconv_ilseq_handler handler = handlers[h];
569 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
570 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
571 for (o = 0; o < 2; o++)
573 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
576 int retval = mem_iconveh (input, strlen (input),
577 "ISO-8859-1", "UTF-8",
581 ASSERT (retval == 0);
582 ASSERT (length == strlen (expected));
583 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* 37 is strlen (input).  Only entry 0 is required to equal its index here;
   the remaining arms of the expression are not visible in this listing.  */
586 for (i = 0; i < 37; i++)
587 ASSERT (offsets[i] == (i < 1 ? i :
591 ASSERT (offsets[37] == MAGIC);
598 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
/* INPUT has 41 bytes, four of its characters being two-byte sequences;
   EXPECTED has 37 bytes.  The encodings are passed by name.  */
599 for (h = 0; h < SIZEOF (handlers); h++)
601 enum iconv_ilseq_handler handler = handlers[h];
602 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
603 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
604 for (o = 0; o < 2; o++)
606 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
609 int retval = mem_iconveh (input, strlen (input),
610 "UTF-8", "ISO-8859-1",
614 ASSERT (retval == 0);
615 ASSERT (length == strlen (expected));
616 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* 41 is strlen (input).  Entries 1, 13 and 20 are the second bytes of the
   two-byte sequences that start at 0, 12 and 19; they must be (size_t)(-1).
   The other arms of the expression are not visible in this listing.  */
619 for (i = 0; i < 41; i++)
620 ASSERT (offsets[i] == (i < 1 ? i :
621 i == 1 ? (size_t)(-1) :
623 i == 13 ? (size_t)(-1) :
625 i == 20 ? (size_t)(-1) :
628 ASSERT (offsets[41] == MAGIC);
635 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
/* The two bytes \305\202 at indices 4 and 5 of INPUT form the character that
   triggers the handler; the expected outcome differs per handler below.  */
636 for (h = 0; h < SIZEOF (handlers); h++)
638 enum iconv_ilseq_handler handler = handlers[h];
639 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
640 for (o = 0; o < 2; o++)
642 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
645 int retval = mem_iconveh (input, strlen (input),
646 "UTF-8", "ISO-8859-1",
/* First outcome: failure with errno == EILSEQ and no result.  The label
   selecting this branch is not visible in this listing; presumably it is
   the iconveh_error case -- confirm against the full source.  */
653 ASSERT (retval == -1 && errno == EILSEQ);
654 ASSERT (result == NULL);
/* Question-mark handler.  17 is strlen (input); entry 5, the second byte of
   the two-byte sequence, must be (size_t)(-1).  The arm for i > 5 is not
   visible in this listing.  */
658 case iconveh_question_mark:
660 static const char expected[] = "Rafa? Maszkowski";
661 ASSERT (retval == 0);
662 ASSERT (length == strlen (expected));
663 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
666 for (i = 0; i < 17; i++)
667 ASSERT (offsets[i] == (i < 5 ? i :
668 i == 5 ? (size_t)(-1) :
670 ASSERT (offsets[17] == MAGIC);
/* Escape-sequence handler: the character is replaced by the six characters
   backslash, u, 0, 1, 4, 2.  As above, entry 5 must be (size_t)(-1) and the
   arm for i > 5 is not visible in this listing.  */
676 case iconveh_escape_sequence:
678 static const char expected[] = "Rafa\\u0142 Maszkowski";
679 ASSERT (retval == 0);
680 ASSERT (length == strlen (expected));
681 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
684 for (i = 0; i < 17; i++)
685 ASSERT (offsets[i] == (i < 5 ? i :
686 i == 5 ? (size_t)(-1) :
688 ASSERT (offsets[17] == MAGIC);
698 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
/* INPUT is the single byte \342 (0xE2), a UTF-8 lead byte with no
   continuation bytes after it, i.e. an incomplete character at the end of
   the input.  For every handler the call must succeed with length 0.  */
699 for (h = 0; h < SIZEOF (handlers); h++)
701 enum iconv_ilseq_handler handler = handlers[h];
702 static const char input[] = "\342";
/* o == 0: no offsets array (NULL); o == 1: an array from new_offsets ().  */
703 for (o = 0; o < 2; o++)
705 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
708 int retval = mem_iconveh (input, strlen (input),
709 "UTF-8", "ISO-8859-1",
713 ASSERT (retval == 0);
714 ASSERT (length == 0);
/* One input byte: entry 0 must be 0, and entry 1 must still be MAGIC.  */
717 ASSERT (offsets[0] == 0);
718 ASSERT (offsets[1] == MAGIC);
725 /* ------------------------- Test str_iconveh() ------------------------- */
727 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
/* The encoding names are passed source first, then target -- the reverse of
   iconv_open's (target, source) order.  INPUT and EXPECTED are identical,
   and the result must equal EXPECTED for every handler.  */
728 for (h = 0; h < SIZEOF (handlers); h++)
730 enum iconv_ilseq_handler handler = handlers[h];
731 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
732 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
733 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
734 ASSERT (result != NULL);
735 ASSERT (strcmp (result, expected) == 0);
739 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
/* Byte \263 of INPUT is the character that triggers the handler; the
   expected outcome differs per handler below.  */
740 for (h = 0; h < SIZEOF (handlers); h++)
742 enum iconv_ilseq_handler handler = handlers[h];
743 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
744 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
/* First outcome: NULL result with errno == EILSEQ.  The label selecting this
   branch is not visible in this listing; presumably it is the iconveh_error
   case -- confirm against the full source.  */
748 ASSERT (result == NULL && errno == EILSEQ);
/* Question-mark handler: the offending character becomes '?'.  */
750 case iconveh_question_mark:
752 static const char expected[] = "Rafa? Maszkowski";
753 ASSERT (result != NULL);
754 ASSERT (strcmp (result, expected) == 0);
/* Escape-sequence handler: the offending character becomes the six
   characters backslash, u, 0, 1, 4, 2.  */
758 case iconveh_escape_sequence:
760 static const char expected[] = "Rafa\\u0142 Maszkowski";
761 ASSERT (result != NULL);
762 ASSERT (strcmp (result, expected) == 0);
769 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
/* Each non-ASCII byte of INPUT corresponds to a two-byte sequence in
   EXPECTED; the result must equal EXPECTED for every handler.  */
770 for (h = 0; h < SIZEOF (handlers); h++)
772 enum iconv_ilseq_handler handler = handlers[h];
773 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
774 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
775 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
776 ASSERT (result != NULL);
777 ASSERT (strcmp (result, expected) == 0);
781 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
/* Each two-byte sequence of INPUT corresponds to a single byte in EXPECTED;
   the result must equal EXPECTED for every handler.  */
782 for (h = 0; h < SIZEOF (handlers); h++)
784 enum iconv_ilseq_handler handler = handlers[h];
785 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
786 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
787 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
788 ASSERT (result != NULL);
789 ASSERT (strcmp (result, expected) == 0);
793 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
/* The three bytes \342\202\254 at the end of INPUT (the EURO SIGN, per the
   trailing comment on INPUT) form the character that triggers the handler.  */
794 for (h = 0; h < SIZEOF (handlers); h++)
796 enum iconv_ilseq_handler handler = handlers[h];
797 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
798 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
/* First outcome: NULL result with errno == EILSEQ.  The label selecting this
   branch is not visible in this listing; presumably it is the iconveh_error
   case -- confirm against the full source.  */
802 ASSERT (result == NULL && errno == EILSEQ);
/* Question-mark handler: the three-byte character becomes a single '?'.  */
804 case iconveh_question_mark:
806 static const char expected[] = "Costs: 27 ?";
807 ASSERT (result != NULL);
808 ASSERT (strcmp (result, expected) == 0);
/* Escape-sequence handler: the character becomes the six characters
   backslash, u, 2, 0, A, C.  */
812 case iconveh_escape_sequence:
814 static const char expected[] = "Costs: 27 \\u20AC";
815 ASSERT (result != NULL);
816 ASSERT (strcmp (result, expected) == 0);
823 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
/* INPUT is the single byte \342 (0xE2), a UTF-8 lead byte with no
   continuation bytes after it, i.e. an incomplete character at the end of
   the string.  For every handler the result must be a non-NULL empty
   string.  */
824 for (h = 0; h < SIZEOF (handlers); h++)
826 enum iconv_ilseq_handler handler = handlers[h];
827 static const char input[] = "\342";
828 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
829 ASSERT (result != NULL);
830 ASSERT (strcmp (result, "") == 0);