1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveh.h"
/* Number of elements in ARRAY, computed as the total size of the array
   divided by the size of its first element.  ARRAY must be an actual array
   object; for a pointer the quotient does not give an element count.
   NOTE(review): the argument is not parenthesized in `array[0]`, so pass
   only a plain array name or another postfix expression.  */
32 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
45 /* Magic number for detecting bounds violations.
   new_offsets() allocates one slot more than the requested count, and the
   offsets checks in the tests assert that this extra last slot still equals
   MAGIC after a conversion, i.e. that the conversion did not write past the
   entries it owns.  Presumably new_offsets() is what stores the sentinel
   there -- confirm against its full body.  */
46 #define MAGIC 0x1983EFF1
49 new_offsets (size_t n)
51 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* The illegal-sequence handlers under test.  Each test loop iterates over
   this array with index h (bounded by SIZEOF (handlers)), so every scenario
   is run once per handler.  */
59 static enum iconv_ilseq_handler handlers[] =
60 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
66 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
67 ISO-8859-2, and UTF-8. */
/* Open one conversion descriptor per direction.  iconv_open() takes the
   target encoding first and the source encoding second, which is why the
   argument order is the reverse of the order in each variable name.  */
68 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
69 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
70 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
71 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
72 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
73 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
/* Only the four descriptors that go to or from UTF-8 are required to be
   valid.  The two direct ISO-8859-1 <-> ISO-8859-2 descriptors are not
   asserted here and may therefore be (iconv_t)(-1).  */
75 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
76 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
77 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
78 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
80 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
82 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
83 for (h = 0; h < SIZEOF (handlers); h++)
85 enum iconv_ilseq_handler handler = handlers[h];
86 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
87 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
88 for (o = 0; o < 2; o++)
90 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
93 int retval = mem_cd_iconveh (input, strlen (input),
95 cd_88592_to_utf8, cd_utf8_to_88591,
100 ASSERT (length == strlen (expected));
101 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
104 for (i = 0; i < 37; i++)
105 ASSERT (offsets[i] == i);
106 ASSERT (offsets[37] == MAGIC);
113 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
114 for (h = 0; h < SIZEOF (handlers); h++)
116 enum iconv_ilseq_handler handler = handlers[h];
117 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
118 for (o = 0; o < 2; o++)
120 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
123 int retval = mem_cd_iconveh (input, strlen (input),
125 cd_88592_to_utf8, cd_utf8_to_88591,
132 ASSERT (retval == -1 && errno == EILSEQ);
133 ASSERT (result == NULL);
137 case iconveh_question_mark:
139 static const char expected[] = "Rafa? Maszkowski";
140 ASSERT (retval == 0);
141 ASSERT (length == strlen (expected));
142 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
145 for (i = 0; i < 16; i++)
146 ASSERT (offsets[i] == i);
147 ASSERT (offsets[16] == MAGIC);
153 case iconveh_escape_sequence:
155 static const char expected[] = "Rafa\\u0142 Maszkowski";
156 ASSERT (retval == 0);
157 ASSERT (length == strlen (expected));
158 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
161 for (i = 0; i < 16; i++)
162 ASSERT (offsets[i] == (i < 5 ? i :
164 ASSERT (offsets[16] == MAGIC);
174 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
175 for (h = 0; h < SIZEOF (handlers); h++)
177 enum iconv_ilseq_handler handler = handlers[h];
178 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
179 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
180 for (o = 0; o < 2; o++)
182 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
185 int retval = mem_cd_iconveh (input, strlen (input),
187 cd_88591_to_utf8, (iconv_t)(-1),
191 ASSERT (retval == 0);
192 ASSERT (length == strlen (expected));
193 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
196 for (i = 0; i < 37; i++)
197 ASSERT (offsets[i] == (i < 1 ? i :
201 ASSERT (offsets[37] == MAGIC);
208 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
209 for (h = 0; h < SIZEOF (handlers); h++)
211 enum iconv_ilseq_handler handler = handlers[h];
212 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
213 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
214 for (o = 0; o < 2; o++)
216 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
219 int retval = mem_cd_iconveh (input, strlen (input),
221 (iconv_t)(-1), cd_utf8_to_88591,
225 ASSERT (retval == 0);
226 ASSERT (length == strlen (expected));
227 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
230 for (i = 0; i < 41; i++)
231 ASSERT (offsets[i] == (i < 1 ? i :
232 i == 1 ? (size_t)(-1) :
234 i == 13 ? (size_t)(-1) :
236 i == 20 ? (size_t)(-1) :
239 ASSERT (offsets[41] == MAGIC);
246 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
247 for (h = 0; h < SIZEOF (handlers); h++)
249 enum iconv_ilseq_handler handler = handlers[h];
250 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
251 for (o = 0; o < 2; o++)
253 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
256 int retval = mem_cd_iconveh (input, strlen (input),
258 (iconv_t)(-1), cd_utf8_to_88591,
265 ASSERT (retval == -1 && errno == EILSEQ);
266 ASSERT (result == NULL);
270 case iconveh_question_mark:
272 static const char expected[] = "Rafa? Maszkowski";
273 ASSERT (retval == 0);
274 ASSERT (length == strlen (expected));
275 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
278 for (i = 0; i < 17; i++)
279 ASSERT (offsets[i] == (i < 5 ? i :
280 i == 5 ? (size_t)(-1) :
282 ASSERT (offsets[17] == MAGIC);
288 case iconveh_escape_sequence:
290 static const char expected[] = "Rafa\\u0142 Maszkowski";
291 ASSERT (retval == 0);
292 ASSERT (length == strlen (expected));
293 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
296 for (i = 0; i < 17; i++)
297 ASSERT (offsets[i] == (i < 5 ? i :
298 i == 5 ? (size_t)(-1) :
300 ASSERT (offsets[17] == MAGIC);
310 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
311 for (h = 0; h < SIZEOF (handlers); h++)
313 enum iconv_ilseq_handler handler = handlers[h];
314 static const char input[] = "\342";
315 for (o = 0; o < 2; o++)
317 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
320 int retval = mem_cd_iconveh (input, strlen (input),
322 (iconv_t)(-1), cd_utf8_to_88591,
326 ASSERT (retval == 0);
327 ASSERT (length == 0);
330 ASSERT (offsets[0] == 0);
331 ASSERT (offsets[1] == MAGIC);
338 /* ------------------------ Test str_cd_iconveh() ------------------------ */
340 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
341 for (h = 0; h < SIZEOF (handlers); h++)
343 enum iconv_ilseq_handler handler = handlers[h];
344 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
345 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
346 char *result = str_cd_iconveh (input,
348 cd_88592_to_utf8, cd_utf8_to_88591,
350 ASSERT (result != NULL);
351 ASSERT (strcmp (result, expected) == 0);
355 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
356 for (h = 0; h < SIZEOF (handlers); h++)
358 enum iconv_ilseq_handler handler = handlers[h];
359 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
360 char *result = str_cd_iconveh (input,
362 cd_88592_to_utf8, cd_utf8_to_88591,
367 ASSERT (result == NULL && errno == EILSEQ);
369 case iconveh_question_mark:
371 static const char expected[] = "Rafa? Maszkowski";
372 ASSERT (result != NULL);
373 ASSERT (strcmp (result, expected) == 0);
377 case iconveh_escape_sequence:
379 static const char expected[] = "Rafa\\u0142 Maszkowski";
380 ASSERT (result != NULL);
381 ASSERT (strcmp (result, expected) == 0);
388 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
389 for (h = 0; h < SIZEOF (handlers); h++)
391 enum iconv_ilseq_handler handler = handlers[h];
392 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
393 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
394 char *result = str_cd_iconveh (input,
396 cd_88591_to_utf8, (iconv_t)(-1),
398 ASSERT (result != NULL);
399 ASSERT (strcmp (result, expected) == 0);
403 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
404 for (h = 0; h < SIZEOF (handlers); h++)
406 enum iconv_ilseq_handler handler = handlers[h];
407 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
408 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
409 char *result = str_cd_iconveh (input,
411 (iconv_t)(-1), cd_utf8_to_88591,
413 ASSERT (result != NULL);
414 ASSERT (strcmp (result, expected) == 0);
418 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
419 for (h = 0; h < SIZEOF (handlers); h++)
421 enum iconv_ilseq_handler handler = handlers[h];
422 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
423 char *result = str_cd_iconveh (input,
425 (iconv_t)(-1), cd_utf8_to_88591,
430 ASSERT (result == NULL && errno == EILSEQ);
432 case iconveh_question_mark:
434 static const char expected[] = "Costs: 27 ?";
435 ASSERT (result != NULL);
436 ASSERT (strcmp (result, expected) == 0);
440 case iconveh_escape_sequence:
442 static const char expected[] = "Costs: 27 \\u20AC";
443 ASSERT (result != NULL);
444 ASSERT (strcmp (result, expected) == 0);
451 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
452 for (h = 0; h < SIZEOF (handlers); h++)
454 enum iconv_ilseq_handler handler = handlers[h];
455 static const char input[] = "\342";
456 char *result = str_cd_iconveh (input,
458 (iconv_t)(-1), cd_utf8_to_88591,
460 ASSERT (result != NULL);
461 ASSERT (strcmp (result, "") == 0);
/* Release the conversion descriptors.  The two direct
   ISO-8859-1 <-> ISO-8859-2 descriptors were never asserted to be valid,
   so each is closed only if its iconv_open() call succeeded.  */
465 if (cd_88591_to_88592 != (iconv_t)(-1))
466 iconv_close (cd_88591_to_88592);
467 if (cd_88592_to_88591 != (iconv_t)(-1))
468 iconv_close (cd_88592_to_88591);
/* The four UTF-8 descriptors were asserted valid right after opening, so
   they are closed unconditionally.  */
469 iconv_close (cd_88591_to_utf8);
470 iconv_close (cd_utf8_to_88591);
471 iconv_close (cd_88592_to_utf8);
472 iconv_close (cd_utf8_to_88592);
474 /* ------------------------- Test mem_iconveh() ------------------------- */
476 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
477 for (h = 0; h < SIZEOF (handlers); h++)
479 enum iconv_ilseq_handler handler = handlers[h];
480 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
481 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
482 for (o = 0; o < 2; o++)
484 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
487 int retval = mem_iconveh (input, strlen (input),
488 "ISO-8859-2", "ISO-8859-1",
492 ASSERT (retval == 0);
493 ASSERT (length == strlen (expected));
494 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
497 for (i = 0; i < 37; i++)
498 ASSERT (offsets[i] == i);
499 ASSERT (offsets[37] == MAGIC);
506 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
507 for (h = 0; h < SIZEOF (handlers); h++)
509 enum iconv_ilseq_handler handler = handlers[h];
510 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
511 for (o = 0; o < 2; o++)
513 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
516 int retval = mem_iconveh (input, strlen (input),
517 "ISO-8859-2", "ISO-8859-1",
524 ASSERT (retval == -1 && errno == EILSEQ);
525 ASSERT (result == NULL);
529 case iconveh_question_mark:
531 static const char expected[] = "Rafa? Maszkowski";
532 ASSERT (retval == 0);
533 ASSERT (length == strlen (expected));
534 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
537 for (i = 0; i < 16; i++)
538 ASSERT (offsets[i] == i);
539 ASSERT (offsets[16] == MAGIC);
545 case iconveh_escape_sequence:
547 static const char expected[] = "Rafa\\u0142 Maszkowski";
548 ASSERT (retval == 0);
549 ASSERT (length == strlen (expected));
550 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
553 for (i = 0; i < 16; i++)
554 ASSERT (offsets[i] == (i < 5 ? i :
556 ASSERT (offsets[16] == MAGIC);
566 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
567 for (h = 0; h < SIZEOF (handlers); h++)
569 enum iconv_ilseq_handler handler = handlers[h];
570 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
571 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
572 for (o = 0; o < 2; o++)
574 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
577 int retval = mem_iconveh (input, strlen (input),
578 "ISO-8859-1", "UTF-8",
582 ASSERT (retval == 0);
583 ASSERT (length == strlen (expected));
584 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
587 for (i = 0; i < 37; i++)
588 ASSERT (offsets[i] == (i < 1 ? i :
592 ASSERT (offsets[37] == MAGIC);
599 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
600 for (h = 0; h < SIZEOF (handlers); h++)
602 enum iconv_ilseq_handler handler = handlers[h];
603 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
604 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
605 for (o = 0; o < 2; o++)
607 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
610 int retval = mem_iconveh (input, strlen (input),
611 "UTF-8", "ISO-8859-1",
615 ASSERT (retval == 0);
616 ASSERT (length == strlen (expected));
617 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
620 for (i = 0; i < 41; i++)
621 ASSERT (offsets[i] == (i < 1 ? i :
622 i == 1 ? (size_t)(-1) :
624 i == 13 ? (size_t)(-1) :
626 i == 20 ? (size_t)(-1) :
629 ASSERT (offsets[41] == MAGIC);
636 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
637 for (h = 0; h < SIZEOF (handlers); h++)
639 enum iconv_ilseq_handler handler = handlers[h];
640 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
641 for (o = 0; o < 2; o++)
643 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
646 int retval = mem_iconveh (input, strlen (input),
647 "UTF-8", "ISO-8859-1",
654 ASSERT (retval == -1 && errno == EILSEQ);
655 ASSERT (result == NULL);
659 case iconveh_question_mark:
661 static const char expected[] = "Rafa? Maszkowski";
662 ASSERT (retval == 0);
663 ASSERT (length == strlen (expected));
664 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
667 for (i = 0; i < 17; i++)
668 ASSERT (offsets[i] == (i < 5 ? i :
669 i == 5 ? (size_t)(-1) :
671 ASSERT (offsets[17] == MAGIC);
677 case iconveh_escape_sequence:
679 static const char expected[] = "Rafa\\u0142 Maszkowski";
680 ASSERT (retval == 0);
681 ASSERT (length == strlen (expected));
682 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
685 for (i = 0; i < 17; i++)
686 ASSERT (offsets[i] == (i < 5 ? i :
687 i == 5 ? (size_t)(-1) :
689 ASSERT (offsets[17] == MAGIC);
699 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
700 for (h = 0; h < SIZEOF (handlers); h++)
702 enum iconv_ilseq_handler handler = handlers[h];
703 static const char input[] = "\342";
704 for (o = 0; o < 2; o++)
706 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
709 int retval = mem_iconveh (input, strlen (input),
710 "UTF-8", "ISO-8859-1",
714 ASSERT (retval == 0);
715 ASSERT (length == 0);
718 ASSERT (offsets[0] == 0);
719 ASSERT (offsets[1] == MAGIC);
726 /* ------------------------- Test str_iconveh() ------------------------- */
728 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
729 for (h = 0; h < SIZEOF (handlers); h++)
731 enum iconv_ilseq_handler handler = handlers[h];
732 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
733 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
734 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
735 ASSERT (result != NULL);
736 ASSERT (strcmp (result, expected) == 0);
740 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
741 for (h = 0; h < SIZEOF (handlers); h++)
743 enum iconv_ilseq_handler handler = handlers[h];
744 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
745 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
749 ASSERT (result == NULL && errno == EILSEQ);
751 case iconveh_question_mark:
753 static const char expected[] = "Rafa? Maszkowski";
754 ASSERT (result != NULL);
755 ASSERT (strcmp (result, expected) == 0);
759 case iconveh_escape_sequence:
761 static const char expected[] = "Rafa\\u0142 Maszkowski";
762 ASSERT (result != NULL);
763 ASSERT (strcmp (result, expected) == 0);
770 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
771 for (h = 0; h < SIZEOF (handlers); h++)
773 enum iconv_ilseq_handler handler = handlers[h];
774 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
775 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
776 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
777 ASSERT (result != NULL);
778 ASSERT (strcmp (result, expected) == 0);
782 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
783 for (h = 0; h < SIZEOF (handlers); h++)
785 enum iconv_ilseq_handler handler = handlers[h];
786 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
787 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
788 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
789 ASSERT (result != NULL);
790 ASSERT (strcmp (result, expected) == 0);
794 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
795 for (h = 0; h < SIZEOF (handlers); h++)
797 enum iconv_ilseq_handler handler = handlers[h];
798 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
799 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
803 ASSERT (result == NULL && errno == EILSEQ);
805 case iconveh_question_mark:
807 static const char expected[] = "Costs: 27 ?";
808 ASSERT (result != NULL);
809 ASSERT (strcmp (result, expected) == 0);
813 case iconveh_escape_sequence:
815 static const char expected[] = "Costs: 27 \\u20AC";
816 ASSERT (result != NULL);
817 ASSERT (strcmp (result, expected) == 0);
824 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
825 for (h = 0; h < SIZEOF (handlers); h++)
827 enum iconv_ilseq_handler handler = handlers[h];
828 static const char input[] = "\342";
829 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
830 ASSERT (result != NULL);
831 ASSERT (strcmp (result, "") == 0);