1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
22 #include "striconveh.h"
/* Number of elements of ARRAY.  ARRAY must be a real array, not a pointer,
   because the count is derived from sizeof.  */
33 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
/* ASSERT (expr): the visible part of the expansion reports the failing
   source file and line on stderr.  NOTE(review): the lines that test expr
   and decide what happens after the message are not visible in this
   excerpt.  */
34 #define ASSERT(expr) \
39 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
45 /* Magic number for detecting bounds violations.  The tests check that the
   offsets slot just past the last input byte (for example offsets[37] for
   a 37-byte input) equals this value after the conversion.  */
46 #define MAGIC 0x1983EFF1
/* Allocate an offsets array for an input of N bytes.  Room is made for
   N + 1 entries; the tests read the extra entry at index N and compare it
   with MAGIC.  NOTE(review): the return type, the initialization of the
   entries and the return statement are not visible in this excerpt, and no
   check of the malloc result is visible either -- confirm.  */
49 new_offsets (size_t n)
51 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* The handlers that every test loop iterates over, indexed by h.  The
   EILSEQ tests expect a failure with iconveh_error, a '?' replacement with
   iconveh_question_mark and a \uXXXX escape with iconveh_escape_sequence.  */
59 static enum iconv_ilseq_handler handlers[] =
60 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
66 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
67 ISO-8859-2, and UTF-8. */
/* Each descriptor is named source_to_target; note that iconv_open receives
   the target encoding as its first argument and the source as its second.  */
68 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
69 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
70 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
71 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
72 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
73 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
/* Only the four descriptors that involve UTF-8 are required to be valid.
   The two direct ISO-8859-1 <-> ISO-8859-2 descriptors are not asserted;
   they are compared with (iconv_t)(-1) again before being closed.  */
75 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
76 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
77 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
78 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
80 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
82 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.
   The descriptors visible in the call are ISO-8859-2 -> UTF-8 followed by
   UTF-8 -> ISO-8859-1.  The expected output is byte-for-byte the same as
   the input.  */
83 for (h = 0; h < SIZEOF (handlers); h++)
85 enum iconv_ilseq_handler handler = handlers[h];
86 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
87 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
88 for (o = 0; o < 2; o++)
90 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
93 int retval = mem_cd_iconveh (input, strlen (input),
95 cd_88592_to_utf8, cd_utf8_to_88591,
100 ASSERT (length == strlen (expected));
101 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The input is 37 bytes long and the expected offsets are the identity.
   Index 37 is the extra slot past the input, which must equal MAGIC.  */
104 for (i = 0; i < 37; i++)
105 ASSERT (offsets[i] == i);
106 ASSERT (offsets[37] == MAGIC);
113 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.
   The input contains the byte \263 (written as ł in the comment next to
   it).  The expected results show that it has no ISO-8859-1 counterpart:
   depending on the handler the call fails, or the byte becomes '?' or the
   escape \u0142.  */
114 for (h = 0; h < SIZEOF (handlers); h++)
116 enum iconv_ilseq_handler handler = handlers[h];
117 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
118 for (o = 0; o < 2; o++)
120 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
123 int retval = mem_cd_iconveh (input, strlen (input),
125 cd_88592_to_utf8, cd_utf8_to_88591,
/* NOTE(review): the case label for the next two assertions is not visible
   in this excerpt; presumably they belong to iconveh_error, the only
   handler without a visible label.  The call must fail with EILSEQ and
   leave result NULL.  */
132 ASSERT (retval == -1 && errno == EILSEQ);
133 ASSERT (result == NULL);
137 case iconveh_question_mark:
139 static const char expected[] = "Rafa? Maszkowski";
140 ASSERT (retval == 0);
141 ASSERT (length == strlen (expected));
142 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* One '?' stands in for one input byte, so the 16 offsets are the
   identity; index 16 is the extra slot that must equal MAGIC.  */
145 for (i = 0; i < 16; i++)
146 ASSERT (offsets[i] == i);
147 ASSERT (offsets[16] == MAGIC);
153 case iconveh_escape_sequence:
155 static const char expected[] = "Rafa\\u0142 Maszkowski";
156 ASSERT (retval == 0);
157 ASSERT (length == strlen (expected));
158 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The offsets of the first five input bytes (i < 5) are the identity.
   NOTE(review): the remainder of the expected-offset expression is not
   visible in this excerpt.  */
161 for (i = 0; i < 16; i++)
162 ASSERT (offsets[i] == (i < 5 ? i :
164 ASSERT (offsets[16] == MAGIC);
174 /* Test conversion from ISO-8859-1 to UTF-8 with no errors.
   On the visible argument line a single real descriptor is passed, and the
   argument after it is (iconv_t)(-1).  Each of the four non-ASCII input
   bytes corresponds to a two-byte sequence in the expected output.  */
175 for (h = 0; h < SIZEOF (handlers); h++)
177 enum iconv_ilseq_handler handler = handlers[h];
178 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
179 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
180 for (o = 0; o < 2; o++)
182 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
185 int retval = mem_cd_iconveh (input, strlen (input),
187 cd_88591_to_utf8, (iconv_t)(-1),
191 ASSERT (retval == 0);
192 ASSERT (length == strlen (expected));
193 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* One offset is checked per input byte (37 of them); index 37 is the extra
   slot that must equal MAGIC.  NOTE(review): only the first branch of the
   expected-offset expression is visible in this excerpt.  */
196 for (i = 0; i < 37; i++)
197 ASSERT (offsets[i] == (i < 1 ? i :
201 ASSERT (offsets[37] == MAGIC);
208 /* Test conversion from UTF-8 to ISO-8859-1 with no errors.
   On the visible argument line the real descriptor is preceded by
   (iconv_t)(-1).  Each two-byte sequence of the input corresponds to one
   byte of the expected output.  */
209 for (h = 0; h < SIZEOF (handlers); h++)
211 enum iconv_ilseq_handler handler = handlers[h];
212 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
213 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
214 for (o = 0; o < 2; o++)
216 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
219 int retval = mem_cd_iconveh (input, strlen (input),
221 (iconv_t)(-1), cd_utf8_to_88591,
225 ASSERT (retval == 0);
226 ASSERT (length == strlen (expected));
227 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The input is 41 bytes long.  Indices 1, 13 and 20 are the second bytes
   of two-byte sequences in the input, and (size_t)(-1) is expected there.
   NOTE(review): the other branches of the expression are not visible in
   this excerpt.  Index 41 is the extra slot that must equal MAGIC.  */
230 for (i = 0; i < 41; i++)
231 ASSERT (offsets[i] == (i < 1 ? i :
232 i == 1 ? (size_t)(-1) :
234 i == 13 ? (size_t)(-1) :
236 i == 20 ? (size_t)(-1) :
239 ASSERT (offsets[41] == MAGIC);
246 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.
   The input contains the two-byte sequence \305\202 (written as ł in the
   comment next to it).  The expected results show that it has no
   ISO-8859-1 counterpart: depending on the handler the call fails, or the
   sequence becomes '?' or the escape \u0142.  */
247 for (h = 0; h < SIZEOF (handlers); h++)
249 enum iconv_ilseq_handler handler = handlers[h];
250 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
251 for (o = 0; o < 2; o++)
253 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
256 int retval = mem_cd_iconveh (input, strlen (input),
258 (iconv_t)(-1), cd_utf8_to_88591,
/* NOTE(review): the case label for the next two assertions is not visible
   in this excerpt; presumably they belong to iconveh_error.  The call must
   fail with EILSEQ and leave result NULL.  */
265 ASSERT (retval == -1 && errno == EILSEQ);
266 ASSERT (result == NULL);
270 case iconveh_question_mark:
272 static const char expected[] = "Rafa? Maszkowski";
273 ASSERT (retval == 0);
274 ASSERT (length == strlen (expected));
275 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The input is 17 bytes long.  Index 5 is the second byte of \305\202, and
   (size_t)(-1) is expected there.  NOTE(review): the final branch of the
   expression is not visible in this excerpt.  */
278 for (i = 0; i < 17; i++)
279 ASSERT (offsets[i] == (i < 5 ? i :
280 i == 5 ? (size_t)(-1) :
282 ASSERT (offsets[17] == MAGIC);
288 case iconveh_escape_sequence:
290 static const char expected[] = "Rafa\\u0142 Maszkowski";
291 ASSERT (retval == 0);
292 ASSERT (length == strlen (expected));
293 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* Same visible offset pattern as in the '?' case: identity for i < 5 and
   (size_t)(-1) at index 5.  NOTE(review): the final branch of the
   expression is not visible in this excerpt.  */
296 for (i = 0; i < 17; i++)
297 ASSERT (offsets[i] == (i < 5 ? i :
298 i == 5 ? (size_t)(-1) :
300 ASSERT (offsets[17] == MAGIC);
310 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.
   The input is the single byte \342, which elsewhere in this file is the
   first byte of the three-byte EURO SIGN sequence, so on its own it is a
   truncated character.  For every handler the call is expected to succeed
   and to produce an empty result.  */
311 for (h = 0; h < SIZEOF (handlers); h++)
313 enum iconv_ilseq_handler handler = handlers[h];
314 static const char input[] = "\342";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
315 for (o = 0; o < 2; o++)
317 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
320 int retval = mem_cd_iconveh (input, strlen (input),
322 (iconv_t)(-1), cd_utf8_to_88591,
326 ASSERT (retval == 0);
327 ASSERT (length == 0);
/* The only input byte is expected at output offset 0; index 1 is the extra
   slot that must equal MAGIC.  */
330 ASSERT (offsets[0] == 0);
331 ASSERT (offsets[1] == MAGIC);
339 /* ------------------------ Test str_cd_iconveh() ------------------------ */
341 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.
   The descriptors visible in the call are ISO-8859-2 -> UTF-8 followed by
   UTF-8 -> ISO-8859-1.  The result is compared as a NUL-terminated string
   and must equal the input byte for byte.  */
342 for (h = 0; h < SIZEOF (handlers); h++)
344 enum iconv_ilseq_handler handler = handlers[h];
345 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
346 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
347 char *result = str_cd_iconveh (input,
349 cd_88592_to_utf8, cd_utf8_to_88591,
351 ASSERT (result != NULL);
352 ASSERT (strcmp (result, expected) == 0);
356 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.
   The byte \263 (written as ł in the comment next to the input) has no
   ISO-8859-1 counterpart according to the expected results below.  */
357 for (h = 0; h < SIZEOF (handlers); h++)
359 enum iconv_ilseq_handler handler = handlers[h];
360 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
361 char *result = str_cd_iconveh (input,
363 cd_88592_to_utf8, cd_utf8_to_88591,
/* NOTE(review): the case label for the next assertion is not visible in
   this excerpt; presumably it belongs to iconveh_error.  Failure is
   reported as a NULL result with errno set to EILSEQ.  */
368 ASSERT (result == NULL && errno == EILSEQ);
370 case iconveh_question_mark:
/* The unconvertible byte is replaced by '?'.  */
372 static const char expected[] = "Rafa? Maszkowski";
373 ASSERT (result != NULL);
374 ASSERT (strcmp (result, expected) == 0);
378 case iconveh_escape_sequence:
/* The unconvertible byte is replaced by the six characters \u0142.  */
380 static const char expected[] = "Rafa\\u0142 Maszkowski";
381 ASSERT (result != NULL);
382 ASSERT (strcmp (result, expected) == 0);
389 /* Test conversion from ISO-8859-1 to UTF-8 with no errors.
   On the visible argument line a single real descriptor is passed, and the
   argument after it is (iconv_t)(-1).  Each of the four non-ASCII input
   bytes corresponds to a two-byte sequence in the expected output.  */
390 for (h = 0; h < SIZEOF (handlers); h++)
392 enum iconv_ilseq_handler handler = handlers[h];
393 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
394 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
395 char *result = str_cd_iconveh (input,
397 cd_88591_to_utf8, (iconv_t)(-1),
399 ASSERT (result != NULL);
400 ASSERT (strcmp (result, expected) == 0);
404 /* Test conversion from UTF-8 to ISO-8859-1 with no errors.
   On the visible argument line the real descriptor is preceded by
   (iconv_t)(-1).  Each two-byte sequence of the input corresponds to one
   byte of the expected output.  */
405 for (h = 0; h < SIZEOF (handlers); h++)
407 enum iconv_ilseq_handler handler = handlers[h];
408 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
409 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
410 char *result = str_cd_iconveh (input,
412 (iconv_t)(-1), cd_utf8_to_88591,
414 ASSERT (result != NULL);
415 ASSERT (strcmp (result, expected) == 0);
419 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.
   The three-byte sequence \342\202\254 (EURO SIGN, per the comment next to
   the input) has no ISO-8859-1 counterpart according to the expected
   results below.  */
420 for (h = 0; h < SIZEOF (handlers); h++)
422 enum iconv_ilseq_handler handler = handlers[h];
423 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
424 char *result = str_cd_iconveh (input,
426 (iconv_t)(-1), cd_utf8_to_88591,
/* NOTE(review): the case label for the next assertion is not visible in
   this excerpt; presumably it belongs to iconveh_error.  Failure is
   reported as a NULL result with errno set to EILSEQ.  */
431 ASSERT (result == NULL && errno == EILSEQ);
433 case iconveh_question_mark:
/* The whole three-byte sequence is replaced by a single '?'.  */
435 static const char expected[] = "Costs: 27 ?";
436 ASSERT (result != NULL);
437 ASSERT (strcmp (result, expected) == 0);
441 case iconveh_escape_sequence:
/* The whole three-byte sequence is replaced by the six characters \u20AC.  */
443 static const char expected[] = "Costs: 27 \\u20AC";
444 ASSERT (result != NULL);
445 ASSERT (strcmp (result, expected) == 0);
452 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.
   The input is the single byte \342, which in the EURO SIGN test is the
   first byte of a three-byte sequence, so on its own it is a truncated
   character.  For every handler the result is expected to be the empty
   string rather than NULL.  */
453 for (h = 0; h < SIZEOF (handlers); h++)
455 enum iconv_ilseq_handler handler = handlers[h];
456 static const char input[] = "\342";
457 char *result = str_cd_iconveh (input,
459 (iconv_t)(-1), cd_utf8_to_88591,
461 ASSERT (result != NULL);
462 ASSERT (strcmp (result, "") == 0);
/* Release the conversion descriptors.  The two direct
   ISO-8859-1 <-> ISO-8859-2 descriptors were never asserted to be valid, so
   they are closed only when they differ from (iconv_t)(-1).  The other
   four were asserted valid right after being opened.  */
466 if (cd_88591_to_88592 != (iconv_t)(-1))
467 iconv_close (cd_88591_to_88592);
468 if (cd_88592_to_88591 != (iconv_t)(-1))
469 iconv_close (cd_88592_to_88591);
470 iconv_close (cd_88591_to_utf8);
471 iconv_close (cd_utf8_to_88591);
472 iconv_close (cd_88592_to_utf8);
473 iconv_close (cd_utf8_to_88592);
475 /* ------------------------- Test mem_iconveh() ------------------------- */
477 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.
   mem_iconveh is given encoding names instead of open descriptors: the
   source encoding first, then the target.  The expected output is
   byte-for-byte the same as the input.  */
478 for (h = 0; h < SIZEOF (handlers); h++)
480 enum iconv_ilseq_handler handler = handlers[h];
481 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
482 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
483 for (o = 0; o < 2; o++)
485 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
488 int retval = mem_iconveh (input, strlen (input),
489 "ISO-8859-2", "ISO-8859-1",
493 ASSERT (retval == 0);
494 ASSERT (length == strlen (expected));
495 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The input is 37 bytes long and the expected offsets are the identity.
   Index 37 is the extra slot past the input, which must equal MAGIC.  */
498 for (i = 0; i < 37; i++)
499 ASSERT (offsets[i] == i);
500 ASSERT (offsets[37] == MAGIC);
507 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.
   The input contains the byte \263 (written as ł in the comment next to
   it).  The expected results show that it has no ISO-8859-1 counterpart:
   depending on the handler the call fails, or the byte becomes '?' or the
   escape \u0142.  */
508 for (h = 0; h < SIZEOF (handlers); h++)
510 enum iconv_ilseq_handler handler = handlers[h];
511 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
512 for (o = 0; o < 2; o++)
514 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
517 int retval = mem_iconveh (input, strlen (input),
518 "ISO-8859-2", "ISO-8859-1",
/* NOTE(review): the case label for the next two assertions is not visible
   in this excerpt; presumably they belong to iconveh_error.  The call must
   fail with EILSEQ and leave result NULL.  */
525 ASSERT (retval == -1 && errno == EILSEQ);
526 ASSERT (result == NULL);
530 case iconveh_question_mark:
532 static const char expected[] = "Rafa? Maszkowski";
533 ASSERT (retval == 0);
534 ASSERT (length == strlen (expected));
535 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* One '?' stands in for one input byte, so the 16 offsets are the
   identity; index 16 is the extra slot that must equal MAGIC.  */
538 for (i = 0; i < 16; i++)
539 ASSERT (offsets[i] == i);
540 ASSERT (offsets[16] == MAGIC);
546 case iconveh_escape_sequence:
548 static const char expected[] = "Rafa\\u0142 Maszkowski";
549 ASSERT (retval == 0);
550 ASSERT (length == strlen (expected));
551 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The offsets of the first five input bytes (i < 5) are the identity.
   NOTE(review): the remainder of the expected-offset expression is not
   visible in this excerpt.  */
554 for (i = 0; i < 16; i++)
555 ASSERT (offsets[i] == (i < 5 ? i :
557 ASSERT (offsets[16] == MAGIC);
567 /* Test conversion from ISO-8859-1 to UTF-8 with no errors.
   The encoding names are passed source first, then target.  Each of the
   four non-ASCII input bytes corresponds to a two-byte sequence in the
   expected output.  */
568 for (h = 0; h < SIZEOF (handlers); h++)
570 enum iconv_ilseq_handler handler = handlers[h];
571 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
572 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
573 for (o = 0; o < 2; o++)
575 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
578 int retval = mem_iconveh (input, strlen (input),
579 "ISO-8859-1", "UTF-8",
583 ASSERT (retval == 0);
584 ASSERT (length == strlen (expected));
585 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* One offset is checked per input byte (37 of them); index 37 is the extra
   slot that must equal MAGIC.  NOTE(review): only the first branch of the
   expected-offset expression is visible in this excerpt.  */
588 for (i = 0; i < 37; i++)
589 ASSERT (offsets[i] == (i < 1 ? i :
593 ASSERT (offsets[37] == MAGIC);
600 /* Test conversion from UTF-8 to ISO-8859-1 with no errors.
   The encoding names are passed source first, then target.  Each two-byte
   sequence of the input corresponds to one byte of the expected output.  */
601 for (h = 0; h < SIZEOF (handlers); h++)
603 enum iconv_ilseq_handler handler = handlers[h];
604 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
605 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
606 for (o = 0; o < 2; o++)
608 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
611 int retval = mem_iconveh (input, strlen (input),
612 "UTF-8", "ISO-8859-1",
616 ASSERT (retval == 0);
617 ASSERT (length == strlen (expected));
618 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The input is 41 bytes long.  Indices 1, 13 and 20 are the second bytes
   of two-byte sequences in the input, and (size_t)(-1) is expected there.
   NOTE(review): the other branches of the expression are not visible in
   this excerpt.  Index 41 is the extra slot that must equal MAGIC.  */
621 for (i = 0; i < 41; i++)
622 ASSERT (offsets[i] == (i < 1 ? i :
623 i == 1 ? (size_t)(-1) :
625 i == 13 ? (size_t)(-1) :
627 i == 20 ? (size_t)(-1) :
630 ASSERT (offsets[41] == MAGIC);
637 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.
   The input contains the two-byte sequence \305\202 (written as ł in the
   comment next to it).  The expected results show that it has no
   ISO-8859-1 counterpart: depending on the handler the call fails, or the
   sequence becomes '?' or the escape \u0142.  */
638 for (h = 0; h < SIZEOF (handlers); h++)
640 enum iconv_ilseq_handler handler = handlers[h];
641 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
642 for (o = 0; o < 2; o++)
644 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
647 int retval = mem_iconveh (input, strlen (input),
648 "UTF-8", "ISO-8859-1",
/* NOTE(review): the case label for the next two assertions is not visible
   in this excerpt; presumably they belong to iconveh_error.  The call must
   fail with EILSEQ and leave result NULL.  */
655 ASSERT (retval == -1 && errno == EILSEQ);
656 ASSERT (result == NULL);
660 case iconveh_question_mark:
662 static const char expected[] = "Rafa? Maszkowski";
663 ASSERT (retval == 0);
664 ASSERT (length == strlen (expected));
665 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* The input is 17 bytes long.  Index 5 is the second byte of \305\202, and
   (size_t)(-1) is expected there.  NOTE(review): the final branch of the
   expression is not visible in this excerpt.  */
668 for (i = 0; i < 17; i++)
669 ASSERT (offsets[i] == (i < 5 ? i :
670 i == 5 ? (size_t)(-1) :
672 ASSERT (offsets[17] == MAGIC);
678 case iconveh_escape_sequence:
680 static const char expected[] = "Rafa\\u0142 Maszkowski";
681 ASSERT (retval == 0);
682 ASSERT (length == strlen (expected));
683 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
/* Same visible offset pattern as in the '?' case: identity for i < 5 and
   (size_t)(-1) at index 5.  NOTE(review): the final branch of the
   expression is not visible in this excerpt.  */
686 for (i = 0; i < 17; i++)
687 ASSERT (offsets[i] == (i < 5 ? i :
688 i == 5 ? (size_t)(-1) :
690 ASSERT (offsets[17] == MAGIC);
700 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.
   The input is the single byte \342, which elsewhere in this file is the
   first byte of the three-byte EURO SIGN sequence, so on its own it is a
   truncated character.  For every handler the call is expected to succeed
   and to produce an empty result.  */
701 for (h = 0; h < SIZEOF (handlers); h++)
703 enum iconv_ilseq_handler handler = handlers[h];
704 static const char input[] = "\342";
/* Run once without an offsets array (o == 0) and once with one (o == 1).  */
705 for (o = 0; o < 2; o++)
707 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
710 int retval = mem_iconveh (input, strlen (input),
711 "UTF-8", "ISO-8859-1",
715 ASSERT (retval == 0);
716 ASSERT (length == 0);
/* The only input byte is expected at output offset 0; index 1 is the extra
   slot that must equal MAGIC.  */
719 ASSERT (offsets[0] == 0);
720 ASSERT (offsets[1] == MAGIC);
728 /* ------------------------- Test str_iconveh() ------------------------- */
730 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.
   str_iconveh is called with the input string, the source encoding name,
   the target encoding name and the handler.  The result is compared as a
   NUL-terminated string and must equal the input byte for byte.  */
731 for (h = 0; h < SIZEOF (handlers); h++)
733 enum iconv_ilseq_handler handler = handlers[h];
734 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
735 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
736 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
737 ASSERT (result != NULL);
738 ASSERT (strcmp (result, expected) == 0);
742 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.
   The byte \263 (written as ł in the comment next to the input) has no
   ISO-8859-1 counterpart according to the expected results below.  */
743 for (h = 0; h < SIZEOF (handlers); h++)
745 enum iconv_ilseq_handler handler = handlers[h];
746 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
747 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
/* NOTE(review): the case label for the next assertion is not visible in
   this excerpt; presumably it belongs to iconveh_error.  Failure is
   reported as a NULL result with errno set to EILSEQ.  */
751 ASSERT (result == NULL && errno == EILSEQ);
753 case iconveh_question_mark:
/* The unconvertible byte is replaced by '?'.  */
755 static const char expected[] = "Rafa? Maszkowski";
756 ASSERT (result != NULL);
757 ASSERT (strcmp (result, expected) == 0);
761 case iconveh_escape_sequence:
/* The unconvertible byte is replaced by the six characters \u0142.  */
763 static const char expected[] = "Rafa\\u0142 Maszkowski";
764 ASSERT (result != NULL);
765 ASSERT (strcmp (result, expected) == 0);
772 /* Test conversion from ISO-8859-1 to UTF-8 with no errors.
   Each of the four non-ASCII input bytes corresponds to a two-byte
   sequence in the expected output.  */
773 for (h = 0; h < SIZEOF (handlers); h++)
775 enum iconv_ilseq_handler handler = handlers[h];
776 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
777 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
778 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
779 ASSERT (result != NULL);
780 ASSERT (strcmp (result, expected) == 0);
784 /* Test conversion from UTF-8 to ISO-8859-1 with no errors.
   Each two-byte sequence of the input corresponds to one byte of the
   expected output.  */
785 for (h = 0; h < SIZEOF (handlers); h++)
787 enum iconv_ilseq_handler handler = handlers[h];
788 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
789 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
790 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
791 ASSERT (result != NULL);
792 ASSERT (strcmp (result, expected) == 0);
796 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.
   The three-byte sequence \342\202\254 (EURO SIGN, per the comment next to
   the input) has no ISO-8859-1 counterpart according to the expected
   results below.  */
797 for (h = 0; h < SIZEOF (handlers); h++)
799 enum iconv_ilseq_handler handler = handlers[h];
800 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
801 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
/* NOTE(review): the case label for the next assertion is not visible in
   this excerpt; presumably it belongs to iconveh_error.  Failure is
   reported as a NULL result with errno set to EILSEQ.  */
805 ASSERT (result == NULL && errno == EILSEQ);
807 case iconveh_question_mark:
/* The whole three-byte sequence is replaced by a single '?'.  */
809 static const char expected[] = "Costs: 27 ?";
810 ASSERT (result != NULL);
811 ASSERT (strcmp (result, expected) == 0);
815 case iconveh_escape_sequence:
/* The whole three-byte sequence is replaced by the six characters \u20AC.  */
817 static const char expected[] = "Costs: 27 \\u20AC";
818 ASSERT (result != NULL);
819 ASSERT (strcmp (result, expected) == 0);
826 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.
   The input is the single byte \342, which in the EURO SIGN test is the
   first byte of a three-byte sequence, so on its own it is a truncated
   character.  For every handler the result is expected to be the empty
   string rather than NULL.  */
827 for (h = 0; h < SIZEOF (handlers); h++)
829 enum iconv_ilseq_handler handler = handlers[h];
830 static const char input[] = "\342";
831 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
832 ASSERT (result != NULL);
833 ASSERT (strcmp (result, "") == 0);