1 /* Test of character set conversion with error handling and autodetection.
2 Copyright (C) 2007-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveha.h"
33 /* Magic number for detecting bounds violations. */
34 #define MAGIC 0x1983EFF1
37 new_offsets (size_t n)
39 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* Illegal-sequence handlers under test; each test loop below runs once per entry. */
47 static enum iconv_ilseq_handler handlers[] =
48 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
54 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
55 ISO-8859-2, and UTF-8. */
57 /* ------------------------- Test mem_iconveha() ------------------------- */
59 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
60 for (h = 0; h < SIZEOF (handlers); h++)
62 enum iconv_ilseq_handler handler = handlers[h];
63 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
64 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
65 for (o = 0; o < 2; o++)
67 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
70 int retval = mem_iconveha (input, strlen (input),
71 "ISO-8859-2", "ISO-8859-1",
76 ASSERT (length == strlen (expected));
77 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
80 for (i = 0; i < 37; i++)
81 ASSERT (offsets[i] == i);
82 ASSERT (offsets[37] == MAGIC);
89 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
90 for (h = 0; h < SIZEOF (handlers); h++)
92 enum iconv_ilseq_handler handler = handlers[h];
93 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
94 for (o = 0; o < 2; o++)
96 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
99 int retval = mem_iconveha (input, strlen (input),
100 "ISO-8859-2", "ISO-8859-1",
107 ASSERT (retval == -1 && errno == EILSEQ);
108 ASSERT (result == NULL);
112 case iconveh_question_mark:
114 static const char expected[] = "Rafa? Maszkowski";
115 ASSERT (retval == 0);
116 ASSERT (length == strlen (expected));
117 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
120 for (i = 0; i < 16; i++)
121 ASSERT (offsets[i] == i);
122 ASSERT (offsets[16] == MAGIC);
128 case iconveh_escape_sequence:
130 static const char expected[] = "Rafa\\u0142 Maszkowski";
131 ASSERT (retval == 0);
132 ASSERT (length == strlen (expected));
133 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
136 for (i = 0; i < 16; i++)
137 ASSERT (offsets[i] == (i < 5 ? i :
139 ASSERT (offsets[16] == MAGIC);
149 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
150 for (h = 0; h < SIZEOF (handlers); h++)
152 enum iconv_ilseq_handler handler = handlers[h];
153 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
154 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
155 for (o = 0; o < 2; o++)
157 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
160 int retval = mem_iconveha (input, strlen (input),
161 "ISO-8859-1", "UTF-8",
165 ASSERT (retval == 0);
166 ASSERT (length == strlen (expected));
167 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
170 for (i = 0; i < 37; i++)
171 ASSERT (offsets[i] == (i < 1 ? i :
175 ASSERT (offsets[37] == MAGIC);
182 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
183 for (h = 0; h < SIZEOF (handlers); h++)
185 enum iconv_ilseq_handler handler = handlers[h];
186 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
187 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
188 for (o = 0; o < 2; o++)
190 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
193 int retval = mem_iconveha (input, strlen (input),
194 "UTF-8", "ISO-8859-1",
198 ASSERT (retval == 0);
199 ASSERT (length == strlen (expected));
200 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
203 for (i = 0; i < 41; i++)
204 ASSERT (offsets[i] == (i < 1 ? i :
205 i == 1 ? (size_t)(-1) :
207 i == 13 ? (size_t)(-1) :
209 i == 20 ? (size_t)(-1) :
212 ASSERT (offsets[41] == MAGIC);
219 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
220 for (h = 0; h < SIZEOF (handlers); h++)
222 enum iconv_ilseq_handler handler = handlers[h];
223 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
224 for (o = 0; o < 2; o++)
226 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
229 int retval = mem_iconveha (input, strlen (input),
230 "UTF-8", "ISO-8859-1",
237 ASSERT (retval == -1 && errno == EILSEQ);
238 ASSERT (result == NULL);
242 case iconveh_question_mark:
244 static const char expected[] = "Rafa? Maszkowski";
245 ASSERT (retval == 0);
246 ASSERT (length == strlen (expected));
247 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
250 for (i = 0; i < 17; i++)
251 ASSERT (offsets[i] == (i < 5 ? i :
252 i == 5 ? (size_t)(-1) :
254 ASSERT (offsets[17] == MAGIC);
260 case iconveh_escape_sequence:
262 static const char expected[] = "Rafa\\u0142 Maszkowski";
263 ASSERT (retval == 0);
264 ASSERT (length == strlen (expected));
265 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
268 for (i = 0; i < 17; i++)
269 ASSERT (offsets[i] == (i < 5 ? i :
270 i == 5 ? (size_t)(-1) :
272 ASSERT (offsets[17] == MAGIC);
282 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
283 for (h = 0; h < SIZEOF (handlers); h++)
285 enum iconv_ilseq_handler handler = handlers[h];
286 static const char input[] = "\342";
287 for (o = 0; o < 2; o++)
289 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
292 int retval = mem_iconveha (input, strlen (input),
293 "UTF-8", "ISO-8859-1",
297 ASSERT (retval == 0);
298 ASSERT (length == 0);
301 ASSERT (offsets[0] == 0);
302 ASSERT (offsets[1] == MAGIC);
309 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
310 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
311 /* Test conversions from autodetect_jp to UTF-8. */
312 for (h = 0; h < SIZEOF (handlers); h++)
314 enum iconv_ilseq_handler handler = handlers[h];
315 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
316 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
317 for (o = 0; o < 2; o++)
319 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
322 int retval = mem_iconveha (input, strlen (input),
323 "autodetect_jp", "UTF-8",
327 ASSERT (retval == 0);
328 ASSERT (length == strlen (expected));
329 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
332 for (i = 0; i < 10; i++)
333 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
334 ASSERT (offsets[10] == MAGIC);
340 for (h = 0; h < SIZEOF (handlers); h++)
342 enum iconv_ilseq_handler handler = handlers[h];
343 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
344 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
345 for (o = 0; o < 2; o++)
347 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
350 int retval = mem_iconveha (input, strlen (input),
351 "autodetect_jp", "UTF-8",
355 ASSERT (retval == 0);
356 ASSERT (length == strlen (expected));
357 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
360 for (i = 0; i < 10; i++)
361 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
362 ASSERT (offsets[10] == MAGIC);
368 for (h = 0; h < SIZEOF (handlers); h++)
370 enum iconv_ilseq_handler handler = handlers[h];
371 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
372 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
373 for (o = 0; o < 2; o++)
375 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
378 int retval = mem_iconveha (input, strlen (input),
379 "autodetect_jp", "UTF-8",
383 ASSERT (retval == 0);
384 ASSERT (length == strlen (expected));
385 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
388 for (i = 0; i < 16; i++)
389 ASSERT (offsets[i] == (i == 0 ? 0 :
396 ASSERT (offsets[16] == MAGIC);
404 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) && !defined __UCLIBC__) || _LIBICONV_VERSION >= 0x0105
405 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
406 for (h = 0; h < SIZEOF (handlers); h++)
408 enum iconv_ilseq_handler handler = handlers[h];
409 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
410 static const char expected[] = "Costs: 27 EUR";
411 for (o = 0; o < 2; o++)
413 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
416 int retval = mem_iconveha (input, strlen (input),
417 "UTF-8", "ISO-8859-1",
421 ASSERT (retval == 0);
422 ASSERT (length == strlen (expected));
423 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
426 for (i = 0; i < 13; i++)
427 ASSERT (offsets[i] == (i < 11 ? i : (size_t)(-1)));
428 ASSERT (offsets[13] == MAGIC);
436 /* ------------------------- Test str_iconveha() ------------------------- */
438 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
439 for (h = 0; h < SIZEOF (handlers); h++)
441 enum iconv_ilseq_handler handler = handlers[h];
442 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
443 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
444 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
445 ASSERT (result != NULL);
446 ASSERT (strcmp (result, expected) == 0);
450 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
451 for (h = 0; h < SIZEOF (handlers); h++)
453 enum iconv_ilseq_handler handler = handlers[h];
454 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
455 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
459 ASSERT (result == NULL && errno == EILSEQ);
461 case iconveh_question_mark:
463 static const char expected[] = "Rafa? Maszkowski";
464 ASSERT (result != NULL);
465 ASSERT (strcmp (result, expected) == 0);
469 case iconveh_escape_sequence:
471 static const char expected[] = "Rafa\\u0142 Maszkowski";
472 ASSERT (result != NULL);
473 ASSERT (strcmp (result, expected) == 0);
480 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
481 for (h = 0; h < SIZEOF (handlers); h++)
483 enum iconv_ilseq_handler handler = handlers[h];
484 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
485 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
486 char *result = str_iconveha (input, "ISO-8859-1", "UTF-8", false, handler);
487 ASSERT (result != NULL);
488 ASSERT (strcmp (result, expected) == 0);
492 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
493 for (h = 0; h < SIZEOF (handlers); h++)
495 enum iconv_ilseq_handler handler = handlers[h];
496 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
497 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
498 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
499 ASSERT (result != NULL);
500 ASSERT (strcmp (result, expected) == 0);
504 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
505 for (h = 0; h < SIZEOF (handlers); h++)
507 enum iconv_ilseq_handler handler = handlers[h];
508 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
509 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
513 ASSERT (result == NULL && errno == EILSEQ);
515 case iconveh_question_mark:
517 static const char expected[] = "Costs: 27 ?";
518 ASSERT (result != NULL);
519 ASSERT (strcmp (result, expected) == 0);
523 case iconveh_escape_sequence:
525 static const char expected[] = "Costs: 27 \\u20AC";
526 ASSERT (result != NULL);
527 ASSERT (strcmp (result, expected) == 0);
534 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
535 for (h = 0; h < SIZEOF (handlers); h++)
537 enum iconv_ilseq_handler handler = handlers[h];
538 static const char input[] = "\342";
539 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
540 ASSERT (result != NULL);
541 ASSERT (strcmp (result, "") == 0);
545 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
546 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
547 /* Test conversions from autodetect_jp to UTF-8. */
548 for (h = 0; h < SIZEOF (handlers); h++)
550 enum iconv_ilseq_handler handler = handlers[h];
551 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
552 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
553 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
554 ASSERT (result != NULL);
555 ASSERT (strcmp (result, expected) == 0);
558 for (h = 0; h < SIZEOF (handlers); h++)
560 enum iconv_ilseq_handler handler = handlers[h];
561 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
562 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
563 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
564 ASSERT (result != NULL);
565 ASSERT (strcmp (result, expected) == 0);
568 for (h = 0; h < SIZEOF (handlers); h++)
570 enum iconv_ilseq_handler handler = handlers[h];
571 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
572 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
573 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
574 ASSERT (result != NULL);
575 ASSERT (strcmp (result, expected) == 0);
580 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) && !defined __UCLIBC__) || _LIBICONV_VERSION >= 0x0105
581 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
582 for (h = 0; h < SIZEOF (handlers); h++)
584 enum iconv_ilseq_handler handler = handlers[h];
585 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
586 static const char expected[] = "Costs: 27 EUR";
587 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", true, handler);
588 ASSERT (result != NULL);
589 ASSERT (strcmp (result, expected) == 0);