1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
24 #include "striconveh.h"
/* Number of elements in ARRAY.  ARRAY must be a true array object, not a
   pointer: for a pointer the quotient is meaningless.  The argument is
   parenthesized at every use so that any expression of array type
   expands correctly.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
/* Abort the program unless EXPR evaluates to nonzero.
   The expansion is a single statement that still needs the caller's
   terminating semicolon, so "ASSERT (x);" can safely be used as the body
   of an unbraced if/else without capturing the else branch.  */
#define ASSERT(expr) \
  do                 \
    {                \
      if (!(expr))   \
        abort ();    \
    }                \
  while (0)
40 static enum iconv_ilseq_handler handlers[] =
41 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
45 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
46 ISO-8859-2, and UTF-8. */
/* Open one conversion descriptor for each direction between ISO-8859-1,
   ISO-8859-2 and UTF-8.  Note the argument order of iconv_open(): the
   target encoding comes first, the source encoding second.  */
47 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
48 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
49 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
50 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
51 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
52 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
/* (iconv_t)(-1) is the failure value of iconv_open(); abort right away
   if any of the six descriptors could not be created.  */
54 ASSERT (cd_88591_to_88592 != (iconv_t)(-1));
55 ASSERT (cd_88592_to_88591 != (iconv_t)(-1));
56 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
57 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
58 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
59 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
61 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
63 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
64 for (h = 0; h < SIZEOF (handlers); h++)
66 enum iconv_ilseq_handler handler = handlers[h];
67 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
68 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
71 int retval = mem_cd_iconveh (input, strlen (input),
73 cd_88592_to_utf8, cd_utf8_to_88591,
77 ASSERT (length == strlen (expected));
78 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
82 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
83 for (h = 0; h < SIZEOF (handlers); h++)
85 enum iconv_ilseq_handler handler = handlers[h];
86 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
89 int retval = mem_cd_iconveh (input, strlen (input),
91 cd_88592_to_utf8, cd_utf8_to_88591,
97 ASSERT (retval == -1 && errno == EILSEQ);
98 ASSERT (result == NULL);
100 case iconveh_question_mark:
102 static const char expected[] = "Rafa? Maszkowski";
103 ASSERT (retval == 0);
104 ASSERT (length == strlen (expected));
105 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
109 case iconveh_escape_sequence:
111 static const char expected[] = "Rafa\\u0142 Maszkowski";
112 ASSERT (retval == 0);
113 ASSERT (length == strlen (expected));
114 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
121 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
122 for (h = 0; h < SIZEOF (handlers); h++)
124 enum iconv_ilseq_handler handler = handlers[h];
125 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
126 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
129 int retval = mem_cd_iconveh (input, strlen (input),
131 cd_88591_to_utf8, (iconv_t)(-1),
134 ASSERT (retval == 0);
135 ASSERT (length == strlen (expected));
136 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
140 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
141 for (h = 0; h < SIZEOF (handlers); h++)
143 enum iconv_ilseq_handler handler = handlers[h];
144 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
145 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
148 int retval = mem_cd_iconveh (input, strlen (input),
150 (iconv_t)(-1), cd_utf8_to_88591,
153 ASSERT (retval == 0);
154 ASSERT (length == strlen (expected));
155 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
159 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
160 for (h = 0; h < SIZEOF (handlers); h++)
162 enum iconv_ilseq_handler handler = handlers[h];
163 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
166 int retval = mem_cd_iconveh (input, strlen (input),
168 (iconv_t)(-1), cd_utf8_to_88591,
174 ASSERT (retval == -1 && errno == EILSEQ);
175 ASSERT (result == NULL);
177 case iconveh_question_mark:
179 static const char expected[] = "Rafa? Maszkowski";
180 ASSERT (retval == 0);
181 ASSERT (length == strlen (expected));
182 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
186 case iconveh_escape_sequence:
188 static const char expected[] = "Rafa\\u0142 Maszkowski";
189 ASSERT (retval == 0);
190 ASSERT (length == strlen (expected));
191 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
198 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
199 for (h = 0; h < SIZEOF (handlers); h++)
201 enum iconv_ilseq_handler handler = handlers[h];
202 static const char input[] = "\342";
205 int retval = mem_cd_iconveh (input, strlen (input),
207 (iconv_t)(-1), cd_utf8_to_88591,
210 ASSERT (retval == 0);
211 ASSERT (length == 0);
216 /* ------------------------ Test str_cd_iconveh() ------------------------ */
218 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
219 for (h = 0; h < SIZEOF (handlers); h++)
221 enum iconv_ilseq_handler handler = handlers[h];
222 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
223 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
224 char *result = str_cd_iconveh (input,
226 cd_88592_to_utf8, cd_utf8_to_88591,
228 ASSERT (result != NULL);
229 ASSERT (strcmp (result, expected) == 0);
233 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
234 for (h = 0; h < SIZEOF (handlers); h++)
236 enum iconv_ilseq_handler handler = handlers[h];
237 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
238 char *result = str_cd_iconveh (input,
240 cd_88592_to_utf8, cd_utf8_to_88591,
245 ASSERT (result == NULL && errno == EILSEQ);
247 case iconveh_question_mark:
249 static const char expected[] = "Rafa? Maszkowski";
250 ASSERT (result != NULL);
251 ASSERT (strcmp (result, expected) == 0);
255 case iconveh_escape_sequence:
257 static const char expected[] = "Rafa\\u0142 Maszkowski";
258 ASSERT (result != NULL);
259 ASSERT (strcmp (result, expected) == 0);
266 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
267 for (h = 0; h < SIZEOF (handlers); h++)
269 enum iconv_ilseq_handler handler = handlers[h];
270 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
271 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
272 char *result = str_cd_iconveh (input,
274 cd_88591_to_utf8, (iconv_t)(-1),
276 ASSERT (result != NULL);
277 ASSERT (strcmp (result, expected) == 0);
281 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
282 for (h = 0; h < SIZEOF (handlers); h++)
284 enum iconv_ilseq_handler handler = handlers[h];
285 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
286 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
287 char *result = str_cd_iconveh (input,
289 (iconv_t)(-1), cd_utf8_to_88591,
291 ASSERT (result != NULL);
292 ASSERT (strcmp (result, expected) == 0);
296 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
297 for (h = 0; h < SIZEOF (handlers); h++)
299 enum iconv_ilseq_handler handler = handlers[h];
300 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
301 char *result = str_cd_iconveh (input,
303 (iconv_t)(-1), cd_utf8_to_88591,
308 ASSERT (result == NULL && errno == EILSEQ);
310 case iconveh_question_mark:
312 static const char expected[] = "Costs: 27 ?";
313 ASSERT (result != NULL);
314 ASSERT (strcmp (result, expected) == 0);
318 case iconveh_escape_sequence:
320 static const char expected[] = "Costs: 27 \\u20AC";
321 ASSERT (result != NULL);
322 ASSERT (strcmp (result, expected) == 0);
329 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
330 for (h = 0; h < SIZEOF (handlers); h++)
332 enum iconv_ilseq_handler handler = handlers[h];
333 static const char input[] = "\342";
334 char *result = str_cd_iconveh (input,
336 (iconv_t)(-1), cd_utf8_to_88591,
338 ASSERT (result != NULL);
339 ASSERT (strcmp (result, "") == 0);
/* Release all six conversion descriptors.  The str_iconveh() tests that
   follow pass encoding names rather than descriptors, so none of these
   is needed past this point.  */
343 iconv_close (cd_88591_to_88592);
344 iconv_close (cd_88592_to_88591);
345 iconv_close (cd_88591_to_utf8);
346 iconv_close (cd_utf8_to_88591);
347 iconv_close (cd_88592_to_utf8);
348 iconv_close (cd_utf8_to_88592);
350 /* ------------------------- Test str_iconveh() ------------------------- */
352 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
353 for (h = 0; h < SIZEOF (handlers); h++)
355 enum iconv_ilseq_handler handler = handlers[h];
356 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
357 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
358 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
359 ASSERT (result != NULL);
360 ASSERT (strcmp (result, expected) == 0);
364 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
365 for (h = 0; h < SIZEOF (handlers); h++)
367 enum iconv_ilseq_handler handler = handlers[h];
368 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
369 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
373 ASSERT (result == NULL && errno == EILSEQ);
375 case iconveh_question_mark:
377 static const char expected[] = "Rafa? Maszkowski";
378 ASSERT (result != NULL);
379 ASSERT (strcmp (result, expected) == 0);
383 case iconveh_escape_sequence:
385 static const char expected[] = "Rafa\\u0142 Maszkowski";
386 ASSERT (result != NULL);
387 ASSERT (strcmp (result, expected) == 0);
394 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
395 for (h = 0; h < SIZEOF (handlers); h++)
397 enum iconv_ilseq_handler handler = handlers[h];
398 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
399 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
400 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
401 ASSERT (result != NULL);
402 ASSERT (strcmp (result, expected) == 0);
406 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
407 for (h = 0; h < SIZEOF (handlers); h++)
409 enum iconv_ilseq_handler handler = handlers[h];
410 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
411 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
412 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
413 ASSERT (result != NULL);
414 ASSERT (strcmp (result, expected) == 0);
418 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
419 for (h = 0; h < SIZEOF (handlers); h++)
421 enum iconv_ilseq_handler handler = handlers[h];
422 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
423 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
427 ASSERT (result == NULL && errno == EILSEQ);
429 case iconveh_question_mark:
431 static const char expected[] = "Costs: 27 ?";
432 ASSERT (result != NULL);
433 ASSERT (strcmp (result, expected) == 0);
437 case iconveh_escape_sequence:
439 static const char expected[] = "Costs: 27 \\u20AC";
440 ASSERT (result != NULL);
441 ASSERT (strcmp (result, expected) == 0);
448 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
449 for (h = 0; h < SIZEOF (handlers); h++)
451 enum iconv_ilseq_handler handler = handlers[h];
452 static const char input[] = "\342";
453 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
454 ASSERT (result != NULL);
455 ASSERT (strcmp (result, "") == 0);