1 /* Test of conversion of multibyte character to wide character.
2 Copyright (C) 2008-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
/* Run the mbrtowc checks under the locale NAME combined with the code page
   CODEPAGE.  Two ways of selecting that combination are visible below:
   handing the string NAME.CODEPAGE to setlocale, or setting the locale NAME
   alone and then overwriting the imported variable __lc_codepage.
   NOTE(review): the return type, the preprocessor lines that choose between
   the two paths and the handling of a failed setlocale are not visible in
   this excerpt; the caller stores the result in an int.  */
31 test_one_locale (const char *name, int codepage)
38 /* Portable code to set the locale. */
40 char name_with_codepage[1024];
/* NOTE(review): sprintf is unbounded.  NAME is supplied by the caller, so a
   name that does not fit into the 1024-byte buffer would overflow it;
   snprintf with a truncation check would be safer.  */
42 sprintf (name_with_codepage, "%s.%d", name, codepage);
/* A NULL result means that setlocale rejected the combined string.  */
45 if (setlocale (LC_ALL, name_with_codepage) == NULL)
49 /* Hacky way to set a locale.codepage combination that setlocale() refuses
52 /* Codepage of the current locale, set with setlocale().
53 Not necessarily the same as GetACP(). */
54 extern __declspec(dllimport) unsigned int __lc_codepage;
/* Select the locale by name only; the code page is forced afterwards.  */
57 if (setlocale (LC_ALL, name) == NULL)
60 /* Clobber the codepage and MB_CUR_MAX, both set by setlocale(). */
/* Only the code page assignment is visible here; the MB_CUR_MAX part
   mentioned above is not part of this excerpt.  */
61 __lc_codepage = codepage;
/* Checks that do not depend on a particular code page.  Each one restarts
   from a zeroed mbstate_t, which ISO C defines as an initial conversion
   state.  */
79 /* Test whether the codepage is really available. */
80 memset (&state, '\0', sizeof (mbstate_t));
/* Probe with a single space.  (size_t)(-1) is the encoding-error result of
   mbrtowc; what the test does in that case is not visible in this
   excerpt.  */
81 if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
86 /* Test zero-length input. */
88 memset (&state, '\0', sizeof (mbstate_t));
/* 0xBADFACE serves as a sentinel so that later checks can tell whether
   mbrtowc stored anything into wc.  */
89 wc = (wchar_t) 0xBADFACE;
/* n is 0, so no byte of the source may be examined.  */
90 ret = mbrtowc (&wc, "x", 0, &state);
91 /* gnulib's implementation returns (size_t)(-2).
92 The AIX 5.1 implementation returns (size_t)(-1).
93 glibc's implementation returns 0. */
94 ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
/* Whichever of the three results was returned, the state must still be
   initial.  */
95 ASSERT (mbsinit (&state));
98 /* Test NUL byte input. */
100 memset (&state, '\0', sizeof (mbstate_t));
101 wc = (wchar_t) 0xBADFACE;
/* The empty string literal supplies exactly one byte, the terminating
   NUL.  */
102 ret = mbrtowc (&wc, "", 1, &state);
105 ASSERT (mbsinit (&state));
/* Same conversion with a NULL destination, i.e. without storing the
   result.  */
106 ret = mbrtowc (NULL, "", 1, &state);
108 ASSERT (mbsinit (&state));
111 /* Test single-byte input. */
116 memset (&state, '\0', sizeof (mbstate_t));
/* Iterate over all 256 byte values.  The case labels that follow presumably
   belong to a switch on c whose header is not visible in this excerpt.  */
117 for (c = 0; c < 0x100; c++)
120 case '\t': case '\v': case '\f':
121 case ' ': case '!': case '"': case '#': case '%':
122 case '&': case '\'': case '(': case ')': case '*':
123 case '+': case ',': case '-': case '.': case '/':
124 case '0': case '1': case '2': case '3': case '4':
125 case '5': case '6': case '7': case '8': case '9':
126 case ':': case ';': case '<': case '=': case '>':
128 case 'A': case 'B': case 'C': case 'D': case 'E':
129 case 'F': case 'G': case 'H': case 'I': case 'J':
130 case 'K': case 'L': case 'M': case 'N': case 'O':
131 case 'P': case 'Q': case 'R': case 'S': case 'T':
132 case 'U': case 'V': case 'W': case 'X': case 'Y':
134 case '[': case '\\': case ']': case '^': case '_':
135 case 'a': case 'b': case 'c': case 'd': case 'e':
136 case 'f': case 'g': case 'h': case 'i': case 'j':
137 case 'k': case 'l': case 'm': case 'n': case 'o':
138 case 'p': case 'q': case 'r': case 's': case 't':
139 case 'u': case 'v': case 'w': case 'x': case 'y':
140 case 'z': case '{': case '|': case '}': case '~':
141 /* c is in the ISO C "basic character set". */
143 wc = (wchar_t) 0xBADFACE;
/* NOTE(review): buf presumably holds the byte c; its declaration and
   assignment are not visible in this excerpt.  */
144 ret = mbrtowc (&wc, buf, 1, &state);
147 ASSERT (mbsinit (&state));
/* Repeat the conversion with a NULL destination.  */
148 ret = mbrtowc (NULL, buf, 1, &state);
150 ASSERT (mbsinit (&state));
155 /* Test special calling convention, passing a NULL pointer. */
157 memset (&state, '\0', sizeof (mbstate_t));
158 wc = (wchar_t) 0xBADFACE;
/* With a NULL source, ISO C specifies that the destination and length
   arguments are ignored, so the sentinel in wc must survive.  */
159 ret = mbrtowc (&wc, NULL, 5, &state);
161 ASSERT (wc == (wchar_t) 0xBADFACE);
162 ASSERT (mbsinit (&state));
/* Case for a single-byte code page: the calls below step through the five
   input bytes at offsets 0 to 4, and the state is expected to be initial
   after every call.  The checks on ret are not visible in this excerpt.  */
168 /* Locale encoding is CP1252, an extension of ISO-8859-1. */
170 char input[] = "B\374\337er"; /* "Büßer" */
171 memset (&state, '\0', sizeof (mbstate_t));
/* Offset 0: the ASCII letter B.  wc is preset to the sentinel before each
   storing call.  */
173 wc = (wchar_t) 0xBADFACE;
174 ret = mbrtowc (&wc, input, 1, &state);
177 ASSERT (mbsinit (&state));
/* Offset 1: byte 0xFC (octal 374), expected as U+00FC; wctob must map the
   wide character back to that same byte.  */
180 wc = (wchar_t) 0xBADFACE;
181 ret = mbrtowc (&wc, input + 1, 1, &state);
183 ASSERT (wctob (wc) == (unsigned char) '\374');
184 ASSERT (wc == 0x00FC);
185 ASSERT (mbsinit (&state));
188 /* Test support of NULL first argument. */
/* Offset 2 is converted twice: first without a destination, then with
   one.  */
189 ret = mbrtowc (NULL, input + 2, 3, &state);
191 ASSERT (mbsinit (&state));
/* Offset 2: byte 0xDF (octal 337), expected as U+00DF.  */
193 wc = (wchar_t) 0xBADFACE;
194 ret = mbrtowc (&wc, input + 2, 3, &state);
196 ASSERT (wctob (wc) == (unsigned char) '\337');
197 ASSERT (wc == 0x00DF);
198 ASSERT (mbsinit (&state));
/* Offset 3: the ASCII letter e.  */
201 wc = (wchar_t) 0xBADFACE;
202 ret = mbrtowc (&wc, input + 3, 2, &state);
205 ASSERT (mbsinit (&state));
/* Offset 4: the ASCII letter r.  */
208 wc = (wchar_t) 0xBADFACE;
209 ret = mbrtowc (&wc, input + 4, 1, &state);
212 ASSERT (mbsinit (&state));
/* Case for a single-byte code page with Arabic letters in the upper half:
   the calls below step through the five input bytes at offsets 0 to 4, and
   the state is expected to be initial after every call.  The checks on ret
   are not visible in this excerpt.  */
217 /* Locale encoding is CP1256, not the same as ISO-8859-6. */
219 char input[] = "x\302\341\346y"; /* "xآلوy" */
220 memset (&state, '\0', sizeof (mbstate_t));
/* Offset 0: the ASCII letter x.  wc is preset to the sentinel before each
   storing call.  */
222 wc = (wchar_t) 0xBADFACE;
223 ret = mbrtowc (&wc, input, 1, &state);
226 ASSERT (mbsinit (&state));
/* Offset 1: byte 0xC2 (octal 302), expected as U+0622; wctob must map the
   wide character back to that same byte.  */
229 wc = (wchar_t) 0xBADFACE;
230 ret = mbrtowc (&wc, input + 1, 1, &state);
232 ASSERT (wctob (wc) == (unsigned char) '\302');
233 ASSERT (wc == 0x0622);
234 ASSERT (mbsinit (&state));
237 /* Test support of NULL first argument. */
/* Offset 2 is converted twice: first without a destination, then with
   one.  */
238 ret = mbrtowc (NULL, input + 2, 3, &state);
240 ASSERT (mbsinit (&state));
/* Offset 2: byte 0xE1 (octal 341), expected as U+0644.  */
242 wc = (wchar_t) 0xBADFACE;
243 ret = mbrtowc (&wc, input + 2, 3, &state);
245 ASSERT (wctob (wc) == (unsigned char) '\341');
246 ASSERT (wc == 0x0644);
247 ASSERT (mbsinit (&state));
/* Offset 3: byte 0xE6 (octal 346), expected as U+0648.  */
250 wc = (wchar_t) 0xBADFACE;
251 ret = mbrtowc (&wc, input + 3, 2, &state);
253 ASSERT (wctob (wc) == (unsigned char) '\346');
254 ASSERT (wc == 0x0648);
255 ASSERT (mbsinit (&state));
/* Offset 4: the ASCII letter y.  */
258 wc = (wchar_t) 0xBADFACE;
259 ret = mbrtowc (&wc, input + 4, 1, &state);
262 ASSERT (mbsinit (&state));
/* Case for a double-byte code page.  The input is an ASCII bracket, three
   two-byte characters at offsets 1, 3 and 5, and a closing bracket at
   offset 7.  The second character is fed in two pieces to exercise the
   incomplete-character path.  Not all checks on ret are visible in this
   excerpt.  */
267 /* Locale encoding is CP932, similar to Shift_JIS. */
269 char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
270 memset (&state, '\0', sizeof (mbstate_t));
/* Offset 0: the ASCII opening bracket.  */
272 wc = (wchar_t) 0xBADFACE;
273 ret = mbrtowc (&wc, input, 1, &state);
276 ASSERT (mbsinit (&state));
/* Offsets 1-2: a complete two-byte character, expected as U+65E5.  wctob
   yields EOF because the character has no single-byte form.  */
279 wc = (wchar_t) 0xBADFACE;
280 ret = mbrtowc (&wc, input + 1, 2, &state);
282 ASSERT (wctob (wc) == EOF);
283 ASSERT (wc == 0x65E5);
284 ASSERT (mbsinit (&state));
/* Offset 3 with n == 1: only the first byte of the next character is
   offered.  mbrtowc must report an incomplete character, leave the sentinel
   in wc untouched and remember the byte in state.  */
288 wc = (wchar_t) 0xBADFACE;
289 ret = mbrtowc (&wc, input + 3, 1, &state);
290 ASSERT (ret == (size_t)(-2));
291 ASSERT (wc == (wchar_t) 0xBADFACE);
292 ASSERT (!mbsinit (&state));
/* Offset 4: continue with the second byte.  The pending character must now
   be completed as U+672C and the state must return to initial.  */
295 wc = (wchar_t) 0xBADFACE;
296 ret = mbrtowc (&wc, input + 4, 4, &state);
298 ASSERT (wctob (wc) == EOF);
299 ASSERT (wc == 0x672C);
300 ASSERT (mbsinit (&state));
303 /* Test support of NULL first argument. */
/* Offsets 5-6 are converted twice: first without a destination, then with
   one.  */
304 ret = mbrtowc (NULL, input + 5, 3, &state);
306 ASSERT (mbsinit (&state));
/* Offsets 5-6: expected as U+8A9E.  */
308 wc = (wchar_t) 0xBADFACE;
309 ret = mbrtowc (&wc, input + 5, 3, &state);
311 ASSERT (wctob (wc) == EOF);
312 ASSERT (wc == 0x8A9E);
313 ASSERT (mbsinit (&state));
/* Offset 7: the ASCII closing bracket.  */
317 wc = (wchar_t) 0xBADFACE;
318 ret = mbrtowc (&wc, input + 7, 1, &state);
321 ASSERT (mbsinit (&state));
/* Case for a double-byte code page.  The input is an ASCII bracket, three
   two-byte characters at offsets 1, 3 and 5, and a closing bracket at
   offset 7.  The second character is fed in two pieces to exercise the
   incomplete-character path.  Not all checks on ret are visible in this
   excerpt.  */
326 /* Locale encoding is CP950, similar to Big5. */
328 char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
329 memset (&state, '\0', sizeof (mbstate_t));
/* Offset 0: the ASCII opening bracket.  */
331 wc = (wchar_t) 0xBADFACE;
332 ret = mbrtowc (&wc, input, 1, &state);
335 ASSERT (mbsinit (&state));
/* Offsets 1-2: a complete two-byte character, expected as U+65E5.  wctob
   yields EOF because the character has no single-byte form.  */
338 wc = (wchar_t) 0xBADFACE;
339 ret = mbrtowc (&wc, input + 1, 2, &state);
341 ASSERT (wctob (wc) == EOF);
342 ASSERT (wc == 0x65E5);
343 ASSERT (mbsinit (&state));
/* Offset 3 with n == 1: only the first byte of the next character is
   offered.  mbrtowc must report an incomplete character, leave the sentinel
   in wc untouched and remember the byte in state.  */
347 wc = (wchar_t) 0xBADFACE;
348 ret = mbrtowc (&wc, input + 3, 1, &state);
349 ASSERT (ret == (size_t)(-2));
350 ASSERT (wc == (wchar_t) 0xBADFACE);
351 ASSERT (!mbsinit (&state));
/* Offset 4: continue with the second byte.  The pending character must now
   be completed as U+672C and the state must return to initial.  */
354 wc = (wchar_t) 0xBADFACE;
355 ret = mbrtowc (&wc, input + 4, 4, &state);
357 ASSERT (wctob (wc) == EOF);
358 ASSERT (wc == 0x672C);
359 ASSERT (mbsinit (&state));
362 /* Test support of NULL first argument. */
/* Offsets 5-6 are converted twice: first without a destination, then with
   one.  */
363 ret = mbrtowc (NULL, input + 5, 3, &state);
365 ASSERT (mbsinit (&state));
/* Offsets 5-6: expected as U+8A9E.  */
367 wc = (wchar_t) 0xBADFACE;
368 ret = mbrtowc (&wc, input + 5, 3, &state);
370 ASSERT (wctob (wc) == EOF);
371 ASSERT (wc == 0x8A9E);
372 ASSERT (mbsinit (&state));
/* Offset 7: the ASCII closing bracket.  */
376 wc = (wchar_t) 0xBADFACE;
377 ret = mbrtowc (&wc, input + 7, 1, &state);
380 ASSERT (mbsinit (&state));
/* Case for a double-byte code page.  The input is an ASCII bracket, three
   two-byte characters at offsets 1, 3 and 5, and a closing bracket at
   offset 7.  The second character is fed in two pieces to exercise the
   incomplete-character path.  Not all checks on ret are visible in this
   excerpt.  */
385 /* Locale encoding is CP936 = GBK, an extension of GB2312. */
387 char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
388 memset (&state, '\0', sizeof (mbstate_t));
/* Offset 0: the ASCII opening bracket.  */
390 wc = (wchar_t) 0xBADFACE;
391 ret = mbrtowc (&wc, input, 1, &state);
394 ASSERT (mbsinit (&state));
/* Offsets 1-2: a complete two-byte character, expected as U+65E5.  wctob
   yields EOF because the character has no single-byte form.  */
397 wc = (wchar_t) 0xBADFACE;
398 ret = mbrtowc (&wc, input + 1, 2, &state);
400 ASSERT (wctob (wc) == EOF);
401 ASSERT (wc == 0x65E5);
402 ASSERT (mbsinit (&state));
/* Offset 3 with n == 1: only the first byte of the next character is
   offered.  mbrtowc must report an incomplete character, leave the sentinel
   in wc untouched and remember the byte in state.  */
406 wc = (wchar_t) 0xBADFACE;
407 ret = mbrtowc (&wc, input + 3, 1, &state);
408 ASSERT (ret == (size_t)(-2));
409 ASSERT (wc == (wchar_t) 0xBADFACE);
410 ASSERT (!mbsinit (&state));
/* Offset 4: continue with the second byte.  The pending character must now
   be completed as U+672C and the state must return to initial.  */
413 wc = (wchar_t) 0xBADFACE;
414 ret = mbrtowc (&wc, input + 4, 4, &state);
416 ASSERT (wctob (wc) == EOF);
417 ASSERT (wc == 0x672C);
418 ASSERT (mbsinit (&state));
421 /* Test support of NULL first argument. */
/* Offsets 5-6 are converted twice: first without a destination, then with
   one.  */
422 ret = mbrtowc (NULL, input + 5, 3, &state);
424 ASSERT (mbsinit (&state));
/* Offsets 5-6: expected as U+8A9E.  */
426 wc = (wchar_t) 0xBADFACE;
427 ret = mbrtowc (&wc, input + 5, 3, &state);
429 ASSERT (wctob (wc) == EOF);
430 ASSERT (wc == 0x8A9E);
431 ASSERT (mbsinit (&state));
/* Offset 7: the ASCII closing bracket.  */
435 wc = (wchar_t) 0xBADFACE;
436 ret = mbrtowc (&wc, input + 7, 1, &state);
439 ASSERT (mbsinit (&state));
/* Case for a code page with two-byte and four-byte characters.  Input
   layout: ASCII B at offset 0, a two-byte character at offsets 1-2, a
   four-byte character at offsets 3-6, then ASCII e and r at offsets 7 and 8.
   The two-byte character is fed in two pieces to exercise the
   incomplete-character path.  Not all checks on ret are visible in this
   excerpt.  */
444 /* Locale encoding is CP54936 = GB18030. */
446 char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
447 memset (&state, '\0', sizeof (mbstate_t));
/* Offset 0: the ASCII letter B.  */
449 wc = (wchar_t) 0xBADFACE;
450 ret = mbrtowc (&wc, input, 1, &state);
453 ASSERT (mbsinit (&state));
/* Offset 1 with n == 1: only the first byte of the two-byte character is
   offered.  mbrtowc must report an incomplete character, leave the sentinel
   in wc untouched and remember the byte in state.  */
456 wc = (wchar_t) 0xBADFACE;
457 ret = mbrtowc (&wc, input + 1, 1, &state);
458 ASSERT (ret == (size_t)(-2));
459 ASSERT (wc == (wchar_t) 0xBADFACE);
460 ASSERT (!mbsinit (&state));
/* Offset 2: continue with the second byte.  The pending character must now
   be completed as U+00FC.  wctob yields EOF because in this encoding the
   character has no single-byte form.  */
463 wc = (wchar_t) 0xBADFACE;
464 ret = mbrtowc (&wc, input + 2, 7, &state);
466 ASSERT (wctob (wc) == EOF);
467 ASSERT (wc == 0x00FC);
468 ASSERT (mbsinit (&state));
471 /* Test support of NULL first argument. */
/* Offsets 3-6 are converted twice: first without a destination, then with
   one.  */
472 ret = mbrtowc (NULL, input + 3, 6, &state);
474 ASSERT (mbsinit (&state));
/* Offsets 3-6: the four-byte character, expected as U+00DF.  */
476 wc = (wchar_t) 0xBADFACE;
477 ret = mbrtowc (&wc, input + 3, 6, &state);
479 ASSERT (wctob (wc) == EOF);
480 ASSERT (wc == 0x00DF);
481 ASSERT (mbsinit (&state));
/* Offset 7: the ASCII letter e.  */
487 wc = (wchar_t) 0xBADFACE;
488 ret = mbrtowc (&wc, input + 7, 2, &state);
491 ASSERT (mbsinit (&state));
/* Offset 8: the ASCII letter r.  */
494 wc = (wchar_t) 0xBADFACE;
495 ret = mbrtowc (&wc, input + 8, 1, &state);
498 ASSERT (mbsinit (&state));
/* Case for UTF-8.  Input layout: ASCII B at offset 0, a two-byte sequence
   at offsets 1-2, another two-byte sequence at offsets 3-4, then ASCII e and
   r at offsets 5 and 6.  The first two-byte sequence is fed in two pieces to
   exercise the incomplete-character path.  Not all checks on ret are visible
   in this excerpt.  */
503 /* Locale encoding is CP65001 = UTF-8. */
505 char input[] = "B\303\274\303\237er"; /* "Büßer" */
506 memset (&state, '\0', sizeof (mbstate_t));
/* Offset 0: the ASCII letter B.  */
508 wc = (wchar_t) 0xBADFACE;
509 ret = mbrtowc (&wc, input, 1, &state);
512 ASSERT (mbsinit (&state));
/* Offset 1 with n == 1: only the lead byte 0xC3 is offered.  mbrtowc must
   report an incomplete character, leave the sentinel in wc untouched and
   remember the byte in state.  */
515 wc = (wchar_t) 0xBADFACE;
516 ret = mbrtowc (&wc, input + 1, 1, &state);
517 ASSERT (ret == (size_t)(-2));
518 ASSERT (wc == (wchar_t) 0xBADFACE);
519 ASSERT (!mbsinit (&state));
/* Offset 2: continue with the trailing byte.  The pending character must
   now be completed as U+00FC.  wctob yields EOF because in UTF-8 the
   character has no single-byte form.  */
522 wc = (wchar_t) 0xBADFACE;
523 ret = mbrtowc (&wc, input + 2, 5, &state);
525 ASSERT (wctob (wc) == EOF);
526 ASSERT (wc == 0x00FC);
527 ASSERT (mbsinit (&state));
530 /* Test support of NULL first argument. */
/* Offsets 3-4 are converted twice: first without a destination, then with
   one.  */
531 ret = mbrtowc (NULL, input + 3, 4, &state);
533 ASSERT (mbsinit (&state));
/* Offsets 3-4: expected as U+00DF.  */
535 wc = (wchar_t) 0xBADFACE;
536 ret = mbrtowc (&wc, input + 3, 4, &state);
538 ASSERT (wctob (wc) == EOF);
539 ASSERT (wc == 0x00DF);
540 ASSERT (mbsinit (&state));
/* Offset 5: the ASCII letter e.  */
544 wc = (wchar_t) 0xBADFACE;
545 ret = mbrtowc (&wc, input + 5, 2, &state);
548 ASSERT (mbsinit (&state));
/* Offset 6: the ASCII letter r.  */
551 wc = (wchar_t) 0xBADFACE;
552 ret = mbrtowc (&wc, input + 6, 1, &state);
555 ASSERT (mbsinit (&state));
/* Test driver.  As far as the visible code shows, the last command-line
   argument is the code page number and the arguments from index 1 up to
   argc - 2 are locale names, each passed to test_one_locale together with
   that code page.
   NOTE(review): presumably this is the variant compiled on native Windows;
   the enclosing preprocessor structure is not visible in this excerpt.  */
565 main (int argc, char *argv[])
/* NOTE(review): atoi gives no error indication for a non-numeric argument,
   and no check of argc is visible here; when no arguments are given,
   argv[argc - 1] is not a code page at all.  strtol with validation would
   be more robust.  */
567 int codepage = atoi (argv[argc - 1]);
/* The final argument is excluded from the loop because it holds the code
   page number.  */
572 for (i = 1; i < argc - 1; i++)
/* How the result is interpreted is not visible in this excerpt.  */
574 int ret = test_one_locale (argv[i], codepage);
/* Diagnostic for the case that no locale could be tested; the remainder of
   this statement is not visible in this excerpt.  */
582 fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
/* Alternative definition of main that only reports the test as skipped.
   NOTE(review): presumably the branch taken when the native Windows guard
   near the top of the file is false; the preprocessor lines and the return
   statement are not visible in this excerpt.  */
591 main (int argc, char *argv[])
593 fputs ("Skipping test: not a native Windows system\n", stderr);