1 /* Test of uppercase mapping for UTF-16 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
/* Number of elements in ARRAY.  ARRAY must be an actual array object, not
   a pointer: the count is computed as the size of the whole object divided
   by the size of its first element.  The argument is parenthesized in the
   expansion so that any array-valued expression can be passed.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
/* ASSERT (expr): assertion helper used by the test cases in this file.
   Only one statement of the macro body is visible in this excerpt: the
   one that reports the failing location (__FILE__ and __LINE__) on stderr.
   NOTE(review): the test of EXPR and whatever follows the report are not
   shown here -- confirm against the full file.  */
30 #define ASSERT(expr) \
35 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
/* Run u16_toupper on INPUT (INPUT_LENGTH units) with the given
   ISO639_LANGUAGE and normalization form NF, and compare the outcome with
   EXPECTED (EXPECTED_LENGTH units).  The call is made three times: with no
   result buffer (NULL), with a preallocated buffer one unit too small
   (skipped when EXPECTED_LENGTH is 0), and with a preallocated buffer of
   exactly EXPECTED_LENGTH units.  Each time the returned pointer, the
   reported length and the contents (via u16_cmp) are checked.
   Callers compare the return value against 0 for success.
   NOTE(review): the statements executed when a check fails, and any
   cleanup of RESULT / PREALLOCATED, are not visible in this excerpt.
   NOTE(review): the malloc results are used unchecked; with
   EXPECTED_LENGTH == 0 the last test requests 0 bytes, for which malloc
   may return NULL -- confirm that the `result == preallocated' check
   copes with that.  */
43 check (const uint16_t *input, size_t input_length,
44 const char *iso639_language, uninorm_t nf,
45 const uint16_t *expected, size_t expected_length)
50 /* Test return conventions with resultbuf == NULL. */
51 result = u16_toupper (input, input_length, iso639_language, nf, NULL, &length);
52 if (!(result != NULL))
54 if (!(length == expected_length))
56 if (!(u16_cmp (result, expected, expected_length) == 0))
60 /* Test return conventions with resultbuf too small. */
61 if (expected_length > 0)
63 uint16_t *preallocated;
/* Offer room for one unit less than the expected result needs. */
65 length = expected_length - 1;
66 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
67 result = u16_toupper (input, input_length, iso639_language, nf, preallocated, &length);
68 if (!(result != NULL))
/* The buffer was too small, so the result must not be the preallocated one. */
70 if (!(result != preallocated))
72 if (!(length == expected_length))
74 if (!(u16_cmp (result, expected, expected_length) == 0))
80 /* Test return conventions with resultbuf large enough. */
82 uint16_t *preallocated;
/* Offer room for exactly the expected number of units. */
84 length = expected_length;
85 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
86 result = u16_toupper (input, input_length, iso639_language, nf, preallocated, &length);
87 if (!(result != NULL))
/* The buffer sufficed, so the result must be the preallocated one. */
89 if (!(result == preallocated))
91 if (!(length == expected_length))
93 if (!(u16_cmp (result, expected, expected_length) == 0))
/* With no input at all (NULL, length 0) the expected result is empty as
   well, both without a normalization form (nf == NULL) and with
   UNINORM_NFC. */
104 { /* Empty string. */
105 ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0);
106 ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0);
/* Mixed Latin, Cyrillic, symbol and CJK/Hangul text, checked with no
   language and no normalization.  The expected output is one unit longer
   than the input because 0x00DF is expected as the two units
   0x0053 0x0053; the symbols, digits and CJK/Hangul units appear unchanged
   in the expected output. */
110 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
111 static const uint16_t input[] =
112 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
113 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
114 0x0439, 0x0442, 0x0435, '!', ' ',
115 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
116 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
117 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
119 static const uint16_t casemapped[] =
120 { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ',
121 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423,
122 0x0419, 0x0422, 0x0415, '!', ' ',
123 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2,
124 '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ',
125 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
127 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
130 /* Case mapping can increase the number of Unicode characters. */
/* One input unit, two expected units: 0x02BC followed by 0x004E. */
131 { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
132 static const uint16_t input[] = { 0x0149 };
133 static const uint16_t casemapped[] = { 0x02BC, 0x004E };
134 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
/* One input unit, three expected units: 0x0399 followed by 0x0308 and
   0x0301. */
136 { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */
137 static const uint16_t input[] = { 0x0390 };
138 static const uint16_t casemapped[] = { 0x0399, 0x0308, 0x0301 };
139 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
142 /* Turkish letters i İ ı I */
/* Every case below is checked twice: with no language (NULL) and with
   the language "tr".  Only LATIN SMALL LETTER I has a different expected
   result for "tr" (0x0130 instead of 0x0049); the other cases expect the
   same output for both. */
143 { /* LATIN CAPITAL LETTER I */
144 static const uint16_t input[] = { 0x0049 };
145 static const uint16_t casemapped[] = { 0x0049 };
146 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
147 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0);
149 { /* LATIN SMALL LETTER I */
150 static const uint16_t input[] = { 0x0069 };
151 static const uint16_t casemapped[] = { 0x0049 };
152 static const uint16_t casemapped_tr[] = { 0x0130 };
153 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
154 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0);
156 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
157 static const uint16_t input[] = { 0x0130 };
158 static const uint16_t casemapped[] = { 0x0130 };
159 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
160 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0);
162 { /* LATIN SMALL LETTER DOTLESS I */
163 static const uint16_t input[] = { 0x0131 };
164 static const uint16_t casemapped[] = { 0x0049 };
165 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
166 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0);
/* The word "topkapı", ending in 0x0131 (dotless i); the expected output
   "TOPKAPI" ends in 0x0049 both by default and for "tr". */
169 static const uint16_t input[] =
170 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
171 static const uint16_t casemapped[] =
172 { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 };
173 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
174 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0);
177 /* Uppercasing can increase the number of Unicode characters. */
/* The word "heiß" (4 units) is expected as "HEISS" (5 units): the final
   0x00DF becomes 0x0053 0x0053. */
179 static const uint16_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
180 static const uint16_t casemapped[] = { 0x0048, 0x0045, 0x0049, 0x0053, 0x0053 };
181 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
184 /* Case mappings for some characters can depend on the surrounding characters. */
/* The input contains two sigma forms: 0x03C3 inside a word and 0x03C2 at
   the end of each of the two words.  Both are expected as 0x03A3 in the
   uppercased output, which has the same number of units as the input. */
185 { /* "περισσότερες πληροφορίες" */
186 static const uint16_t input[] =
188 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
189 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
190 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
192 static const uint16_t casemapped[] =
194 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4,
195 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397,
196 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3
198 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
201 /* Case mapping can require subsequent normalization. */
/* The same input is checked twice.  Without a normalization form the
   expected output is 0x004A 0x030C 0x0323; with UNINORM_NFC the two
   combining marks are expected in the opposite order,
   0x004A 0x0323 0x030C. */
202 { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
203 static const uint16_t input[] = { 0x01F0, 0x0323 };
204 static const uint16_t casemapped[] = { 0x004A, 0x030C, 0x0323 };
205 static const uint16_t casemapped_normalized[] = { 0x004A, 0x0323, 0x030C };
206 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
207 ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casemapped_normalized, SIZEOF (casemapped_normalized)) == 0);