1 /* Test of case and normalization insensitive comparison of UTF-32 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
/* Number of elements in ARRAY.  ARRAY must be an actual array object, not a
   pointer: for a pointer the division yields a meaningless value.  The
   argument is parenthesized at every use so that any expression can be
   passed safely.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
/* Assertion helper for this test.  The visible part of the expansion writes
   "<file>:<line>: assertion failed" to stderr using __FILE__ and __LINE__.
   NOTE(review): the test of EXPR and whatever happens after the message is
   printed are not visible in this excerpt -- confirm against the full file.  */
29 #define ASSERT(expr) \
34 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
42 #include "test-casecmp.h"
/* Exercise a case-insensitive comparison function for UTF-32 strings on
   non-ASCII input.

   MY_CASECMP is invoked as
     my_casecmp (s1, n1, s2, n2, <const char *>, <uninorm_t>, &cmp)
   where s1/s2 are uint32_t arrays and n1/n2 their element counts (SIZEOF).
   The fifth argument is either NULL or "tr" (presumably a language code --
   confirm against the declaration of the function under test); the sixth is
   either NULL or UNINORM_NFD.

   The fixtures cover: composed versus decomposed spellings, mixed-script
   text in different cases, case mappings that change the number of
   characters, the Turkish i letters, Greek sigma variants, and a case
   mapping whose result has combining marks in a different order.

   NOTE(review): this excerpt shows neither the declaration of cmp nor any
   check of the value stored through &cmp; only the return value of each
   call is asserted to be 0 in the lines visible here.  */
46 test_nonascii (int (*my_casecmp) (const uint32_t *, size_t, const uint32_t *, size_t, const char *, uninorm_t, int *))
48 /* Normalization effects. */
50 static const uint32_t input1[] = { 'H', 0x00F6, 'h', 'l', 'e' };
51 static const uint32_t input2[] = { 'H', 'O', 0x0308, 'h', 'L', 'e' };
52 static const uint32_t input3[] = { 'H', 0x00F6, 'h', 'l', 'e', 'n' };
53 static const uint32_t input4[] = { 'H', 'O', 0x0308, 'h', 'L', 'e', 'n' };
54 static const uint32_t input5[] = { 'H', 'u', 'r', 'z' };
/* input1/input3 use U+00F6; input2/input4 spell the same position as
   'O' followed by U+0308 and use 'L' instead of 'l'.  All calls in this
   group pass UNINORM_NFD.  */
57 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
60 ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, UNINORM_NFD, &cmp) == 0);
63 ASSERT (my_casecmp (input3, SIZEOF (input3), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0);
66 ASSERT (my_casecmp (input4, SIZEOF (input4), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
69 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
72 ASSERT (my_casecmp (input1, SIZEOF (input1), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0);
75 ASSERT (my_casecmp (input1, SIZEOF (input1), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0);
78 ASSERT (my_casecmp (input2, SIZEOF (input2), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0);
81 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
82 static const uint32_t input1[] = { 0x00C4 };
83 static const uint32_t input2[] = { 0x0041, 0x0308 };
86 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
89 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
90 static const uint32_t input1[] = { 0x01DE };
91 static const uint32_t input2[] = { 0x0041, 0x0308, 0x0304 };
94 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
97 { /* GREEK DIALYTIKA AND PERISPOMENI */
98 static const uint32_t input1[] = { 0x1FC1 };
99 static const uint32_t input2[] = { 0x00A8, 0x0342 };
102 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
105 { /* HANGUL SYLLABLE GEUL */
106 static const uint32_t input1[] = { 0xAE00 };
107 static const uint32_t input2[] = { 0xADF8, 0x11AF };
108 static const uint32_t input3[] = { 0x1100, 0x1173, 0x11AF };
111 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
114 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
117 { /* HANGUL SYLLABLE GEU */
118 static const uint32_t input1[] = { 0xADF8 };
119 static const uint32_t input2[] = { 0x1100, 0x1173 };
122 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
127 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
/* input1 is the mixed-case text with U+00DF; input2 is lower case with
   U+00DF written as two U+0073; input3 is upper case with two U+0053.  */
128 static const uint32_t input1[] =
129 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
130 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
131 0x0439, 0x0442, 0x0435, '!', ' ',
132 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
133 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
134 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
136 static const uint32_t input2[] =
137 { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ',
138 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
139 0x0439, 0x0442, 0x0435, '!', ' ',
140 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
141 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
142 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
144 static const uint32_t input3[] =
145 { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ',
146 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423,
147 0x0419, 0x0422, 0x0415, '!', ' ',
148 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2,
149 '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ',
150 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
/* Each pair is compared twice: once with a NULL normalization form and
   once with UNINORM_NFD.  */
154 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
157 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
160 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
163 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
166 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
169 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
173 /* Case mapping can increase the number of Unicode characters. */
174 { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
175 static const uint32_t input1[] = { 0x0149 };
176 static const uint32_t input2[] = { 0x02BC, 0x006E };
177 static const uint32_t input3[] = { 0x02BC, 0x004E };
180 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
183 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
186 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
189 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
192 { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */
193 static const uint32_t input1[] = { 0x0390 };
194 static const uint32_t input2[] = { 0x03B9, 0x0308, 0x0301 };
197 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
200 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
204 /* Turkish letters i İ ı I */
/* In each group below the first call passes NULL as the fifth argument and
   the second passes "tr"; both pass NULL as the normalization form.  */
205 { /* LATIN CAPITAL LETTER I */
206 static const uint32_t input[] = { 0x0049 };
207 static const uint32_t casefolded[] = { 0x0069 };
208 static const uint32_t casefolded_tr[] = { 0x0131 };
211 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
214 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0);
217 { /* LATIN SMALL LETTER I */
218 static const uint32_t input[] = { 0x0069 };
219 static const uint32_t casefolded[] = { 0x0049 };
220 static const uint32_t casefolded_tr[] = { 0x0130 };
223 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
226 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0);
229 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
230 static const uint32_t input[] = { 0x0130 };
231 static const uint32_t casefolded[] = { 0x0069, 0x0307 };
232 static const uint32_t casefolded_tr[] = { 0x0069 };
235 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
238 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0);
241 { /* LATIN SMALL LETTER DOTLESS I */
242 static const uint32_t input[] = { 0x0131 };
243 static const uint32_t casefolded[] = { 0x0049 };
246 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
249 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0);
/* "TOPKAPI" in ASCII capitals against "topkap" followed by U+0131.  */
253 static const uint32_t input[] =
254 { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 };
255 static const uint32_t casefolded[] =
256 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
259 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
262 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0);
266 /* Uppercasing can increase the number of Unicode characters. */
/* "hei" followed by U+00DF against "heiss".  */
268 static const uint32_t input1[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
269 static const uint32_t input2[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 };
272 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
276 /* Case mappings for some characters can depend on the surrounding characters. */
277 { /* "περισσότερες πληροφορίες" */
/* input1 ends both words with U+03C2, input2 uses U+03C3 in those two
   positions, and input3 is the capital-letter spelling with U+03A3.  */
278 static const uint32_t input1[] =
280 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
281 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
282 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
284 static const uint32_t input2[] =
286 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
287 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7,
288 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3
290 static const uint32_t input3[] =
292 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4,
293 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397,
294 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3
298 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
301 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
304 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
308 /* Case mapping can require subsequent normalization. */
309 { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
310 static const uint32_t input[] = { 0x01F0, 0x0323 };
311 static const uint32_t casefolded[] = { 0x006A, 0x030C, 0x0323 };
312 static const uint32_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C };
/* casefolded and casefolded_decomposed differ only in the order of the two
   combining marks; each is compared with and without UNINORM_NFD.  */
315 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
318 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, NULL, &cmp) == 0);
321 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, UNINORM_NFD, &cmp) == 0);
324 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, UNINORM_NFD, &cmp) == 0);
/* Run both test groups against u32_casecmp: test_ascii (not defined in this
   excerpt; presumably supplied by "test-casecmp.h" -- confirm) with
   UNINORM_NFD, then the non-ASCII cases of test_nonascii.  */
332 test_ascii (u32_casecmp, UNINORM_NFD);
333 test_nonascii (u32_casecmp);