1 /* Test of casefolding mapping for UTF-16 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
/* SIZEOF (array): number of elements, computed as the total size of ARRAY
   divided by the size of its first element. */
29 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
/* ASSERT (expr): writes a FILE:LINE assertion-failed message to stderr.
   NOTE(review): only the fprintf line of this macro is visible in this
   excerpt; presumably the message is printed only when EXPR is false and
   the program then stops - confirm against the complete macro. */
30 #define ASSERT(expr) \
35 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
/* Calls u16_casefold on INPUT (INPUT_LENGTH units) with the language
   ISO639_LANGUAGE and the normalization form NF, and compares the outcome
   with EXPECTED (EXPECTED_LENGTH units).
   The call is repeated for three result-buffer scenarios:
     1. no buffer supplied (NULL),
     2. a preallocated buffer one unit shorter than needed
        (only when EXPECTED_LENGTH > 0),
     3. a preallocated buffer of exactly EXPECTED_LENGTH units.
   Every caller in this file compares the return value against 0.
   NOTE(review): the return type, the local declarations of result and
   length, the braces and the statements guarded by the if conditions are
   not visible in this excerpt - confirm them against the complete file. */
43 check (const uint16_t *input, size_t input_length,
44 const char *iso639_language, uninorm_t nf,
45 const uint16_t *expected, size_t expected_length)
50 /* Scenario 1: resultbuf == NULL. The returned pointer, the reported
   length and the contents are each verified. */
51 result = u16_casefold (input, input_length, iso639_language, nf, NULL, &length);
52 if (!(result != NULL))
54 if (!(length == expected_length))
56 if (!(u16_cmp (result, expected, expected_length) == 0))
60 /* Scenario 2: resultbuf too small. Skipped for an empty expected
   result, since no shorter buffer exists. */
61 if (expected_length > 0)
63 uint16_t *preallocated;
/* Offer one unit less than the expected result needs. */
65 length = expected_length - 1;
/* NOTE(review): the malloc result is handed to u16_casefold without a
   visible NULL check. */
66 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
67 result = u16_casefold (input, input_length, iso639_language, nf, preallocated, &length);
68 if (!(result != NULL))
/* The undersized buffer must not be what comes back as the result. */
70 if (!(result != preallocated))
/* length is expected to have been updated to the full result length. */
72 if (!(length == expected_length))
74 if (!(u16_cmp (result, expected, expected_length) == 0))
80 /* Scenario 3: resultbuf large enough. */
82 uint16_t *preallocated;
84 length = expected_length;
/* NOTE(review): for expected_length == 0 this requests zero bytes, and the
   malloc result is again used without a visible NULL check. */
85 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
86 result = u16_casefold (input, input_length, iso639_language, nf, preallocated, &length);
87 if (!(result != NULL))
/* A buffer of sufficient size must be the one returned. */
89 if (!(result == preallocated))
91 if (!(length == expected_length))
93 if (!(u16_cmp (result, expected, expected_length) == 0))
104 { /* Empty string: zero-length input is expected to give zero-length
   output, once with a NULL normalization form and once with UNINORM_NFC. */
105 ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0);
106 ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0);
/* Mixed-script sentence, folded with a NULL language and a NULL
   normalization form. Compared with the input, the expected output differs
   only in these places: both U+0047 become U+0067, U+00DF becomes the two
   units U+0073 U+0073, and U+0417 becomes U+0437. All other units are
   expected unchanged. */
110 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
111 static const uint16_t input[] =
112 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
113 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
114 0x0439, 0x0442, 0x0435, '!', ' ',
115 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
116 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
117 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
119 static const uint16_t casefolded[] =
120 { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ',
121 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
122 0x0439, 0x0442, 0x0435, '!', ' ',
123 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
124 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
125 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
127 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
130 /* Case folding can increase the number of Unicode characters: the two
   cases below each start from a single unit and expect a longer result. */
131 { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE: one unit in, two
   units (U+02BC U+006E) expected out. */
132 static const uint16_t input[] = { 0x0149 };
133 static const uint16_t casefolded[] = { 0x02BC, 0x006E };
134 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
136 { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS: one unit in,
   three units (U+03B9 U+0308 U+0301) expected out. */
137 static const uint16_t input[] = { 0x0390 };
138 static const uint16_t casefolded[] = { 0x03B9, 0x0308, 0x0301 };
139 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
142 /* Turkish letters i İ ı I. Each input is folded twice: with a NULL
   language and with the language code tr. Where the two expectations
   differ, a separate casefolded_tr array holds the tr result. */
143 { /* LATIN CAPITAL LETTER I: expected U+0069 by default, U+0131 for tr. */
144 static const uint16_t input[] = { 0x0049 };
145 static const uint16_t casefolded[] = { 0x0069 };
146 static const uint16_t casefolded_tr[] = { 0x0131 };
147 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
148 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0);
150 { /* LATIN SMALL LETTER I: expected unchanged in both cases. */
151 static const uint16_t input[] = { 0x0069 };
152 static const uint16_t casefolded[] = { 0x0069 };
153 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
154 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0);
156 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE: expected U+0069 U+0307 by
   default, the single unit U+0069 for tr. */
157 static const uint16_t input[] = { 0x0130 };
158 static const uint16_t casefolded[] = { 0x0069, 0x0307 };
159 static const uint16_t casefolded_tr[] = { 0x0069 };
160 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
161 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0);
163 { /* LATIN SMALL LETTER DOTLESS I: expected unchanged in both cases. */
164 static const uint16_t input[] = { 0x0131 };
165 static const uint16_t casefolded[] = { 0x0131 };
166 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
167 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0);
/* The lowercase word topkapı, ending in U+0131: expected unchanged both
   with a NULL language and with tr. */
170 static const uint16_t input[] =
171 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
172 static const uint16_t casefolded[] =
173 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
174 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
175 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0);
178 /* Casefolding can increase the number of Unicode characters. */
/* The input spells heiß (four units); the trailing U+00DF is expected to
   fold to U+0073 U+0073, giving five units. */
180 static const uint16_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
181 static const uint16_t casefolded[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 };
182 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
185 /* Case mappings for some characters can depend on the surrounding characters.
   In this test both occurrences of U+03C2 in the input are expected as
   U+03C3 in the folded output; every other unit is expected unchanged. */
186 { /* "περισσότερες πληροφορίες" */
187 static const uint16_t input[] =
189 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
190 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
191 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
193 static const uint16_t casefolded[] =
195 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
196 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7,
197 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3
199 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
202 /* Case mapping can require subsequent normalization. The same input is
   checked against three expectations, one per normalization argument. */
203 { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
204 static const uint16_t input[] = { 0x01F0, 0x0323 };
/* Expected with a NULL normalization form. */
205 static const uint16_t casefolded[] = { 0x006A, 0x030C, 0x0323 };
/* Expected with UNINORM_NFD: the same three units with the two combining
   marks in the opposite order. */
206 static const uint16_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C };
/* Expected with UNINORM_NFC: identical to the input. */
207 static const uint16_t casefolded_normalized[] = { 0x01F0, 0x0323 };
208 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
209 ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFD, casefolded_decomposed, SIZEOF (casefolded_decomposed)) == 0);
210 ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casefolded_normalized, SIZEOF (casefolded_normalized)) == 0);