1 /* Test of compatibility normalization of UTF-8 strings.
2 Copyright (C) 2009-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
/* Normalize INPUT (INPUT_LENGTH bytes) with u8_normalize in UNINORM_NFKC
   mode and compare the outcome with EXPECTED (EXPECTED_LENGTH bytes).
   The normalization is run up to three times: with no result buffer, with
   a buffer one byte shorter than EXPECTED_LENGTH (only when EXPECTED_LENGTH
   is positive), and with a buffer of exactly EXPECTED_LENGTH bytes.
   Callers in this file treat a return value of 0 as success.
   NOTE(review): the statements executed when one of the conditions below
   fails are not visible in this excerpt.  */
33 check (const uint8_t *input, size_t input_length,
34 const uint8_t *expected, size_t expected_length)
39 /* Test return conventions with resultbuf == NULL. */
40 result = u8_normalize (UNINORM_NFKC, input, input_length, NULL, &length);
/* A result pointer must be returned.  */
41 if (!(result != NULL))
/* The reported length must equal EXPECTED_LENGTH.  */
43 if (!(length == expected_length))
/* The first EXPECTED_LENGTH units must compare equal to EXPECTED.  */
45 if (!(u8_cmp (result, expected, expected_length) == 0))
49 /* Test return conventions with resultbuf too small. */
50 if (expected_length > 0)
52 uint8_t *preallocated;
/* Offer a buffer that is one byte short of the expected output.  */
54 length = expected_length - 1;
55 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
56 result = u8_normalize (UNINORM_NFKC, input, input_length, preallocated, &length);
57 if (!(result != NULL))
/* The undersized buffer must not be the one handed back.  */
59 if (!(result != preallocated))
/* LENGTH is updated by the call and must now hold the full length.  */
61 if (!(length == expected_length))
63 if (!(u8_cmp (result, expected, expected_length) == 0))
69 /* Test return conventions with resultbuf large enough. */
71 uint8_t *preallocated;
/* Offer a buffer of exactly the expected size.  */
73 length = expected_length;
74 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
75 result = u8_normalize (UNINORM_NFKC, input, input_length, preallocated, &length);
76 if (!(result != NULL))
/* When PREALLOCATED is non-NULL, the result must be that same buffer;
   a NULL PREALLOCATED is tolerated.  */
78 if (!(preallocated == NULL || result == preallocated))
80 if (!(length == expected_length))
82 if (!(u8_cmp (result, expected, expected_length) == 0))
/* Each case below calls check (source, source length, expected result,
   expected length) and asserts that it returns 0.  Most cases first
   normalize INPUT and then normalize the expected sequence itself, which
   asserts that the expected sequence is left unchanged.  */
/* Empty input: a NULL pointer with length 0 must yield an empty result.  */
94 ASSERT (check (NULL, 0, NULL, 0) == 0);
/* A single 0x20 byte (ASCII space) must come back unchanged.  */
97 static const uint8_t input[] = { 0x20 };
98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
102 static const uint8_t input[] = { 0xC3, 0x84 };
103 static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88 };
104 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
105 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
108 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
109 static const uint8_t input[] = { 0xC7, 0x9E };
110 static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
111 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
112 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
115 { /* ANGSTROM SIGN */
116 static const uint8_t input[] = { 0xE2, 0x84, 0xAB };
117 static const uint8_t decomposed[] = { 0x41, 0xCC, 0x8A };
118 static const uint8_t expected[] = { 0xC3, 0x85 };
119 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
120 ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
121 ASSERT (check (expected, SIZEOF (expected), expected, SIZEOF (expected)) == 0);
124 { /* GREEK DIALYTIKA AND PERISPOMENI */
125 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
126 static const uint8_t decomposed[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 };
127 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
128 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
131 { /* SCRIPT SMALL L */
132 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
133 static const uint8_t decomposed[] = { 0x6C };
134 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
135 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
138 { /* NO-BREAK SPACE */
139 static const uint8_t input[] = { 0xC2, 0xA0 };
140 static const uint8_t decomposed[] = { 0x20 };
141 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
142 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
145 { /* ARABIC LETTER VEH INITIAL FORM */
146 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
147 static const uint8_t decomposed[] = { 0xDA, 0xA4 };
148 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
149 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
152 { /* ARABIC LETTER VEH MEDIAL FORM */
153 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
154 static const uint8_t decomposed[] = { 0xDA, 0xA4 };
155 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
156 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
159 { /* ARABIC LETTER VEH FINAL FORM */
160 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
161 static const uint8_t decomposed[] = { 0xDA, 0xA4 };
162 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
163 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
166 { /* ARABIC LETTER VEH ISOLATED FORM */
167 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
168 static const uint8_t decomposed[] = { 0xDA, 0xA4 };
169 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
170 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
173 { /* CIRCLED NUMBER FIFTEEN */
174 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
175 static const uint8_t decomposed[] = { 0x31, 0x35 };
176 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
177 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
180 { /* TRADE MARK SIGN */
181 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
182 static const uint8_t decomposed[] = { 0x54, 0x4D };
183 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
184 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
187 { /* LATIN SUBSCRIPT SMALL LETTER I */
188 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
189 static const uint8_t decomposed[] = { 0x69 };
190 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
191 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
194 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
195 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
196 static const uint8_t decomposed[] = { 0x28 };
197 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
198 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
201 { /* FULLWIDTH LATIN CAPITAL LETTER A */
202 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
203 static const uint8_t decomposed[] = { 0x41 };
204 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
205 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
208 { /* HALFWIDTH IDEOGRAPHIC COMMA */
209 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
210 static const uint8_t decomposed[] = { 0xE3, 0x80, 0x81 };
211 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
212 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
215 { /* SMALL IDEOGRAPHIC COMMA */
216 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
217 static const uint8_t decomposed[] = { 0xE3, 0x80, 0x81 };
218 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
219 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
/* U+3392 SQUARE MHZ (UTF-8 E3 8E 92); the expected result is the three
   ASCII bytes "MHz".  */
223 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
224 static const uint8_t decomposed[] = { 0x4D, 0x48, 0x7A };
225 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
226 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
229 { /* VULGAR FRACTION THREE EIGHTHS */
230 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
231 static const uint8_t decomposed[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 };
232 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
233 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
/* U+00B5 MICRO SIGN (UTF-8 C2 B5); the expected result is U+03BC
   GREEK SMALL LETTER MU (UTF-8 CE BC).  */
237 static const uint8_t input[] = { 0xC2, 0xB5 };
238 static const uint8_t decomposed[] = { 0xCE, 0xBC };
239 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
240 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
243 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
244 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
245 static const uint8_t decomposed[] =
246 { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9,
247 0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87,
248 0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85
250 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0);
251 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
/* For the two Hangul cases the precomposed syllable is the expected
   result; the conjoining-jamo sequence must compose to it.  */
254 { /* HANGUL SYLLABLE GEUL */
255 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
256 static const uint8_t decomposed[] =
257 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
258 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
259 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
262 { /* HANGUL SYLLABLE GEU */
263 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
264 static const uint8_t decomposed[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
265 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
266 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
/* Mixed-script sentence: INPUT and DECOMPOSED must both normalize to
   EXPECTED, and EXPECTED must be left unchanged.  */
269 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
270 static const uint8_t input[] =
271 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
272 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
273 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
274 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
275 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
276 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
277 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
/* NOTE(review): the sentence in the comment above ends with two Hangul
   syllables and DECOMPOSED carries six conjoining-jamo code points, yet
   only one precomposed syllable (EA B8 80) is visible on the next row,
   here and in EXPECTED.  Confirm that no initializer row is missing.  */
279 0xEA, 0xB8, 0x80, '\n'
281 static const uint8_t decomposed[] =
282 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
283 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
284 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
285 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
286 's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
287 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
288 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
289 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
290 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
/* Differs from INPUT in one visible place: the bytes C2 B2 after
   "sqrt(b" are replaced by the ASCII digit 0x32.  */
292 static const uint8_t expected[] =
293 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
294 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
295 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
296 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
297 's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
298 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
299 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
301 0xEA, 0xB8, 0x80, '\n'
303 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
304 ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
305 ASSERT (check (expected, SIZEOF (expected), expected, SIZEOF (expected)) == 0);
309 /* Declare failure if test takes too long, by using default abort
310 caused by SIGALRM. */
/* Reset SIGALRM to its default disposition.
   NOTE(review): the call that arms the timer is not visible in this
   excerpt.  */
311 signal (SIGALRM, SIG_DFL);
315 /* Check that the sorting is not O(n²) but O(n log n). */
/* Three passes (pass = 0, 1, 2).
   NOTE(review): the statements that select a fill pattern per pass and
   the loop bodies are not visible in this excerpt.  */
318 for (pass = 0; pass < 3; pass++)
/* One allocation of 2 * (2 * m - 1) bytes holds both sequences: the
   first 2 * m - 1 bytes are the input, and EXPECTED starts right after
   them.  */
322 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
325 uint8_t *expected = input + (2 * m - 1);
327 size_t m2 = (m - 1) / 2;
328 /* NB: m1 + m2 == m - 1. */
/* The loops below run m1 or m2 times each; presumably they fill INPUT
   for the individual passes -- TODO confirm against the full file.  */
337 for (i = 0; i < m1; i++)
342 for (i = 0; i < m2; i++)
350 for (i = 0; i < m2; i++)
355 for (i = 0; i < m1; i++)
363 for (i = 0; i < m2; i++)
/* The last two fill loops run m1 and m2 - 1 times; presumably they build
   EXPECTED -- TODO confirm against the full file.  */
384 for (i = 0; i < m1; i++)
389 for (i = 0; i < m2 - 1; i++)
395 for (; repeat > 0; repeat--)
/* The 2 * m - 1 byte input must normalize to the 2 * m - 2 byte expected
   sequence, and that sequence must be left unchanged.  */
397 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 2) == 0);
398 ASSERT (check (expected, 2 * m - 2, expected, 2 * m - 2) == 0);