1 /* Test of compatibility decomposition of UTF-8 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 #if GNULIB_UNINORM_U8_NORMALIZE
/* Number of elements in ARRAY.  The count is computed from sizeof, so ARRAY
   must be a real array object and not a pointer.
   NOTE(review): the argument is not parenthesized in `array[0]`; every use
   visible in this file passes a plain identifier, so this is harmless as
   used here.  */
32 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
/* Assertion macro used by the test cases below.  Only part of its definition
   is visible in this listing: the visible line writes the source file name
   and line number, followed by "assertion failed", to stderr.
   NOTE(review): the condition test and whatever follows the message are not
   visible here -- confirm against the full source.  */
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
/* Normalize INPUT (INPUT_LENGTH bytes) with u8_normalize (UNINORM_NFKD, ...)
   and compare the outcome with EXPECTED (EXPECTED_LENGTH bytes).  The call
   is made three times, once for each way of supplying the result buffer:
   no buffer, a buffer one byte too small, and a buffer of exactly the
   expected size.
   The callers in this file treat a return value of 0 as success.
   NOTE(review): the statements executed when one of the conditions below
   fails are not visible in this listing -- confirm the failure codes and
   the cleanup of the buffers against the full source.  */
46 check (const uint8_t *input, size_t input_length,
47 const uint8_t *expected, size_t expected_length)
52 /* Test return conventions with resultbuf == NULL. */
/* With no caller buffer the result must be non-NULL, and its length and
   contents must match EXPECTED.  */
53 result = u8_normalize (UNINORM_NFKD, input, input_length, NULL, &length);
54 if (!(result != NULL))
56 if (!(length == expected_length))
58 if (!(u8_cmp (result, expected, expected_length) == 0))
62 /* Test return conventions with resultbuf too small. */
/* Skipped for an empty expected result: expected_length is a size_t, so
   expected_length - 1 would wrap around instead of describing a smaller
   buffer.  */
63 if (expected_length > 0)
65 uint8_t *preallocated;
/* Offer a buffer that is one byte short of what the result needs.  */
67 length = expected_length - 1;
68 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
69 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
70 if (!(result != NULL))
/* The undersized buffer must not be what comes back.  */
72 if (!(result != preallocated))
74 if (!(length == expected_length))
76 if (!(u8_cmp (result, expected, expected_length) == 0))
82 /* Test return conventions with resultbuf large enough. */
84 uint8_t *preallocated;
86 length = expected_length;
87 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
88 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
89 if (!(result != NULL))
/* When the buffer is big enough the result must be the caller's buffer.
   The identity check is waived when preallocated is NULL (for example
   because malloc returned NULL).  */
91 if (!(preallocated == NULL || result == preallocated))
93 if (!(length == expected_length))
95 if (!(u8_cmp (result, expected, expected_length) == 0))
/* Conformance cases.  Each case passes one UTF-8 encoded input to check ()
   together with the byte sequence expected from NFKD normalization, and
   asserts that check () returns 0.  */
106 { /* Empty string. */
107 ASSERT (check (NULL, 0, NULL, 0) == 0);
/* A single byte 0x20 (U+0020).  The same array is passed as input and as
   expected output, i.e. the string must come back unchanged.  */
110 static const uint8_t input[] = { 0x20 };
111 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
114 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
115 static const uint8_t input[] = { 0xC3, 0x84 };
116 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
117 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
120 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
121 static const uint8_t input[] = { 0xC7, 0x9E };
122 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
123 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
126 { /* GREEK DIALYTIKA AND PERISPOMENI */
127 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
128 static const uint8_t expected[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 };
129 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
132 { /* SCRIPT SMALL L */
133 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
134 static const uint8_t expected[] = { 0x6C };
135 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
138 { /* NO-BREAK SPACE */
139 static const uint8_t input[] = { 0xC2, 0xA0 };
140 static const uint8_t expected[] = { 0x20 };
141 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
144 { /* ARABIC LETTER VEH INITIAL FORM */
145 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
146 static const uint8_t expected[] = { 0xDA, 0xA4 };
147 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
150 { /* ARABIC LETTER VEH MEDIAL FORM */
151 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
152 static const uint8_t expected[] = { 0xDA, 0xA4 };
153 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
156 { /* ARABIC LETTER VEH FINAL FORM */
157 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
158 static const uint8_t expected[] = { 0xDA, 0xA4 };
159 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
162 { /* ARABIC LETTER VEH ISOLATED FORM */
163 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
164 static const uint8_t expected[] = { 0xDA, 0xA4 };
165 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
168 { /* CIRCLED NUMBER FIFTEEN */
169 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
170 static const uint8_t expected[] = { 0x31, 0x35 };
171 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
174 { /* TRADE MARK SIGN */
175 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
176 static const uint8_t expected[] = { 0x54, 0x4D };
177 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
180 { /* LATIN SUBSCRIPT SMALL LETTER I */
181 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
182 static const uint8_t expected[] = { 0x69 };
183 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
186 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
187 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
188 static const uint8_t expected[] = { 0x28 };
189 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
192 { /* FULLWIDTH LATIN CAPITAL LETTER A */
193 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
194 static const uint8_t expected[] = { 0x41 };
195 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
198 { /* HALFWIDTH IDEOGRAPHIC COMMA */
199 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
200 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
201 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
204 { /* SMALL IDEOGRAPHIC COMMA */
205 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
206 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
207 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Input is U+3392 (bytes E3 8E 92); the expected output is the three ASCII
   bytes "MHz".  */
211 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
212 static const uint8_t expected[] = { 0x4D, 0x48, 0x7A };
213 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
216 { /* VULGAR FRACTION THREE EIGHTHS */
217 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
218 static const uint8_t expected[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 };
219 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Input is U+00B5 (bytes C2 B5); the expected output is U+03BC
   (bytes CE BC).  */
223 static const uint8_t input[] = { 0xC2, 0xB5 };
224 static const uint8_t expected[] = { 0xCE, 0xBC };
225 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
228 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
229 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
230 static const uint8_t expected[] =
231 { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9,
232 0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87,
233 0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85
235 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
238 { /* HANGUL SYLLABLE GEUL */
239 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
240 static const uint8_t expected[] =
241 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
242 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
245 { /* HANGUL SYLLABLE GEU */
246 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
247 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
248 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
251 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
252 static const uint8_t input[] =
253 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
254 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
255 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
256 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
257 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
258 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
259 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
261 0xEA, 0xB8, 0x80, '\n'
263 static const uint8_t expected[] =
264 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
265 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
266 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
267 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
268 's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
269 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
270 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
271 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
272 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
274 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Timing test: builds large inputs and runs them through check ().
   NOTE(review): the code that arms the alarm, sets m and repeat, and fills
   the buffers is not visible in this listing -- confirm the details against
   the full source.  */
278 /* Declare failure if test takes too long, by using default abort
279 caused by SIGALRM. */
/* Restore the default disposition of SIGALRM.  */
280 signal (SIGALRM, SIG_DFL);
284 /* Check that the sorting is not O(n²) but O(n log n). */
287 for (pass = 0; pass < 3; pass++)
/* A single allocation holds both strings: the first 2*m-1 bytes are the
   input, and the 2*m-1 bytes that follow are the expected result.  */
291 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
294 uint8_t *expected = input + (2 * m - 1);
296 size_t m2 = (m - 1) / 2;
297 /* NB: m1 + m2 == m - 1. */
/* The loops below iterate m1 or m2 times each; their bodies are not visible
   in this listing.  */
306 for (i = 0; i < m1; i++)
311 for (i = 0; i < m2; i++)
319 for (i = 0; i < m2; i++)
324 for (i = 0; i < m1; i++)
332 for (i = 0; i < m2; i++)
352 for (i = 0; i < m1; i++)
357 for (i = 0; i < m2; i++)
/* Run the same comparison `repeat` times on the 2*m-1 byte input.  */
363 for (; repeat > 0; repeat--)
364 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);