1 /* Test of Unicode compliance of normalization of UTF-32 strings.
2 Copyright (C) 2009-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
22 #include "test-u32-normalize-big.h"
24 #if GNULIB_TEST_UNINORM_U32_NORMALIZE
/* ASSERT_WITH_LINE (expr, file, line): assertion helper that reports two
   locations on stderr: the position in this test program (__FILE__ and
   __LINE__) and the position FILE:LINE supplied by the caller. LINE is
   printed as an unsigned integer.
   NOTE(review): the test of EXPR and whatever follows the message are not
   visible in this excerpt - confirm against the full macro. */
33 #define ASSERT_WITH_LINE(expr, file, line) \
38 fprintf (stderr, "%s:%d: assertion failed for %s:%u\n", \
39 __FILE__, __LINE__, file, line); \
/* Comparison function, passed to qsort in this file. A and B each point to
   a ucs4_t value. The result is -1, 1 or 0 when the value at A is less
   than, greater than or equal to the value at B. */
47 cmp_ucs4_t (const void *a, const void *b)
49 ucs4_t a_value = *(const ucs4_t *)a;
50 ucs4_t b_value = *(const ucs4_t *)b;
/* Compare explicitly instead of subtracting the two values. */
51 return (a_value < b_value ? -1 : a_value > b_value ? 1 : 0);
/* Read the normalization test data in the file named FILENAME and store it
   in *FILE.
   The visible code fills in FILE->parts[0..3] (an array of lines and its
   length for each part), FILE->part1_c1_sorted and FILE->filename. Every
   data line holds 5 sequences of hexadecimal numbers; each sequence is
   stored as a 0-terminated array of uint32_t.
   A message is printed to stderr when fopen fails, when the file does not
   have the expected structure, or when reading fails.
   NOTE(review): what happens after each message (presumably the program
   terminates) is not visible in this excerpt - confirm. */
55 read_normalization_test_file (const char *filename,
56 struct normalization_test_file *file)
61 struct normalization_test_line *lines;
63 size_t lines_allocated;
65 stream = fopen (filename, "r");
68 fprintf (stderr, "error during fopen of '%s'\n", filename);
/* Start with all four parts empty. */
72 for (part_index = 0; part_index < 4; part_index++)
74 file->parts[part_index].lines = NULL;
75 file->parts[part_index].lines_length = 0;
90 struct normalization_test_line line;
91 size_t sequence_index;
100 if (c == EOF || c == '\n')
/* NOTE(review): the loop stops once 1000 characters have been stored; how
   the remainder of a longer line is treated is not visible here. */
104 while (ptr < buf + 1000);
109 /* Ignore empty lines and comment lines. */
110 if (buf[0] == '\0' || buf[0] == '#')
113 /* Handle lines that introduce a new part. */
116 /* Switch to the next part. The lines array is first resized to
   lines_length elements and recorded, with its length, in the current
   part. */
120 (struct normalization_test_line *)
121 xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
122 file->parts[part_index].lines = lines;
123 file->parts[part_index].lines_length = lines_length;
132 /* It's a line containing 5 sequences of Unicode characters.
133 Parse it and append it to the current part. */
134 if (!(part_index >= 0 && part_index < 4))
136 fprintf (stderr, "unexpected structure of '%s'\n", filename);
140 line.lineno = lineno;
141 for (sequence_index = 0; sequence_index < 5; sequence_index++)
142 line.sequences[sequence_index] = NULL;
143 for (sequence_index = 0; sequence_index < 5; sequence_index++)
/* One element initially: room for the terminator alone. */
145 uint32_t *sequence = XNMALLOC (1, uint32_t);
146 size_t sequence_length = 0;
/* The number is parsed as hexadecimal.
   NOTE(review): no range check on the parsed value is visible here. */
153 uc = strtoul (ptr, &endptr, 16);
158 /* Append uc to the sequence, keeping one further element free for the
   terminator. */
161 xnrealloc (sequence, sequence_length + 2, sizeof (uint32_t));
162 sequence[sequence_length] = uc;
168 if (sequence_length == 0)
170 fprintf (stderr, "empty character sequence in '%s'\n", filename);
173 sequence[sequence_length] = 0; /* terminator */
175 line.sequences[sequence_index] = sequence;
179 fprintf (stderr, "error parsing '%s'\n", filename);
185 /* Append the line to the current part. When the array is full, its
   capacity is doubled first (the adjustment made when the doubled value is
   below 7 is not visible in this excerpt). */
186 if (lines_length == lines_allocated)
188 lines_allocated = 2 * lines_allocated;
189 if (lines_allocated < 7)
192 (struct normalization_test_line *)
193 xnrealloc (lines, lines_allocated, sizeof (struct normalization_test_line));
195 lines[lines_length] = line;
202 (struct normalization_test_line *)
203 xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
204 file->parts[part_index].lines = lines;
205 file->parts[part_index].lines_length = lines_length;
209 /* Collect all c1 values from part 1 in an array. The array has one
   extra element, for the sentinel added below. */
210 const struct normalization_test_part *p = &file->parts[1];
211 ucs4_t *c1_array = XNMALLOC (p->lines_length + 1, ucs4_t);
214 for (line_index = 0; line_index < p->lines_length; line_index++)
216 const uint32_t *sequence = p->lines[line_index].sequences[0];
217 /* In part 1, every sequences[0] consists of a single character. */
218 if (!(sequence[0] != 0 && sequence[1] == 0))
220 c1_array[line_index] = sequence[0];
223 /* Sort this array. */
224 qsort (c1_array, p->lines_length, sizeof (ucs4_t), cmp_ucs4_t);
226 /* Add the sentinel at the end. */
227 c1_array[p->lines_length] = 0x110000;
229 file->part1_c1_sorted = c1_array;
/* Remember the file name; test_specific passes it to ASSERT_WITH_LINE. */
232 file->filename = xstrdup (filename);
/* fclose is called only when ferror reports no error on the stream. */
234 if (ferror (stream) || fclose (stream))
236 fprintf (stderr, "error reading from '%s'\n", filename);
/* Call CHECK for every line of each of the four parts of FILE.
   CHECK receives the five sequences of the line (c1 to c5), each followed
   by its length as computed by u32_strlen. The call is wrapped in
   ASSERT_WITH_LINE together with FILE->filename and the line number of the
   line, so that a failure can be attributed to a specific line of the data
   file.
   NOTE(review): the end of the asserted expression, between the call of
   CHECK and the file name argument, is not visible in this excerpt
   (presumably a comparison of the result with an expected value) -
   confirm. */
242 test_specific (const struct normalization_test_file *file,
243 int (*check) (const uint32_t *c1, size_t c1_length,
244 const uint32_t *c2, size_t c2_length,
245 const uint32_t *c3, size_t c3_length,
246 const uint32_t *c4, size_t c4_length,
247 const uint32_t *c5, size_t c5_length))
251 for (part_index = 0; part_index < 4; part_index++)
253 const struct normalization_test_part *p = &file->parts[part_index];
256 for (line_index = 0; line_index < p->lines_length; line_index++)
258 const struct normalization_test_line *l = &p->lines[line_index];
260 ASSERT_WITH_LINE (check (l->sequences[0], u32_strlen (l->sequences[0]),
261 l->sequences[1], u32_strlen (l->sequences[1]),
262 l->sequences[2], u32_strlen (l->sequences[2]),
263 l->sequences[3], u32_strlen (l->sequences[3]),
264 l->sequences[4], u32_strlen (l->sequences[4]))
266 file->filename, l->lineno);
272 test_other (const struct normalization_test_file *file, uninorm_t nf)
274 /* Check that for every character not listed in part 1 of the
275 NormalizationTest.txt file, the character maps to itself in each
276 of the four normalization forms. */
277 const ucs4_t *p = file->part1_c1_sorted;
280 for (uc = 0; uc < 0x110000; uc++)
282 if (uc >= 0xD800 && uc < 0xE000)
284 /* A surrogate, not a character. Skip uc. */
298 result = u32_normalize (nf, input, 1, NULL, &length);
299 ASSERT (result != NULL && length == 1 && result[0] == uc);