1 /* Test of lowercase mapping for UTF-16 strings.
2 Copyright (C) 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
/* check: run u16_tolower on INPUT (INPUT_LENGTH units) with the language
   ISO639_LANGUAGE and the normalization form NF, and compare what it
   produces with EXPECTED (EXPECTED_LENGTH units).
   The conversion is exercised three times: without a result buffer, with a
   buffer one unit shorter than the expected result, and with a buffer of
   exactly the expected size.  Each time the returned pointer, the reported
   length and the contents (via u16_cmp) are examined.
   NOTE(review): the statements executed when one of the conditions fails,
   and any release of the buffers, are not visible in this excerpt; the
   callers only compare the return value against 0.  */
30 check (const uint16_t *input, size_t input_length,
31 const char *iso639_language, uninorm_t nf,
32 const uint16_t *expected, size_t expected_length)
37 /* Test return conventions with resultbuf == NULL. */
/* With no buffer supplied, the result pointer comes from u16_tolower itself
   and LENGTH is used purely as an output.  */
38 result = u16_tolower (input, input_length, iso639_language, nf, NULL, &length);
39 if (!(result != NULL))
41 if (!(length == expected_length))
43 if (!(u16_cmp (result, expected, expected_length) == 0))
47 /* Test return conventions with resultbuf too small. */
/* Only meaningful for a non-empty expected result: otherwise no buffer can
   be "one unit too small".  */
48 if (expected_length > 0)
50 uint16_t *preallocated;
/* LENGTH is both input (capacity offered) and output (units produced).  */
52 length = expected_length - 1;
53 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
54 result = u16_tolower (input, input_length, iso639_language, nf, preallocated, &length);
55 if (!(result != NULL))
/* The result cannot fit, so it must not be the buffer that was offered.  */
57 if (!(result != preallocated))
59 if (!(length == expected_length))
61 if (!(u16_cmp (result, expected, expected_length) == 0))
67 /* Test return conventions with resultbuf large enough. */
69 uint16_t *preallocated;
71 length = expected_length;
72 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
73 result = u16_tolower (input, input_length, iso639_language, nf, preallocated, &length);
74 if (!(result != NULL))
/* The result is expected to be the offered buffer itself.  The
   preallocated == NULL alternative presumably tolerates malloc returning
   NULL, e.g. for a zero-sized request when EXPECTED_LENGTH is 0.  */
76 if (!(preallocated == NULL || result == preallocated))
78 if (!(length == expected_length))
80 if (!(u16_cmp (result, expected, expected_length) == 0))
92 ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0);
93 ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0);
97 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
98 static const uint16_t input[] =
99 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
100 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
101 0x0439, 0x0442, 0x0435, '!', ' ',
102 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
103 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
104 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
106 static const uint16_t casemapped[] =
107 { 'g', 'r', 0x00FC, 0x00DF, ' ', 'g', 'o', 't', 't', '.', ' ',
108 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
109 0x0439, 0x0442, 0x0435, '!', ' ',
110 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
111 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
112 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
114 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
117 /* Turkish letters i İ ı I */
118 { /* LATIN CAPITAL LETTER I */
119 static const uint16_t input[] = { 0x0049 };
120 static const uint16_t casemapped[] = { 0x0069 };
121 static const uint16_t casemapped_tr[] = { 0x0131 };
122 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
123 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0);
125 { /* LATIN SMALL LETTER I */
126 static const uint16_t input[] = { 0x0069 };
127 static const uint16_t casemapped[] = { 0x0069 };
128 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
129 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0);
131 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
132 static const uint16_t input[] = { 0x0130 };
133 static const uint16_t casemapped[] = { 0x0069, 0x0307 };
134 static const uint16_t casemapped_tr[] = { 0x0069 };
135 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
136 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0);
138 { /* LATIN SMALL LETTER DOTLESS I */
139 static const uint16_t input[] = { 0x0131 };
140 static const uint16_t casemapped[] = { 0x0131 };
141 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
142 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0);
145 static const uint16_t input[] =
146 { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 };
147 static const uint16_t casemapped[] =
148 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
149 ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0);
152 /* Unlike uppercasing, lowercasing does not increase the number of Unicode characters here: U+00DF is left unchanged. */
154 static const uint16_t input[] = { 0x0048, 0x0045, 0x0049, 0x00DF };
155 static const uint16_t casemapped[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
156 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
159 /* Case mappings for some characters can depend on the surrounding characters. */
160 { /* "ΠΕΡΙΣΣΌΤΕΡΕΣ ΠΛΗΡΟΦΟΡΊΕΣ" */
161 static const uint16_t input[] =
163 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4,
164 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397,
165 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3
167 static const uint16_t casemapped[] =
169 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
170 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
171 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
173 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
176 static const uint16_t input[] = { 0x03A3 };
177 static const uint16_t casemapped[] = { 0x03C3 };
178 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
181 static const uint16_t input[] = { 0x0391, 0x03A3 };
182 static const uint16_t casemapped[] = { 0x03B1, 0x03C2 };
183 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
185 /* It's a final sigma only if not followed by a case-ignorable sequence and
186 then a cased letter. Note that U+0345 and U+037A are simultaneously
187 case-ignorable and cased (which is a bit paradoxical). */
188 { /* "ΑΣΑ" -> "ασα" */
189 static const uint16_t input[] = { 0x0391, 0x03A3, 0x0391 };
190 static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x03B1 };
191 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
193 { /* "ΑΣ:" -> "ας:" */
194 static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A };
195 static const uint16_t casemapped[] = { 0x03B1, 0x03C2, 0x003A };
196 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
198 { /* "ΑΣ:Α" -> "ασ:α" */
199 static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x0391 };
200 static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x03B1 };
201 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
203 { /* "ΑΣ:ͺ" -> "ασ:ͺ" */
204 static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A };
205 static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A };
206 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
208 { /* "ΑΣ:ͺ " -> "ασ:ͺ " */
209 static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A, 0x0020 };
210 static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A, 0x0020 };
211 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
213 /* It's a final sigma only if preceded by a case-ignorable sequence and
214 a cased letter before it. Note that U+0345 and U+037A are simultaneously
215 case-ignorable and cased (which is a bit paradoxical). */
217 static const uint16_t input[] = { 0x003A, 0x03A3 };
218 static const uint16_t casemapped[] = { 0x003A, 0x03C3 };
219 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
221 { /* "Α:Σ" -> "α:ς" */
222 static const uint16_t input[] = { 0x0391, 0x003A, 0x03A3 };
223 static const uint16_t casemapped[] = { 0x03B1, 0x003A, 0x03C2 };
224 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
226 { /* "ͺ:Σ" -> "ͺ:ς" */
227 static const uint16_t input[] = { 0x037A, 0x003A, 0x03A3 };
228 static const uint16_t casemapped[] = { 0x037A, 0x003A, 0x03C2 };
229 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
231 { /* " ͺ:Σ" -> " ͺ:ς" */
232 static const uint16_t input[] = { 0x0020, 0x037A, 0x003A, 0x03A3 };
233 static const uint16_t casemapped[] = { 0x0020, 0x037A, 0x003A, 0x03C2 };
234 ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);