2009-02-21 Bruno Haible <bruno@clisp.org>
+ Tests for module 'uninorm/nfd'.
+ * tests/uninorm/test-nfd.c: New file.
+ * tests/uninorm/test-u8-nfd.c: New file.
+ * tests/uninorm/test-u16-nfd.c: New file.
+ * tests/uninorm/test-u32-nfd.c: New file.
+ * tests/uninorm/test-u32-nfd-big.sh: New file.
+ * tests/uninorm/test-u32-nfd-big.c: New file.
+ * tests/uninorm/test-u32-normalize-big.h: New file.
+ * tests/uninorm/test-u32-normalize-big.c: New file.
+ * tests/uninorm/NormalizationTest.txt: New file, created from
+ Unicode 5.1.0 NormalizationTest.txt.
+ * modules/uninorm/nfd-tests: New file.
+
New module 'uninorm/nfd'.
* lib/uninorm/nfd.c: New file.
* modules/uninorm/nfd: New file.
--- /dev/null
+Files:
+tests/uninorm/test-nfd.c
+tests/uninorm/test-u8-nfd.c
+tests/uninorm/test-u16-nfd.c
+tests/uninorm/test-u32-nfd.c
+tests/uninorm/test-u32-nfd-big.sh
+tests/uninorm/test-u32-nfd-big.c
+tests/uninorm/test-u32-normalize-big.h
+tests/uninorm/test-u32-normalize-big.c
+tests/uninorm/NormalizationTest.txt
+
+Depends-on:
+unistr/u8-cmp
+unistr/u16-cmp
+unistr/u32-cmp
+unistr/u32-strlen
+xalloc
+progname
+
+configure.ac:
+AC_CHECK_DECLS_ONCE([alarm])
+
+Makefile.am:
+TESTS += test-nfd uninorm/test-u32-nfd-big.sh
+check_PROGRAMS += test-nfd test-u32-nfd-big
+test_nfd_SOURCES = \
+ uninorm/test-nfd.c \
+ uninorm/test-u8-nfd.c \
+ uninorm/test-u16-nfd.c \
+ uninorm/test-u32-nfd.c
+test_u32_nfd_big_SOURCES = \
+ uninorm/test-u32-nfd-big.c \
+ uninorm/test-u32-normalize-big.c
+
--- /dev/null
+/* Test of canonical decomposition of Unicode strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+#include "uninorm.h"
+
+/* Check that UNINORM_NFD is defined and links. */
+uninorm_t n = UNINORM_NFD;
+
+extern void test_u8_nfd (void);
+extern void test_u16_nfd (void);
+extern void test_u32_nfd (void);
+
+/* Run the NFD tests for the three string representations, in the order
+   UTF-32, UTF-16, UTF-8, and report success.  */
+int
+main ()
+{
+  void (*const tests[]) (void) = { test_u32_nfd, test_u16_nfd, test_u8_nfd };
+  unsigned int k;
+
+  for (k = 0; k < sizeof (tests) / sizeof (tests[0]); k++)
+    tests[k] ();
+
+  return 0;
+}
--- /dev/null
+/* Test of canonical decomposition of UTF-16 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+#if GNULIB_UNINORM_U16_NORMALIZE
+
+#include "uninorm.h"
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "unistr.h"
+
+/* Number of elements of ARRAY.  ARRAY must be a real array, not a pointer,
+   because the count is computed from sizeof.  */
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+/* If EXPR is false, print "FILE:LINE: assertion failed" to stderr, flush
+   stderr and abort.  */
+#define ASSERT(expr) \
+ do \
+ { \
+ if (!(expr)) \
+ { \
+ fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+ fflush (stderr); \
+ abort (); \
+ } \
+ } \
+ while (0)
+
+/* Normalize INPUT[0..INPUT_LENGTH-1] to NFD with u16_normalize and compare
+   the outcome with EXPECTED[0..EXPECTED_LENGTH-1], exercising three ways of
+   passing the result buffer.
+   Return 0 on success, otherwise a code identifying the first failed step:
+     1..3   resultbuf == NULL: NULL result, wrong length, wrong contents;
+     4..7   resultbuf one element too small: NULL result, the too small
+            buffer was returned, wrong length, wrong contents;
+     8..11  resultbuf exactly large enough: NULL result, the buffer was not
+            returned, wrong length, wrong contents.
+   Everything allocated here is released before returning, on the failure
+   paths as well.  */
+static int
+check (const uint16_t *input, size_t input_length,
+       const uint16_t *expected, size_t expected_length)
+{
+  size_t length;
+  uint16_t *result;
+  uint16_t *preallocated;
+  int status;
+
+  /* Test return conventions with resultbuf == NULL.  */
+  result = u16_normalize (UNINORM_NFD, input, input_length, NULL, &length);
+  if (result == NULL)
+    status = 1;
+  else if (length != expected_length)
+    status = 2;
+  else if (u16_cmp (result, expected, expected_length) != 0)
+    status = 3;
+  else
+    status = 0;
+  free (result);
+  if (status != 0)
+    return status;
+
+  /* Test return conventions with resultbuf too small.  */
+  if (expected_length > 0)
+    {
+      length = expected_length - 1;
+      preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
+      result = u16_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+      if (result == NULL)
+        status = 4;
+      else if (result == preallocated)
+        status = 5;
+      else if (length != expected_length)
+        status = 6;
+      else if (u16_cmp (result, expected, expected_length) != 0)
+        status = 7;
+      else
+        status = 0;
+      /* The result is a separate allocation unless it is the buffer we
+         passed in; never free the same block twice.  */
+      if (result != preallocated)
+        free (result);
+      free (preallocated);
+      if (status != 0)
+        return status;
+    }
+
+  /* Test return conventions with resultbuf large enough.  */
+  length = expected_length;
+  preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
+  result = u16_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+  if (result == NULL)
+    status = 8;
+  else if (result != preallocated)
+    status = 9;
+  else if (length != expected_length)
+    status = 10;
+  else if (u16_cmp (result, expected, expected_length) != 0)
+    status = 11;
+  else
+    status = 0;
+  if (result != preallocated)
+    free (result);
+  free (preallocated);
+
+  return status;
+}
+
+/* Test u16_normalize with UNINORM_NFD, through check(): first on single
+   characters, then on a mixed-script sample string, finally on three long
+   runs of combining characters.  Any failing case aborts through ASSERT,
+   whose message contains the source line of the case.  */
+void
+test_u16_nfd (void)
+{
+ { /* SPACE */
+ static const uint16_t input[] = { 0x0020 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+ static const uint16_t input[] = { 0x00C4 };
+ static const uint16_t expected[] = { 0x0041, 0x0308 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
+ static const uint16_t input[] = { 0x01DE };
+ static const uint16_t expected[] = { 0x0041, 0x0308, 0x0304 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* GREEK DIALYTIKA AND PERISPOMENI */
+ static const uint16_t input[] = { 0x1FC1 };
+ static const uint16_t expected[] = { 0x00A8, 0x0342 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ /* From here through ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, each
+ input is passed as its own expected result: NFD must leave these
+ characters unchanged.  (Presumably they were chosen because they have
+ only compatibility decompositions; confirm against UnicodeData.txt.)  */
+ { /* SCRIPT SMALL L */
+ static const uint16_t input[] = { 0x2113 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* NO-BREAK SPACE */
+ static const uint16_t input[] = { 0x00A0 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH INITIAL FORM */
+ static const uint16_t input[] = { 0xFB6C };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH MEDIAL FORM */
+ static const uint16_t input[] = { 0xFB6D };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH FINAL FORM */
+ static const uint16_t input[] = { 0xFB6B };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH ISOLATED FORM */
+ static const uint16_t input[] = { 0xFB6A };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* CIRCLED NUMBER FIFTEEN */
+ static const uint16_t input[] = { 0x246E };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* TRADE MARK SIGN */
+ static const uint16_t input[] = { 0x2122 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* LATIN SUBSCRIPT SMALL LETTER I */
+ static const uint16_t input[] = { 0x1D62 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
+ static const uint16_t input[] = { 0xFE35 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* FULLWIDTH LATIN CAPITAL LETTER A */
+ static const uint16_t input[] = { 0xFF21 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* HALFWIDTH IDEOGRAPHIC COMMA */
+ static const uint16_t input[] = { 0xFF64 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* SMALL IDEOGRAPHIC COMMA */
+ static const uint16_t input[] = { 0xFE51 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* SQUARE MHZ */
+ static const uint16_t input[] = { 0x3392 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* VULGAR FRACTION THREE EIGHTHS */
+ static const uint16_t input[] = { 0x215C };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* MICRO SIGN */
+ static const uint16_t input[] = { 0x00B5 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
+ static const uint16_t input[] = { 0xFDFA };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* HANGUL SYLLABLE GEUL */
+ static const uint16_t input[] = { 0xAE00 };
+ static const uint16_t expected[] = { 0x1100, 0x1173, 0x11AF };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* HANGUL SYLLABLE GEU */
+ static const uint16_t input[] = { 0xADF8 };
+ static const uint16_t expected[] = { 0x1100, 0x1173 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
+ /* Relative to the input, the expected string has 0x00FC and 0x0439
+ replaced by two elements each and the two trailing Hangul syllables
+ replaced by three elements each; everything else is identical.  */
+ static const uint16_t input[] =
+ { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
+ 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
+ 0x0439, 0x0442, 0x0435, '!', ' ',
+ 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
+ '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
+ 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
+ };
+ static const uint16_t expected[] =
+ { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
+ 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
+ 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ',
+ 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
+ '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
+ 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',',
+ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n'
+ };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+#if HAVE_DECL_ALARM
+ /* Declare failure if test takes too long, by using default abort
+ caused by SIGALRM. */
+ signal (SIGALRM, SIG_DFL);
+ alarm (50);
+#endif
+
+ /* Check that the sorting is not O(n²) but O(n log n). */
+ /* Each pass builds a string of m elements: 0x0041 followed by m1 copies
+ of 0x0319 and m2 copies of 0x0300 in a pass-specific arrangement.  The
+ expected result is the same for all passes: 0x0041, then all 0x0319,
+ then all 0x0300.  If the allocation fails, the pass is silently
+ skipped.  */
+ {
+ int pass;
+ for (pass = 0; pass < 3; pass++)
+ {
+ /* Number of times the check is run per pass.  */
+ size_t repeat = 1;
+ size_t m = 100000;
+ /* One allocation holds both strings: the input in the first m
+ elements, the expected result in the second m elements.  */
+ uint16_t *input = (uint16_t *) malloc (2 * m * sizeof (uint16_t));
+ if (input != NULL)
+ {
+ uint16_t *expected = input + m;
+ size_t m1 = m / 2;
+ size_t m2 = (m - 1) / 2;
+ /* NB: m1 + m2 == m - 1. */
+ uint16_t *p;
+ size_t i;
+
+ input[0] = 0x0041;
+ p = input + 1;
+ switch (pass)
+ {
+ case 0:
+ /* Same arrangement as the expected result.  */
+ for (i = 0; i < m1; i++)
+ *p++ = 0x0319;
+ for (i = 0; i < m2; i++)
+ *p++ = 0x0300;
+ break;
+
+ case 1:
+ /* The two runs in the opposite order.  */
+ for (i = 0; i < m2; i++)
+ *p++ = 0x0300;
+ for (i = 0; i < m1; i++)
+ *p++ = 0x0319;
+ break;
+
+ case 2:
+ /* Alternating pairs, then the remaining m1 - m2 copies of
+ 0x0319 (i continues from m2).  */
+ for (i = 0; i < m2; i++)
+ {
+ *p++ = 0x0319;
+ *p++ = 0x0300;
+ }
+ for (; i < m1; i++)
+ *p++ = 0x0319;
+ break;
+
+ default:
+ abort ();
+ }
+
+ expected[0] = 0x0041;
+ p = expected + 1;
+ for (i = 0; i < m1; i++)
+ *p++ = 0x0319;
+ for (i = 0; i < m2; i++)
+ *p++ = 0x0300;
+
+ for (; repeat > 0; repeat--)
+ ASSERT (check (input, m, expected, m) == 0);
+
+ /* This also releases 'expected', which lives in the same block.  */
+ free (input);
+ }
+ }
+ }
+}
+
+#else
+
+/* Fallback used when GNULIB_UNINORM_U16_NORMALIZE is not set: there is
+   nothing to test, so the function does nothing.  */
+void
+test_u16_nfd (void)
+{
+}
+
+#endif
--- /dev/null
+/* Test of Unicode compliance of canonical decomposition of UTF-32 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+#if GNULIB_UNINORM_U32_NORMALIZE
+
+#include "uninorm.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+#include "progname.h"
+#include "test-u32-normalize-big.h"
+
+/* Return 1 if normalizing INPUT[0..INPUT_LENGTH-1] to NFD succeeds and
+   yields exactly EXPECTED[0..EXPECTED_LENGTH-1], and 0 otherwise.  The
+   normalized string is freed before returning, whatever the outcome.  */
+static int
+nfd_equals (const uint32_t *input, size_t input_length,
+            const uint32_t *expected, size_t expected_length)
+{
+  size_t length;
+  uint32_t *result;
+  int ok;
+
+  result = u32_normalize (UNINORM_NFD, input, input_length, NULL, &length);
+  ok = (result != NULL
+        && length == expected_length
+        && u32_cmp (result, expected, expected_length) == 0);
+  free (result);
+  return ok;
+}
+
+/* Check the NFD relations for one line c1;c2;c3;c4;c5 of the
+   NormalizationTest.txt file:
+     c3 == NFD(c1) == NFD(c2) == NFD(c3)
+     c5 == NFD(c4) == NFD(c5)
+   Return 0 if all of them hold, otherwise the number (1..5) of the first
+   column whose normalization does not give the required result.  */
+static int
+check (const uint32_t *c1, size_t c1_length,
+       const uint32_t *c2, size_t c2_length,
+       const uint32_t *c3, size_t c3_length,
+       const uint32_t *c4, size_t c4_length,
+       const uint32_t *c5, size_t c5_length)
+{
+  if (!nfd_equals (c1, c1_length, c3, c3_length))
+    return 1;
+  if (!nfd_equals (c2, c2_length, c3, c3_length))
+    return 2;
+  if (!nfd_equals (c3, c3_length, c3, c3_length))
+    return 3;
+  if (!nfd_equals (c4, c4_length, c5, c5_length))
+    return 4;
+  if (!nfd_equals (c5, c5_length, c5, c5_length))
+    return 5;
+  return 0;
+}
+
+/* Run the NFD compliance tests on the NormalizationTest.txt file named by
+   argv[1].  Return 0 if all tests pass; a failing test aborts.  Return 1
+   with a usage message if the file name argument is missing.  */
+int
+main (int argc, char *argv[])
+{
+  struct normalization_test_file file;
+
+  set_program_name (argv[0]);
+
+  /* Without this check, a missing argument would hand a null filename to
+     read_normalization_test_file.  */
+  if (argc < 2)
+    {
+      fprintf (stderr, "Usage: %s NormalizationTest.txt\n", argv[0]);
+      return 1;
+    }
+
+  read_normalization_test_file (argv[1], &file);
+
+  test_specific (&file, check);
+  test_other (&file, UNINORM_NFD);
+
+  return 0;
+}
+
+#else
+
+#include <stdio.h>
+
+/* Replacement main used when GNULIB_UNINORM_U32_NORMALIZE is not set: say
+   that the test is skipped and exit with status 77, the status that
+   Automake's test driver reports as "skipped".  */
+int
+main ()
+{
+  fputs ("Skipping test: uninorm/u32-normalize module not included.\n",
+         stderr);
+  return 77;
+}
+
+#endif
--- /dev/null
+#!/bin/sh
+exec ./test-u32-nfd-big${EXEEXT} "$srcdir/uninorm/NormalizationTest.txt"
--- /dev/null
+/* Test of canonical decomposition of UTF-32 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+#if GNULIB_UNINORM_U32_NORMALIZE
+
+#include "uninorm.h"
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "unistr.h"
+
+/* Number of elements of ARRAY.  ARRAY must be a real array, not a pointer,
+   because the count is computed from sizeof.  */
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+/* If EXPR is false, print "FILE:LINE: assertion failed" to stderr, flush
+   stderr and abort.  */
+#define ASSERT(expr) \
+ do \
+ { \
+ if (!(expr)) \
+ { \
+ fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+ fflush (stderr); \
+ abort (); \
+ } \
+ } \
+ while (0)
+
+/* Normalize INPUT[0..INPUT_LENGTH-1] to NFD with u32_normalize and compare
+   the outcome with EXPECTED[0..EXPECTED_LENGTH-1], exercising three ways of
+   passing the result buffer.
+   Return 0 on success, otherwise a code identifying the first failed step:
+     1..3   resultbuf == NULL: NULL result, wrong length, wrong contents;
+     4..7   resultbuf one element too small: NULL result, the too small
+            buffer was returned, wrong length, wrong contents;
+     8..11  resultbuf exactly large enough: NULL result, the buffer was not
+            returned, wrong length, wrong contents.
+   Everything allocated here is released before returning, on the failure
+   paths as well.  */
+static int
+check (const uint32_t *input, size_t input_length,
+       const uint32_t *expected, size_t expected_length)
+{
+  size_t length;
+  uint32_t *result;
+  uint32_t *preallocated;
+  int status;
+
+  /* Test return conventions with resultbuf == NULL.  */
+  result = u32_normalize (UNINORM_NFD, input, input_length, NULL, &length);
+  if (result == NULL)
+    status = 1;
+  else if (length != expected_length)
+    status = 2;
+  else if (u32_cmp (result, expected, expected_length) != 0)
+    status = 3;
+  else
+    status = 0;
+  free (result);
+  if (status != 0)
+    return status;
+
+  /* Test return conventions with resultbuf too small.  */
+  if (expected_length > 0)
+    {
+      length = expected_length - 1;
+      preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
+      result = u32_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+      if (result == NULL)
+        status = 4;
+      else if (result == preallocated)
+        status = 5;
+      else if (length != expected_length)
+        status = 6;
+      else if (u32_cmp (result, expected, expected_length) != 0)
+        status = 7;
+      else
+        status = 0;
+      /* The result is a separate allocation unless it is the buffer we
+         passed in; never free the same block twice.  */
+      if (result != preallocated)
+        free (result);
+      free (preallocated);
+      if (status != 0)
+        return status;
+    }
+
+  /* Test return conventions with resultbuf large enough.  */
+  length = expected_length;
+  preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
+  result = u32_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+  if (result == NULL)
+    status = 8;
+  else if (result != preallocated)
+    status = 9;
+  else if (length != expected_length)
+    status = 10;
+  else if (u32_cmp (result, expected, expected_length) != 0)
+    status = 11;
+  else
+    status = 0;
+  if (result != preallocated)
+    free (result);
+  free (preallocated);
+
+  return status;
+}
+
+/* Test u32_normalize with UNINORM_NFD, through check(): first on single
+   characters, then on a mixed-script sample string, finally on three long
+   runs of combining characters.  Any failing case aborts through ASSERT,
+   whose message contains the source line of the case.  */
+void
+test_u32_nfd (void)
+{
+ { /* SPACE */
+ static const uint32_t input[] = { 0x0020 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+ static const uint32_t input[] = { 0x00C4 };
+ static const uint32_t expected[] = { 0x0041, 0x0308 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
+ static const uint32_t input[] = { 0x01DE };
+ static const uint32_t expected[] = { 0x0041, 0x0308, 0x0304 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* GREEK DIALYTIKA AND PERISPOMENI */
+ static const uint32_t input[] = { 0x1FC1 };
+ static const uint32_t expected[] = { 0x00A8, 0x0342 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ /* From here through ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, each
+ input is passed as its own expected result: NFD must leave these
+ characters unchanged.  (Presumably they were chosen because they have
+ only compatibility decompositions; confirm against UnicodeData.txt.)  */
+ { /* SCRIPT SMALL L */
+ static const uint32_t input[] = { 0x2113 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* NO-BREAK SPACE */
+ static const uint32_t input[] = { 0x00A0 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH INITIAL FORM */
+ static const uint32_t input[] = { 0xFB6C };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH MEDIAL FORM */
+ static const uint32_t input[] = { 0xFB6D };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH FINAL FORM */
+ static const uint32_t input[] = { 0xFB6B };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH ISOLATED FORM */
+ static const uint32_t input[] = { 0xFB6A };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* CIRCLED NUMBER FIFTEEN */
+ static const uint32_t input[] = { 0x246E };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* TRADE MARK SIGN */
+ static const uint32_t input[] = { 0x2122 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* LATIN SUBSCRIPT SMALL LETTER I */
+ static const uint32_t input[] = { 0x1D62 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
+ static const uint32_t input[] = { 0xFE35 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* FULLWIDTH LATIN CAPITAL LETTER A */
+ static const uint32_t input[] = { 0xFF21 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* HALFWIDTH IDEOGRAPHIC COMMA */
+ static const uint32_t input[] = { 0xFF64 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* SMALL IDEOGRAPHIC COMMA */
+ static const uint32_t input[] = { 0xFE51 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* SQUARE MHZ */
+ static const uint32_t input[] = { 0x3392 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* VULGAR FRACTION THREE EIGHTHS */
+ static const uint32_t input[] = { 0x215C };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* MICRO SIGN */
+ static const uint32_t input[] = { 0x00B5 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
+ static const uint32_t input[] = { 0xFDFA };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* HANGUL SYLLABLE GEUL */
+ static const uint32_t input[] = { 0xAE00 };
+ static const uint32_t expected[] = { 0x1100, 0x1173, 0x11AF };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* HANGUL SYLLABLE GEU */
+ static const uint32_t input[] = { 0xADF8 };
+ static const uint32_t expected[] = { 0x1100, 0x1173 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
+ /* Relative to the input, the expected string has 0x00FC and 0x0439
+ replaced by two elements each and the two trailing Hangul syllables
+ replaced by three elements each; everything else is identical.  */
+ static const uint32_t input[] =
+ { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
+ 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
+ 0x0439, 0x0442, 0x0435, '!', ' ',
+ 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
+ '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
+ 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
+ };
+ static const uint32_t expected[] =
+ { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
+ 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
+ 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ',
+ 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
+ '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
+ 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',',
+ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n'
+ };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+#if HAVE_DECL_ALARM
+ /* Declare failure if test takes too long, by using default abort
+ caused by SIGALRM. */
+ signal (SIGALRM, SIG_DFL);
+ alarm (50);
+#endif
+
+ /* Check that the sorting is not O(n²) but O(n log n). */
+ /* Each pass builds a string of m elements: 0x0041 followed by m1 copies
+ of 0x0319 and m2 copies of 0x0300 in a pass-specific arrangement.  The
+ expected result is the same for all passes: 0x0041, then all 0x0319,
+ then all 0x0300.  If the allocation fails, the pass is silently
+ skipped.  */
+ {
+ int pass;
+ for (pass = 0; pass < 3; pass++)
+ {
+ /* Number of times the check is run per pass.  */
+ size_t repeat = 1;
+ size_t m = 100000;
+ /* One allocation holds both strings: the input in the first m
+ elements, the expected result in the second m elements.  */
+ uint32_t *input = (uint32_t *) malloc (2 * m * sizeof (uint32_t));
+ if (input != NULL)
+ {
+ uint32_t *expected = input + m;
+ size_t m1 = m / 2;
+ size_t m2 = (m - 1) / 2;
+ /* NB: m1 + m2 == m - 1. */
+ uint32_t *p;
+ size_t i;
+
+ input[0] = 0x0041;
+ p = input + 1;
+ switch (pass)
+ {
+ case 0:
+ /* Same arrangement as the expected result.  */
+ for (i = 0; i < m1; i++)
+ *p++ = 0x0319;
+ for (i = 0; i < m2; i++)
+ *p++ = 0x0300;
+ break;
+
+ case 1:
+ /* The two runs in the opposite order.  */
+ for (i = 0; i < m2; i++)
+ *p++ = 0x0300;
+ for (i = 0; i < m1; i++)
+ *p++ = 0x0319;
+ break;
+
+ case 2:
+ /* Alternating pairs, then the remaining m1 - m2 copies of
+ 0x0319 (i continues from m2).  */
+ for (i = 0; i < m2; i++)
+ {
+ *p++ = 0x0319;
+ *p++ = 0x0300;
+ }
+ for (; i < m1; i++)
+ *p++ = 0x0319;
+ break;
+
+ default:
+ abort ();
+ }
+
+ expected[0] = 0x0041;
+ p = expected + 1;
+ for (i = 0; i < m1; i++)
+ *p++ = 0x0319;
+ for (i = 0; i < m2; i++)
+ *p++ = 0x0300;
+
+ for (; repeat > 0; repeat--)
+ ASSERT (check (input, m, expected, m) == 0);
+
+ /* This also releases 'expected', which lives in the same block.  */
+ free (input);
+ }
+ }
+ }
+}
+
+#else
+
+/* Fallback used when GNULIB_UNINORM_U32_NORMALIZE is not set: there is
+   nothing to test, so the function does nothing.  */
+void
+test_u32_nfd (void)
+{
+}
+
+#endif
--- /dev/null
+/* Test of Unicode compliance of normalization of UTF-32 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+/* Specification. */
+#include "test-u32-normalize-big.h"
+
+#if GNULIB_UNINORM_U32_NORMALIZE
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "xalloc.h"
+#include "unistr.h"
+
+/* If EXPR is false, print "FILE:LINE: assertion failed" (the location in
+   this source file) to stderr, flush stderr and abort.  */
+#define ASSERT(expr) \
+ do \
+ { \
+ if (!(expr)) \
+ { \
+ fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+ fflush (stderr); \
+ abort (); \
+ } \
+ } \
+ while (0)
+
+/* Like ASSERT, but the message additionally names the data file FILE (a
+   string, printed with %s) and the line LINE within it (printed with %u)
+   for which the assertion failed.  */
+#define ASSERT_WITH_LINE(expr, file, line) \
+ do \
+ { \
+ if (!(expr)) \
+ { \
+ fprintf (stderr, "%s:%d: assertion failed for %s:%u\n", \
+ __FILE__, __LINE__, file, line); \
+ fflush (stderr); \
+ abort (); \
+ } \
+ } \
+ while (0)
+
+/* Comparison function for qsort on arrays of ucs4_t: A and B point to the
+   two elements.  Returns -1, 0 or 1 when *A is less than, equal to, or
+   greater than *B.  */
+static int
+cmp_ucs4_t (const void *a, const void *b)
+{
+  const ucs4_t *lhs = (const ucs4_t *) a;
+  const ucs4_t *rhs = (const ucs4_t *) b;
+
+  if (*lhs < *rhs)
+    return -1;
+  if (*lhs > *rhs)
+    return 1;
+  return 0;
+}
+
+/* Read the file FILENAME, which must have the layout of the Unicode
+   NormalizationTest.txt, and store its contents in *FILE.
+
+   Lines that are empty or start with '#' are ignored.  Each line starting
+   with '@' opens the next part; at most 4 parts are accepted, because
+   FILE->parts has 4 elements.  Every other line must consist of 5 sequences
+   of hexadecimal code points, each sequence non-empty and followed by ';'.
+   Such a line is appended to the current part together with its line number
+   (counted from 1).  Each stored sequence is terminated by a 0 element.
+
+   Additionally, FILE->part1_c1_sorted is set to the sorted first-column
+   characters of part 1 followed by the sentinel 0x110000, and
+   FILE->filename to a copy of FILENAME.
+
+   On an open failure, malformed contents or a read error, a message is
+   printed to stderr and the program exits with status 1.  If a line of part
+   1 does not start with a single-character sequence, the program aborts.
+   The memory allocated here is not freed by this function.
+
+   Limitations: a line longer than 1000 characters is processed in pieces of
+   1000 characters, and a final line without terminating newline is
+   ignored.  */
+void
+read_normalization_test_file (const char *filename,
+                              struct normalization_test_file *file)
+{
+  FILE *stream;
+  unsigned int lineno;
+  int part_index;
+  struct normalization_test_line *lines;
+  size_t lines_length;
+  size_t lines_allocated;
+
+  stream = fopen (filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", filename);
+      exit (1);
+    }
+
+  for (part_index = 0; part_index < 4; part_index++)
+    {
+      file->parts[part_index].lines = NULL;
+      file->parts[part_index].lines_length = 0;
+    }
+
+  lineno = 0;
+
+  /* part_index == -1 means that no '@' line has been seen yet.  */
+  part_index = -1;
+  lines = NULL;
+  lines_length = 0;
+  lines_allocated = 0;
+
+  for (;;)
+    {
+      char buf[1000+1];
+      char *ptr;
+      int c;
+      struct normalization_test_line line;
+      size_t sequence_index;
+
+      lineno++;
+
+      /* Read a line.  */
+      ptr = buf;
+      do
+        {
+          c = getc (stream);
+          if (c == EOF || c == '\n')
+            break;
+          *ptr++ = c;
+        }
+      while (ptr < buf + 1000);
+      *ptr = '\0';
+      if (c == EOF)
+        break;
+
+      /* Ignore empty lines and comment lines.  */
+      if (buf[0] == '\0' || buf[0] == '#')
+        continue;
+
+      /* Handle lines that introduce a new part.  */
+      if (buf[0] == '@')
+        {
+          /* Switch to the next part.  */
+          if (part_index >= 0)
+            {
+              /* Shrink the array to the number of lines actually used
+                 before storing it.  */
+              lines =
+                (struct normalization_test_line *)
+                xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
+              file->parts[part_index].lines = lines;
+              file->parts[part_index].lines_length = lines_length;
+            }
+          part_index++;
+          /* file->parts has only 4 elements.  A fifth part must be rejected
+             here: if it contained no data lines, the check below would
+             never run and the final store after the loop would write past
+             the end of the array.  */
+          if (part_index >= 4)
+            {
+              fprintf (stderr, "unexpected structure of '%s'\n", filename);
+              exit (1);
+            }
+          lines = NULL;
+          lines_length = 0;
+          lines_allocated = 0;
+          continue;
+        }
+
+      /* It's a line containing 5 sequences of Unicode characters.
+         Parse it and append it to the current part.  */
+      if (!(part_index >= 0 && part_index < 4))
+        {
+          fprintf (stderr, "unexpected structure of '%s'\n", filename);
+          exit (1);
+        }
+      ptr = buf;
+      line.lineno = lineno;
+      for (sequence_index = 0; sequence_index < 5; sequence_index++)
+        line.sequences[sequence_index] = NULL;
+      for (sequence_index = 0; sequence_index < 5; sequence_index++)
+        {
+          uint32_t *sequence = XNMALLOC (1, uint32_t);
+          size_t sequence_length = 0;
+
+          for (;;)
+            {
+              char *endptr;
+              unsigned int uc;
+
+              /* No progress means that there is no further number.  */
+              uc = strtoul (ptr, &endptr, 16);
+              if (endptr == ptr)
+                break;
+              ptr = endptr;
+
+              /* Append uc to the sequence.  One extra element is kept for
+                 the terminator.  */
+              sequence =
+                (uint32_t *)
+                xnrealloc (sequence, sequence_length + 2, sizeof (uint32_t));
+              sequence[sequence_length] = uc;
+              sequence_length++;
+
+              if (*ptr == ' ')
+                ptr++;
+            }
+          if (sequence_length == 0)
+            {
+              fprintf (stderr, "empty character sequence in '%s'\n", filename);
+              exit (1);
+            }
+          sequence[sequence_length] = 0; /* terminator */
+
+          line.sequences[sequence_index] = sequence;
+
+          if (*ptr != ';')
+            {
+              fprintf (stderr, "error parsing '%s'\n", filename);
+              exit (1);
+            }
+          ptr++;
+        }
+
+      /* Append the line to the current part, doubling the capacity (with a
+         minimum of 7) when the array is full.  */
+      if (lines_length == lines_allocated)
+        {
+          lines_allocated = 2 * lines_allocated;
+          if (lines_allocated < 7)
+            lines_allocated = 7;
+          lines =
+            (struct normalization_test_line *)
+            xnrealloc (lines, lines_allocated, sizeof (struct normalization_test_line));
+        }
+      lines[lines_length] = line;
+      lines_length++;
+    }
+
+  /* Store the part that was being collected when the input ended.  */
+  if (part_index >= 0)
+    {
+      lines =
+        (struct normalization_test_line *)
+        xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
+      file->parts[part_index].lines = lines;
+      file->parts[part_index].lines_length = lines_length;
+    }
+
+  {
+    /* Collect all c1 values from the part 1 in an array.  */
+    const struct normalization_test_part *p = &file->parts[1];
+    ucs4_t *c1_array = XNMALLOC (p->lines_length + 1, ucs4_t);
+    size_t line_index;
+
+    for (line_index = 0; line_index < p->lines_length; line_index++)
+      {
+        const uint32_t *sequence = p->lines[line_index].sequences[0];
+        /* In part 1, every sequences[0] consists of a single character.  */
+        if (!(sequence[0] != 0 && sequence[1] == 0))
+          abort ();
+        c1_array[line_index] = sequence[0];
+      }
+
+    /* Sort this array.  */
+    qsort (c1_array, p->lines_length, sizeof (ucs4_t), cmp_ucs4_t);
+
+    /* Add the sentinel at the end.  */
+    c1_array[p->lines_length] = 0x110000;
+
+    file->part1_c1_sorted = c1_array;
+  }
+
+  file->filename = xstrdup (filename);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Call CHECK on the five character sequences of every line of every part of
+   FILE, passing each sequence together with its length as determined by
+   u32_strlen.  If CHECK returns nonzero, abort with a message that names
+   the data file and the line number of the offending line.  */
+void
+test_specific (const struct normalization_test_file *file,
+               int (*check) (const uint32_t *c1, size_t c1_length,
+                             const uint32_t *c2, size_t c2_length,
+                             const uint32_t *c3, size_t c3_length,
+                             const uint32_t *c4, size_t c4_length,
+                             const uint32_t *c5, size_t c5_length))
+{
+  const struct normalization_test_part *part = file->parts;
+  const struct normalization_test_part *parts_end = file->parts + 4;
+
+  for (; part < parts_end; part++)
+    {
+      const struct normalization_test_line *line = part->lines;
+      size_t remaining = part->lines_length;
+
+      for (; remaining > 0; remaining--, line++)
+        {
+          size_t lengths[5];
+          int status;
+          int k;
+
+          for (k = 0; k < 5; k++)
+            lengths[k] = u32_strlen (line->sequences[k]);
+
+          status = check (line->sequences[0], lengths[0],
+                          line->sequences[1], lengths[1],
+                          line->sequences[2], lengths[2],
+                          line->sequences[3], lengths[3],
+                          line->sequences[4], lengths[4]);
+          ASSERT_WITH_LINE (status == 0, file->filename, line->lineno);
+        }
+    }
+}
+
+/* Check that every character that is not listed as c1 in part 1 of the
+   NormalizationTest.txt file FILE is mapped to itself by the normalization
+   form NF.  Surrogates (0xD800..0xDFFF) are skipped.  Aborts through ASSERT
+   at the first character for which this does not hold.
+   Relies on FILE->part1_c1_sorted being in ascending order and ending with
+   the sentinel 0x110000; it is walked in step with the character loop, so
+   it is assumed to contain no duplicates.  */
+void
+test_other (const struct normalization_test_file *file, uninorm_t nf)
+{
+  const ucs4_t *p = file->part1_c1_sorted;
+  ucs4_t uc;
+
+  for (uc = 0; uc < 0x110000; uc++)
+    {
+      if (uc >= 0xD800 && uc < 0xE000)
+        {
+          /* A surrogate, not a character.  Skip uc.  */
+        }
+      else if (uc == *p)
+        {
+          /* uc is listed in part 1.  Skip it and move on to the next
+             listed character.  */
+          p++;
+        }
+      else
+        {
+          uint32_t input[1];
+          size_t length;
+          uint32_t *result;
+
+          input[0] = uc;
+          result = u32_normalize (nf, input, 1, NULL, &length);
+          ASSERT (result != NULL && length == 1 && result[0] == uc);
+          /* Release the result: this branch runs for most of the 0x110000
+             code points, so unreleased results would pile up.  */
+          free (result);
+        }
+    }
+}
+
+#endif
--- /dev/null
+/* Test of Unicode compliance of normalization of UTF-32 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+
+#include "unitypes.h"
+#include "uninorm.h"
+
+/* The NormalizationTest.txt is from www.unicode.org, with stripped comments:
+ sed -e 's| *#.*||' < .../ucd/NormalizationTest.txt \
+ > tests/uninorm/NormalizationTest.txt
+ It is only used to verify the compliance of this implementation of the
+ Unicode normalization forms. It is not used by the library code, only
+ by the unit tests. */
+
+/* Representation of a line in the NormalizationTest.txt file.
+   test_specific hands the five sequences of each line, together with
+   their u32_strlen lengths, to its check callback. */
+struct normalization_test_line
+{
+ unsigned int lineno; /* reported by test_specific when the check fails */
+ uint32_t *sequences[5]; /* c1..c5 in order; measured with u32_strlen, so
+                            presumably each ends with a 0 unit */
+};
+
+/* Representation of a delimited part of the NormalizationTest.txt file. */
+struct normalization_test_part
+{
+ struct normalization_test_line *lines; /* array of lines_length entries */
+ size_t lines_length; /* number of entries in lines */
+};
+
+/* Representation of the entire NormalizationTest.txt file. */
+struct normalization_test_file
+{
+ struct normalization_test_part parts[4]; /* test_specific visits all four */
+ /* The set of c1 values from part 1, sorted in ascending order, with a
+ sentinel value of 0x110000 at the end.
+ test_other walks this list in step with its scan over the code points
+ below 0x110000; the sentinel can never equal one of them, so the walk
+ stops there without a separate length field. */
+ ucs4_t *part1_c1_sorted;
+ /* The filename of the NormalizationTest.txt file.
+ test_specific reports it together with the line number on failure. */
+ const char *filename;
+};
+
+/* Read the NormalizationTest.txt file and return its contents.
+   The function itself returns void; the contents are presumably stored
+   in *FILE. */
+extern void
+ read_normalization_test_file (const char *filename,
+ struct normalization_test_file *file);
+
+/* Perform the first compliance test.
+   For every line of every part of FILE, CHECK is called with the five
+   sequences c1..c5 of that line and their lengths.  CHECK must return 0
+   on success; any other value is reported as an assertion failure that
+   names the test file and the line number. */
+extern void
+ test_specific (const struct normalization_test_file *file,
+ int (*check) (const uint32_t *c1, size_t c1_length,
+ const uint32_t *c2, size_t c2_length,
+ const uint32_t *c3, size_t c3_length,
+ const uint32_t *c4, size_t c4_length,
+ const uint32_t *c5, size_t c5_length));
+
+/* Perform the second compliance test.
+   For every code point below 0x110000 that is neither a surrogate nor
+   listed in FILE->part1_c1_sorted, verify that normalizing it with the
+   form NF yields that same single code point. */
+extern void
+ test_other (const struct normalization_test_file *file, uninorm_t nf);
--- /dev/null
+/* Test of canonical decomposition of UTF-8 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+#if GNULIB_UNINORM_U8_NORMALIZE
+
+#include "uninorm.h"
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "unistr.h"
+
+/* Number of elements of ARRAY.  ARRAY must be an actual array object, not a
+   pointer.  The argument is parenthesized so that any array-valued
+   expression can be passed safely.  */
+#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
+
+/* Verify EXPR.  On failure print "FILE:LINE: assertion failed" to stderr,
+   flush it, and abort.  Wrapped in do ... while (0) so that an invocation
+   behaves like a single statement.  */
+#define ASSERT(expr) \
+  do \
+    { \
+      if (!(expr)) \
+        { \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr); \
+          abort (); \
+        } \
+    } \
+  while (0)
+
+/* Normalize INPUT (INPUT_LENGTH units) to NFD with u8_normalize and compare
+   the outcome with EXPECTED (EXPECTED_LENGTH units), once for each result
+   buffer convention: no buffer, a buffer one unit too small, and a buffer
+   of exactly the expected size.
+   Returns 0 if everything matches, otherwise the code of the first failed
+   condition:
+     1..3   no buffer:        NULL result, wrong length, wrong contents;
+     4..7   buffer too small: NULL result, buffer was used anyway, wrong
+                              length, wrong contents;
+     8..11  buffer fits:      NULL result, buffer was not used, wrong
+                              length, wrong contents.
+   Everything allocated here is released on every return path.  */
+static int
+check (const uint8_t *input, size_t input_length,
+       const uint8_t *expected, size_t expected_length)
+{
+  size_t length;
+  uint8_t *result;
+  int status = 0;
+
+  /* Test return conventions with resultbuf == NULL.  */
+  result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length);
+  if (result == NULL)
+    return 1;
+  if (length != expected_length)
+    status = 2;
+  else if (u8_cmp (result, expected, expected_length) != 0)
+    status = 3;
+  free (result);
+  if (status != 0)
+    return status;
+
+  /* Test return conventions with resultbuf too small.  */
+  if (expected_length > 0)
+    {
+      uint8_t *preallocated;
+
+      length = expected_length - 1;
+      /* If this allocation fails, the call below simply runs without a
+         buffer, and all conditions tested here still apply.  */
+      preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
+      result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+      if (result == NULL)
+        status = 4;
+      else if (result == preallocated)
+        status = 5;
+      else if (length != expected_length)
+        status = 6;
+      else if (u8_cmp (result, expected, expected_length) != 0)
+        status = 7;
+      /* Avoid a double free when the too-small buffer was handed back.  */
+      if (result != preallocated)
+        free (result);
+      free (preallocated);
+      if (status != 0)
+        return status;
+    }
+
+  /* Test return conventions with resultbuf large enough.  */
+  {
+    uint8_t *preallocated;
+
+    length = expected_length;
+    preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
+    /* Without a buffer this convention cannot be exercised.  Skip it rather
+       than report a bogus failure, like the long-input test in this file
+       does when its allocation fails.  */
+    if (preallocated != NULL)
+      {
+        result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+        if (result == NULL)
+          status = 8;
+        else if (result != preallocated)
+          status = 9;
+        else if (length != expected_length)
+          status = 10;
+        else if (u8_cmp (result, expected, expected_length) != 0)
+          status = 11;
+        /* A result distinct from the buffer was allocated by the callee.  */
+        if (result != preallocated)
+          free (result);
+        free (preallocated);
+      }
+  }
+
+  return status;
+}
+
+void
+test_u8_nfd (void)
+{ /* Runs check() on a list of fixed UTF-8 inputs, then on three long
+     generated inputs.  A case that passes INPUT both as input and as
+     expected value asserts that the input comes back unchanged.  Any
+     non-zero result from check() aborts through ASSERT. */
+ { /* SPACE */
+ static const uint8_t input[] = { 0x20 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+ static const uint8_t input[] = { 0xC3, 0x84 };
+ static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
+ static const uint8_t input[] = { 0xC7, 0x9E };
+ static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* GREEK DIALYTIKA AND PERISPOMENI */
+ static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
+ static const uint8_t expected[] = { 0xC2, 0xA8, 0xCD, 0x82 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* SCRIPT SMALL L */
+ static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* NO-BREAK SPACE */
+ static const uint8_t input[] = { 0xC2, 0xA0 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH INITIAL FORM */
+ static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH MEDIAL FORM */
+ static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH FINAL FORM */
+ static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LETTER VEH ISOLATED FORM */
+ static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* CIRCLED NUMBER FIFTEEN */
+ static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* TRADE MARK SIGN */
+ static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* LATIN SUBSCRIPT SMALL LETTER I */
+ static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
+ static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* FULLWIDTH LATIN CAPITAL LETTER A */
+ static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* HALFWIDTH IDEOGRAPHIC COMMA */
+ static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* SMALL IDEOGRAPHIC COMMA */
+ static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* SQUARE MHZ */
+ static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* VULGAR FRACTION THREE EIGHTHS */
+ static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* MICRO SIGN */
+ static const uint8_t input[] = { 0xC2, 0xB5 };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
+ static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
+ ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+ }
+
+ { /* HANGUL SYLLABLE GEUL */
+ static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
+ static const uint8_t expected[] =
+ { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* HANGUL SYLLABLE GEU */
+ static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
+ static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+ { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
+ static const uint8_t input[] =
+ { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
+ ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
+ 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
+ 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
+ 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
+ '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
+ 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
+ 0xED, 0x95, 0x9C,
+ 0xEA, 0xB8, 0x80, '\n'
+ };
+ static const uint8_t expected[] =
+ { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
+ ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
+ 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
+ 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
+ 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
+ '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
+ 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
+ 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
+ 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
+ };
+ ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+ }
+
+#if HAVE_DECL_ALARM
+ /* Declare failure if test takes too long, by using default abort
+ caused by SIGALRM. */
+ signal (SIGALRM, SIG_DFL);
+ alarm (50);
+#endif
+
+ /* Check that the sorting is not O(n²) but O(n log n).
+ Each pass builds 'A' followed by m1 copies of U+0319 (bytes CC 99) and
+ m2 copies of U+0300 (bytes CC 80) in a different arrangement.  The
+ expected output is the same for all passes: all U+0319 first, then all
+ U+0300.  Where alarm is available, a run that takes too long is ended
+ by the alarm armed above. */
+ {
+ int pass;
+ for (pass = 0; pass < 3; pass++)
+ {
+ size_t repeat = 1;
+ size_t m = 100000;
+ uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t)); /* one block holds input and expected */
+ if (input != NULL) /* the pass is skipped when memory is short */
+ {
+ uint8_t *expected = input + (2 * m - 1); /* second half of the block */
+ size_t m1 = m / 2;
+ size_t m2 = (m - 1) / 2;
+ /* NB: m1 + m2 == m - 1. */
+ uint8_t *p;
+ size_t i;
+
+ input[0] = 0x41;
+ p = input + 1;
+ switch (pass)
+ {
+ case 0: /* already in the expected order */
+ for (i = 0; i < m1; i++)
+ {
+ *p++ = 0xCC;
+ *p++ = 0x99;
+ }
+ for (i = 0; i < m2; i++)
+ {
+ *p++ = 0xCC;
+ *p++ = 0x80;
+ }
+ break;
+
+ case 1: /* the two runs in the opposite order */
+ for (i = 0; i < m2; i++)
+ {
+ *p++ = 0xCC;
+ *p++ = 0x80;
+ }
+ for (i = 0; i < m1; i++)
+ {
+ *p++ = 0xCC;
+ *p++ = 0x99;
+ }
+ break;
+
+ case 2: /* the two marks alternating */
+ for (i = 0; i < m2; i++)
+ {
+ *p++ = 0xCC;
+ *p++ = 0x99;
+ *p++ = 0xCC;
+ *p++ = 0x80;
+ }
+ for (; i < m1; i++) /* the remaining m1 - m2 copies of U+0319 */
+ {
+ *p++ = 0xCC;
+ *p++ = 0x99;
+ }
+ break;
+
+ default:
+ abort ();
+ }
+
+ expected[0] = 0x41;
+ p = expected + 1;
+ for (i = 0; i < m1; i++)
+ {
+ *p++ = 0xCC;
+ *p++ = 0x99;
+ }
+ for (i = 0; i < m2; i++)
+ {
+ *p++ = 0xCC;
+ *p++ = 0x80;
+ }
+
+ for (; repeat > 0; repeat--) /* repeat starts at 1: a single run */
+ ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);
+
+ free (input);
+ }
+ }
+ }
+}
+
+#else
+
+void
+test_u8_nfd (void)
+{ /* Nothing to test: this variant is compiled when
+     GNULIB_UNINORM_U8_NORMALIZE is not set. */
+}
+
+#endif