From 736119c28bcec850f35474aa183633a347b228eb Mon Sep 17 00:00:00 2001
From: Bruno Haible <bruno@clisp.org>
Date: Sun, 11 Feb 2007 19:16:21 +0000
Subject: [PATCH] Tests for module 'mbsstr'.

---
 Reviewer notes (this section is ignored by git-am):
 * Line structure, indentation and the <...> tokens (e-mail address and
   #include header names) were restored after a lossy text extraction that
   left the patch unappliable and the C sources uncompilable.
 * Indentation is reconstructed following GNU style; the blob ids on the
   'index' lines belong to the original commit and may not match.
 * ASSERT is wrapped in do { } while (0) so that it is a single statement.
 * The test scripts quote $LOCALE_FR_UTF8 / $LOCALE_ZH_CN inside 'test' so
   that an empty value cannot turn the skip check into a syntax error.

 m4/locale-fr.m4       | 169 ++++++++++++++++++++++++++++++++++++++++++
 modules/mbsstr-tests  |  22 ++++++
 tests/test-mbsstr1.c  | 133 +++++++++++++++++++++++++++++++++
 tests/test-mbsstr2.c  | 146 ++++++++++++++++++++++++++++++++++++
 tests/test-mbsstr2.sh |  15 ++++
 tests/test-mbsstr3.c  |  85 +++++++++++++++++++++
 tests/test-mbsstr3.sh |  15 ++++
 7 files changed, 585 insertions(+)
 create mode 100644 m4/locale-fr.m4
 create mode 100644 modules/mbsstr-tests
 create mode 100644 tests/test-mbsstr1.c
 create mode 100644 tests/test-mbsstr2.c
 create mode 100755 tests/test-mbsstr2.sh
 create mode 100644 tests/test-mbsstr3.c
 create mode 100755 tests/test-mbsstr3.sh

diff --git a/m4/locale-fr.m4 b/m4/locale-fr.m4
new file mode 100644
index 0000000000..22864cebb6
--- /dev/null
+++ b/m4/locale-fr.m4
@@ -0,0 +1,169 @@
+# locale-fr.m4 serial 3 (gettext-0.15)
+dnl Copyright (C) 2003, 2005-2006 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl From Bruno Haible.
+
+dnl Determine the name of a french locale with traditional encoding.
+AC_DEFUN([gt_LOCALE_FR],
+[
+  AC_REQUIRE([AC_CANONICAL_HOST])
+  AC_REQUIRE([AM_LANGINFO_CODESET])
+  AC_CACHE_CHECK([for a traditional french locale], gt_cv_locale_fr, [
+    macosx=
+    case "$host_os" in
+      darwin[56]*) ;;
+      darwin*) macosx=yes;;
+    esac
+    if test -n "$macosx"; then
+      # On Darwin 7 (MacOS X), the libc supports some locales in non-UTF-8
+      # encodings, but the kernel does not support them. The documentation
+      # says:
+      #   "... all code that calls BSD system routines should ensure
+      #    that the const *char parameters of these routines are in UTF-8
+      #    encoding. All BSD system functions expect their string
+      #    parameters to be in UTF-8 encoding and nothing else."
+      # See the comments in config.charset. Therefore we bypass the test.
+      gt_cv_locale_fr=none
+    else
+      AC_LANG_CONFTEST([AC_LANG_SOURCE([
+changequote(,)dnl
+#include <locale.h>
+#include <time.h>
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+struct tm t;
+char buf[16];
+int main () {
+  /* Check whether the given locale name is recognized by the system. */
+  if (setlocale (LC_ALL, "") == NULL) return 1;
+  /* Check whether nl_langinfo(CODESET) is nonempty.
+     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
+     is empty, and the behaviour of Tcl 8.4 in this locale is not useful. */
+#if HAVE_LANGINFO_CODESET
+  if (nl_langinfo (CODESET) [0] == '\0') return 1;
+#endif
+  /* Check whether in the abbreviation of the second month, the second
+     character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is only
+     one byte long. This excludes the UTF-8 encoding. */
+  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
+  if (strftime (buf, sizeof (buf), "%b", &t) < 3 || buf[2] != 'v') return 1;
+  return 0;
+}
+changequote([,])dnl
+      ])])
+      if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
+        # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
+        # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
+        # configure script would override the LC_ALL setting. Likewise for
+        # LC_CTYPE, which is also set at the beginning of the configure script.
+        # Test for the usual locale name.
+        if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+          gt_cv_locale_fr=fr_FR
+        else
+          # Test for the locale name with explicit encoding suffix.
+          if (LC_ALL=fr_FR.ISO-8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+            gt_cv_locale_fr=fr_FR.ISO-8859-1
+          else
+            # Test for the AIX, OSF/1, FreeBSD, NetBSD, OpenBSD locale name.
+            if (LC_ALL=fr_FR.ISO8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+              gt_cv_locale_fr=fr_FR.ISO8859-1
+            else
+              # Test for the HP-UX locale name.
+              if (LC_ALL=fr_FR.iso88591 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+                gt_cv_locale_fr=fr_FR.iso88591
+              else
+                # Test for the Solaris 7 locale name.
+                if (LC_ALL=fr LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+                  gt_cv_locale_fr=fr
+                else
+                  # Special test for NetBSD 1.6.
+                  if test -f /usr/share/locale/fr_FR.ISO8859-1/LC_CTYPE; then
+                    gt_cv_locale_fr=fr_FR.ISO8859-1
+                  else
+                    # None found.
+                    gt_cv_locale_fr=none
+                  fi
+                fi
+              fi
+            fi
+          fi
+        fi
+      fi
+      rm -fr conftest*
+    fi
+  ])
+  LOCALE_FR=$gt_cv_locale_fr
+  AC_SUBST([LOCALE_FR])
+])
+
+dnl Determine the name of a french locale with UTF-8 encoding.
+AC_DEFUN([gt_LOCALE_FR_UTF8],
+[
+  AC_REQUIRE([AM_LANGINFO_CODESET])
+  AC_CACHE_CHECK([for a french Unicode locale], gt_cv_locale_fr_utf8, [
+    AC_LANG_CONFTEST([AC_LANG_SOURCE([
+changequote(,)dnl
+#include <locale.h>
+#include <time.h>
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+struct tm t;
+char buf[16];
+int main () {
+  /* On BeOS, locales are not implemented in libc. Rather, libintl
+     imitates locale dependent behaviour by looking at the environment
+     variables, and all locales use the UTF-8 encoding. */
+#if !defined(__BEOS__)
+  /* Check whether the given locale name is recognized by the system. */
+  if (setlocale (LC_ALL, "") == NULL) return 1;
+  /* Check whether nl_langinfo(CODESET) is nonempty.
+     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
+     is empty, and the behaviour of Tcl 8.4 in this locale is not useful. */
+# if HAVE_LANGINFO_CODESET
+  if (nl_langinfo (CODESET) [0] == '\0') return 1;
+# endif
+  /* Check whether in the abbreviation of the second month, the second
+     character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is
+     two bytes long, with UTF-8 encoding. */
+  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
+  if (strftime (buf, sizeof (buf), "%b", &t) < 4
+      || buf[1] != (char) 0xc3 || buf[2] != (char) 0xa9 || buf[3] != 'v')
+    return 1;
+#endif
+  return 0;
+}
+changequote([,])dnl
+      ])])
+    if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
+      # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
+      # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
+      # configure script would override the LC_ALL setting. Likewise for
+      # LC_CTYPE, which is also set at the beginning of the configure script.
+      # Test for the usual locale name.
+      if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+        gt_cv_locale_fr_utf8=fr_FR
+      else
+        # Test for the locale name with explicit encoding suffix.
+        if (LC_ALL=fr_FR.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+          gt_cv_locale_fr_utf8=fr_FR.UTF-8
+        else
+          # Test for the Solaris 7 locale name.
+          if (LC_ALL=fr.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+            gt_cv_locale_fr_utf8=fr.UTF-8
+          else
+            # None found.
+            gt_cv_locale_fr_utf8=none
+          fi
+        fi
+      fi
+    fi
+    rm -fr conftest*
+  ])
+  LOCALE_FR_UTF8=$gt_cv_locale_fr_utf8
+  AC_SUBST([LOCALE_FR_UTF8])
+])
diff --git a/modules/mbsstr-tests b/modules/mbsstr-tests
new file mode 100644
index 0000000000..d3cfbfd597
--- /dev/null
+++ b/modules/mbsstr-tests
@@ -0,0 +1,22 @@
+Files:
+tests/test-mbsstr1.c
+tests/test-mbsstr2.sh
+tests/test-mbsstr2.c
+tests/test-mbsstr3.sh
+tests/test-mbsstr3.c
+m4/locale-fr.m4
+m4/locale-zh.m4
+m4/codeset.m4
+
+Depends-on:
+
+configure.ac:
+gt_LOCALE_FR_UTF8
+gt_LOCALE_ZH_CN
+
+Makefile.am:
+TESTS += test-mbsstr1 test-mbsstr2.sh test-mbsstr3.sh
+TESTS_ENVIRONMENT += EXEEXT='@EXEEXT@' LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' LOCALE_ZH_CN='@LOCALE_ZH_CN@'
+EXTRA_DIST += test-mbsstr2.sh test-mbsstr3.sh
+check_PROGRAMS += test-mbsstr1 test-mbsstr2 test-mbsstr3
+
diff --git a/tests/test-mbsstr1.c b/tests/test-mbsstr1.c
new file mode 100644
index 0000000000..549121206e
--- /dev/null
+++ b/tests/test-mbsstr1.c
@@ -0,0 +1,133 @@
+/* Test of searching in a string.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <stdlib.h>
+
+#define ASSERT(expr) do { if (!(expr)) abort (); } while (0)
+
+int
+main ()
+{
+  /* This test is executed in the C locale. */
+
+  {
+    const char input[] = "foo";
+    const char *result = mbsstr (input, "");
+    ASSERT (result == input);
+  }
+
+  {
+    const char input[] = "foo";
+    const char *result = mbsstr (input, "o");
+    ASSERT (result == input + 1);
+  }
+
+  {
+    const char input[] = "ABC ABCDAB ABCDABCDABDE";
+    const char *result = mbsstr (input, "ABCDABD");
+    ASSERT (result == input + 15);
+  }
+
+  {
+    const char input[] = "ABC ABCDAB ABCDABCDABDE";
+    const char *result = mbsstr (input, "ABCDABE");
+    ASSERT (result == NULL);
+  }
+
+  /* Check that a very long haystack is handled quickly if the needle is
+     short and occurs near the beginning. */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *needle =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+    char *haystack = (char *) malloc (m + 1);
+    if (haystack != NULL)
+      {
+        memset (haystack, 'A', m);
+        haystack[0] = 'B';
+        haystack[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbsstr (haystack, needle) == haystack + 1);
+          }
+
+        free (haystack);
+      }
+  }
+
+  /* Check that a very long needle is discarded quickly if the haystack is
+     short. */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *haystack =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB";
+    char *needle = (char *) malloc (m + 1);
+    if (needle != NULL)
+      {
+        memset (needle, 'A', m);
+        needle[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbsstr (haystack, needle) == NULL);
+          }
+
+        free (needle);
+      }
+  }
+
+  /* Check that the asymptotic worst-case complexity is not quadratic. */
+  {
+    size_t m = 1000000;
+    char *haystack = (char *) malloc (2 * m + 2);
+    char *needle = (char *) malloc (m + 2);
+    if (haystack != NULL && needle != NULL)
+      {
+        const char *result;
+
+        memset (haystack, 'A', 2 * m);
+        haystack[2 * m] = 'B';
+        haystack[2 * m + 1] = '\0';
+
+        memset (needle, 'A', m);
+        needle[m] = 'B';
+        needle[m + 1] = '\0';
+
+        result = mbsstr (haystack, needle);
+        ASSERT (result == haystack + m);
+      }
+    if (needle != NULL)
+      free (needle);
+    if (haystack != NULL)
+      free (haystack);
+  }
+
+  return 0;
+}
diff --git a/tests/test-mbsstr2.c b/tests/test-mbsstr2.c
new file mode 100644
index 0000000000..6aebf56af3
--- /dev/null
+++ b/tests/test-mbsstr2.c
@@ -0,0 +1,146 @@
+/* Test of searching in a string.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <locale.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) do { if (!(expr)) abort (); } while (0)
+
+int
+main ()
+{
+  /* configure should already have checked that the locale is supported. */
+  if (setlocale (LC_ALL, "") == NULL)
+    return 1;
+
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbsstr (input, "");
+    ASSERT (result == input);
+  }
+
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbsstr (input, "\303\266");
+    ASSERT (result == input + 1);
+  }
+
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbsstr (input, "\266\303");
+    ASSERT (result == NULL);
+  }
+
+  {
+    const char input[] = "\303\204BC \303\204BCD\303\204B \303\204BCD\303\204BCD\303\204BDE"; /* "ÄBC ÄBCDÄB ÄBCDÄBCDÄBDE" */
+    const char *result = mbsstr (input, "\303\204BCD\303\204BD"); /* "ÄBCDÄBD" */
+    ASSERT (result == input + 19);
+  }
+
+  {
+    const char input[] = "\303\204BC \303\204BCD\303\204B \303\204BCD\303\204BCD\303\204BDE"; /* "ÄBC ÄBCDÄB ÄBCDÄBCDÄBDE" */
+    const char *result = mbsstr (input, "\303\204BCD\303\204BE"); /* "ÄBCDÄBE" */
+    ASSERT (result == NULL);
+  }
+
+  /* Check that a very long haystack is handled quickly if the needle is
+     short and occurs near the beginning. */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *needle =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+    char *haystack = (char *) malloc (m + 1);
+    if (haystack != NULL)
+      {
+        memset (haystack, 'A', m);
+        haystack[0] = '\303'; haystack[1] = '\204';
+        haystack[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbsstr (haystack, needle) == haystack + 2);
+          }
+
+        free (haystack);
+      }
+  }
+
+  /* Check that a very long needle is discarded quickly if the haystack is
+     short. */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *haystack =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207";
+    char *needle = (char *) malloc (m + 1);
+    if (needle != NULL)
+      {
+        memset (needle, 'A', m);
+        needle[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbsstr (haystack, needle) == NULL);
+          }
+
+        free (needle);
+      }
+  }
+
+  /* Check that the asymptotic worst-case complexity is not quadratic. */
+  {
+    size_t m = 1000000;
+    char *haystack = (char *) malloc (2 * m + 3);
+    char *needle = (char *) malloc (m + 3);
+    if (haystack != NULL && needle != NULL)
+      {
+        const char *result;
+
+        memset (haystack, 'A', 2 * m);
+        haystack[2 * m] = '\303'; haystack[2 * m + 1] = '\207';
+        haystack[2 * m + 2] = '\0';
+
+        memset (needle, 'A', m);
+        needle[m] = '\303'; needle[m + 1] = '\207';
+        needle[m + 2] = '\0';
+
+        result = mbsstr (haystack, needle);
+        ASSERT (result == haystack + m);
+      }
+    if (needle != NULL)
+      free (needle);
+    if (haystack != NULL)
+      free (haystack);
+  }
+
+  return 0;
+}
diff --git a/tests/test-mbsstr2.sh b/tests/test-mbsstr2.sh
new file mode 100755
index 0000000000..79d06df649
--- /dev/null
+++ b/tests/test-mbsstr2.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Test whether a specific UTF-8 locale is installed.
+: ${LOCALE_FR_UTF8=fr_FR.UTF-8}
+if test "$LOCALE_FR_UTF8" = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no french Unicode locale is installed"
+  else
+    echo "Skipping test: no french Unicode locale is supported"
+  fi
+  exit 77
+fi
+
+LC_ALL=$LOCALE_FR_UTF8 \
+./test-mbsstr2${EXEEXT}
diff --git a/tests/test-mbsstr3.c b/tests/test-mbsstr3.c
new file mode 100644
index 0000000000..5461196b92
--- /dev/null
+++ b/tests/test-mbsstr3.c
@@ -0,0 +1,85 @@
+/* Test of searching in a string.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <locale.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) do { if (!(expr)) abort (); } while (0)
+
+int
+main ()
+{
+  /* configure should already have checked that the locale is supported. */
+  if (setlocale (LC_ALL, "") == NULL)
+    return 1;
+
+  /* Tests with a character < 0x30. */
+  {
+    const char input[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    const char *result = mbsstr (input, " ");
+    ASSERT (result == input + 4);
+  }
+
+  {
+    const char input[] = "\312\276\300\375"; /* "示例" */
+    const char *result = mbsstr (input, " ");
+    ASSERT (result == NULL);
+  }
+
+  /* Tests with a character >= 0x30. */
+  {
+    const char input[] = "\272\305123\324\313\320\320\241\243"; /* "号123运行。" */
+    const char *result = mbsstr (input, "2");
+    ASSERT (result == input + 3);
+  }
+
+  /* The following tests show how mbsstr() is different from strstr(). */
+
+  {
+    const char input[] = "\313\320\320\320"; /* "诵行" */
+    const char *result = mbsstr (input, "\320\320"); /* "行" */
+    ASSERT (result == input + 2);
+  }
+
+  {
+    const char input[] = "\203\062\332\066123\324\313\320\320\241\243"; /* "씋123运行。" */
+    const char *result = mbsstr (input, "2");
+    ASSERT (result == input + 5);
+  }
+
+  {
+    const char input[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    const char *result = mbsstr (input, "\276\300"); /* "纠" */
+    ASSERT (result == NULL);
+  }
+
+  {
+    const char input[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    const char *result = mbsstr (input, "\375 "); /* invalid multibyte sequence */
+    ASSERT (result == NULL);
+  }
+
+  return 0;
+}
diff --git a/tests/test-mbsstr3.sh b/tests/test-mbsstr3.sh
new file mode 100755
index 0000000000..732c01fd05
--- /dev/null
+++ b/tests/test-mbsstr3.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Test whether a specific GB18030 locale is installed.
+: ${LOCALE_ZH_CN=zh_CN.GB18030}
+if test "$LOCALE_ZH_CN" = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no chinese GB18030 locale is installed"
+  else
+    echo "Skipping test: no chinese GB18030 locale is supported"
+  fi
+  exit 77
+fi
+
+LC_ALL=$LOCALE_ZH_CN \
+./test-mbsstr3${EXEEXT}
-- 
2.30.2