+2007-02-11 Bruno Haible <bruno@clisp.org>
+
+ * modules/mbscasestr-tests: New file.
+ * tests/test-mbscasestr1.c: New file.
+ * tests/test-mbscasestr2.sh: New file.
+ * tests/test-mbscasestr2.c: New file.
+ * tests/test-mbscasestr3.sh: New file.
+ * tests/test-mbscasestr3.c: New file.
+ * tests/test-mbscasestr4.sh: New file.
+ * tests/test-mbscasestr4.c: New file.
+
2007-02-11 Bruno Haible <bruno@clisp.org>
Ensure O(n) worst-case complexity of mbsstr.
--- /dev/null
+Files:
+tests/test-mbscasestr1.c
+tests/test-mbscasestr2.sh
+tests/test-mbscasestr2.c
+tests/test-mbscasestr3.sh
+tests/test-mbscasestr3.c
+tests/test-mbscasestr4.sh
+tests/test-mbscasestr4.c
+m4/locale-fr.m4
+m4/locale-tr.m4
+m4/locale-zh.m4
+m4/codeset.m4
+
+Depends-on:
+
+configure.ac:
+gt_LOCALE_FR_UTF8
+gt_LOCALE_TR_UTF8
+gt_LOCALE_ZH_CN
+
+Makefile.am:
+TESTS += test-mbscasestr1 test-mbscasestr2.sh test-mbscasestr3.sh test-mbscasestr4.sh
+TESTS_ENVIRONMENT += EXEEXT='@EXEEXT@' LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' LOCALE_TR_UTF8='@LOCALE_TR_UTF8@' LOCALE_ZH_CN='@LOCALE_ZH_CN@'
+EXTRA_DIST += test-mbscasestr2.sh test-mbscasestr3.sh test-mbscasestr4.sh
+check_PROGRAMS += test-mbscasestr1 test-mbscasestr2 test-mbscasestr3 test-mbscasestr4
+
--- /dev/null
+/* Test of case-insensitive searching in a string.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <stdlib.h>
+
+#define ASSERT(expr) if (!(expr)) abort ();
+
+/* Exercise mbscasestr() without calling setlocale(): a few small
+   match/mismatch cases followed by three checks on very large inputs.
+   Returns 0 when every check passes; a failed check calls abort().  */
+int
+main (void)
+{
+  /* This test is executed in the C locale.  */
+
+  /* An empty needle matches at the start of the haystack.  */
+  {
+    const char input[] = "foo";
+    const char *result = mbscasestr (input, "");
+    ASSERT (result == input);
+  }
+
+  /* An upper-case needle finds the first lower-case occurrence.  */
+  {
+    const char input[] = "foo";
+    const char *result = mbscasestr (input, "O");
+    ASSERT (result == input + 1);
+  }
+
+  /* The needle shares prefixes with earlier parts of the haystack; the
+     expected match starts at offset 15.  */
+  {
+    const char input[] = "ABC ABCDAB ABCDABCDABDE";
+    const char *result = mbscasestr (input, "ABCDaBD");
+    ASSERT (result == input + 15);
+  }
+
+  /* Same haystack, but this needle does not occur in it.  */
+  {
+    const char input[] = "ABC ABCDAB ABCDABCDABDE";
+    const char *result = mbscasestr (input, "ABCDaBE");
+    ASSERT (result == NULL);
+  }
+
+  /* Check that a very long haystack is handled quickly if the needle is
+     short and occurs near the beginning.
+     Nothing is timed here: the search is simply repeated many times, so a
+     slow implementation shows up as a very long running test.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    /* Points to a string literal, hence const.  */
+    const char *needle =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaAaaaaaaAAAAaaaaaaa"
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+    char *haystack = malloc (m + 1);
+    /* If the allocation fails, this check is silently skipped.  */
+    if (haystack != NULL)
+      {
+        /* Haystack is 'B' followed by m - 1 letters 'A'.  */
+        memset (haystack, 'A', m);
+        haystack[0] = 'B';
+        haystack[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbscasestr (haystack, needle) == haystack + 1);
+          }
+
+        free (haystack);
+      }
+  }
+
+  /* Check that a very long needle is discarded quickly if the haystack is
+     short.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    /* Points to a string literal, hence const.  */
+    const char *haystack =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB";
+    char *needle = malloc (m + 1);
+    /* If the allocation fails, this check is silently skipped.  */
+    if (needle != NULL)
+      {
+        /* Needle is m letters 'A', far longer than the haystack.  */
+        memset (needle, 'A', m);
+        needle[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbscasestr (haystack, needle) == NULL);
+          }
+
+        free (needle);
+      }
+  }
+
+  /* Check that the asymptotic worst-case complexity is not quadratic.  */
+  {
+    size_t m = 1000000;
+    char *haystack = malloc (2 * m + 2);
+    char *needle = malloc (m + 2);
+    /* The search runs only if both buffers could be allocated.  */
+    if (haystack != NULL && needle != NULL)
+      {
+        const char *result;
+
+        /* Haystack: 2 * m letters 'A' followed by 'B'.  */
+        memset (haystack, 'A', 2 * m);
+        haystack[2 * m] = 'B';
+        haystack[2 * m + 1] = '\0';
+
+        /* Needle: m letters 'a' followed by 'B'.  */
+        memset (needle, 'a', m);
+        needle[m] = 'B';
+        needle[m + 1] = '\0';
+
+        /* The needle's final 'B' can only line up with the haystack's
+           final 'B', so the match starts at offset m.  */
+        result = mbscasestr (haystack, needle);
+        ASSERT (result == haystack + m);
+      }
+    /* free (NULL) is a no-op, so no NULL checks are needed here.  */
+    free (needle);
+    free (haystack);
+  }
+
+  return 0;
+}
--- /dev/null
+/* Test of case-insensitive searching in a string.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <locale.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) if (!(expr)) abort ();
+
+/* Exercise mbscasestr() on UTF-8 encoded strings in the locale selected by
+   the environment: small match/mismatch cases followed by three checks on
+   very large inputs.
+   Returns 1 if the locale cannot be set and 0 when every check passes; a
+   failed check calls abort().  */
+int
+main (void)
+{
+  /* configure should already have checked that the locale is supported.  */
+  if (setlocale (LC_ALL, "") == NULL)
+    return 1;
+
+  /* An empty needle matches at the start of the haystack.  */
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbscasestr (input, "");
+    ASSERT (result == input);
+  }
+
+  /* A two-byte character is found at its first occurrence.  */
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbscasestr (input, "\303\266");
+    ASSERT (result == input + 1);
+  }
+
+  /* The bytes \266\303 occur in the haystack only as the tail of one
+     character followed by the head of the next; that must not match.  */
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbscasestr (input, "\266\303");
+    ASSERT (result == NULL);
+  }
+
+  /* The needle differs from the haystack in the case of a non-ASCII
+     letter; the expected match starts at byte offset 19.  */
+  {
+    const char input[] = "\303\204BC \303\204BCD\303\204B \303\204BCD\303\204BCD\303\204BDE"; /* "ÄBC ÄBCDÄB ÄBCDÄBCDÄBDE" */
+    const char *result = mbscasestr (input, "\303\244BCD\303\204BD"); /* "äBCDÄBD" */
+    ASSERT (result == input + 19);
+  }
+
+  /* Same haystack, but this needle does not occur in it.  */
+  {
+    const char input[] = "\303\204BC \303\204BCD\303\204B \303\204BCD\303\204BCD\303\204BDE"; /* "ÄBC ÄBCDÄB ÄBCDÄBCDÄBDE" */
+    const char *result = mbscasestr (input, "\303\204BCD\303\204BE"); /* "ÄBCDÄBE" */
+    ASSERT (result == NULL);
+  }
+
+  /* Check that a very long haystack is handled quickly if the needle is
+     short and occurs near the beginning.
+     Nothing is timed here: the search is simply repeated many times, so a
+     slow implementation shows up as a very long running test.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    /* Points to a string literal, hence const.  */
+    const char *needle =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+    char *haystack = malloc (m + 1);
+    /* If the allocation fails, this check is silently skipped.  */
+    if (haystack != NULL)
+      {
+        /* Haystack is the two bytes \303\204 followed by letters 'a', so
+           the upper-case needle first matches at byte offset 2.  */
+        memset (haystack, 'a', m);
+        haystack[0] = '\303'; haystack[1] = '\204';
+        haystack[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbscasestr (haystack, needle) == haystack + 2);
+          }
+
+        free (haystack);
+      }
+  }
+
+  /* Check that a very long needle is discarded quickly if the haystack is
+     short.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    /* Points to a string literal, hence const.  */
+    const char *haystack =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207";
+    char *needle = malloc (m + 1);
+    /* If the allocation fails, this check is silently skipped.  */
+    if (needle != NULL)
+      {
+        /* Needle is m letters 'A', far longer than the haystack.  */
+        memset (needle, 'A', m);
+        needle[m] = '\0';
+
+        for (; repeat > 0; repeat--)
+          {
+            ASSERT (mbscasestr (haystack, needle) == NULL);
+          }
+
+        free (needle);
+      }
+  }
+
+  /* Check that the asymptotic worst-case complexity is not quadratic.  */
+  {
+    size_t m = 1000000;
+    char *haystack = malloc (2 * m + 3);
+    char *needle = malloc (m + 3);
+    /* The search runs only if both buffers could be allocated.  */
+    if (haystack != NULL && needle != NULL)
+      {
+        const char *result;
+
+        /* Haystack: 2 * m letters 'A' followed by the bytes \303\247.  */
+        memset (haystack, 'A', 2 * m);
+        haystack[2 * m] = '\303'; haystack[2 * m + 1] = '\247';
+        haystack[2 * m + 2] = '\0';
+
+        /* Needle: m letters 'a' followed by the bytes \303\207, which
+           differ from the haystack's final character.  */
+        memset (needle, 'a', m);
+        needle[m] = '\303'; needle[m + 1] = '\207';
+        needle[m + 2] = '\0';
+
+        /* The two final characters are expected to compare equal when
+           case is ignored, giving a match that starts at offset m.  */
+        result = mbscasestr (haystack, needle);
+        ASSERT (result == haystack + m);
+      }
+    /* free (NULL) is a no-op, so no NULL checks are needed here.  */
+    free (needle);
+    free (haystack);
+  }
+
+  return 0;
+}
--- /dev/null
+#!/bin/sh
+
+# Run test-mbscasestr2 in the locale named by LOCALE_FR_UTF8.
+# Test whether a specific UTF-8 locale is installed.
+# LOCALE_FR_UTF8 defaults to fr_FR.UTF-8 when unset; the value "none" makes
+# this script print a "Skipping test" message and exit with status 77.
+: ${LOCALE_FR_UTF8=fr_FR.UTF-8}
+# The expansion is quoted so that an empty or multi-word value cannot turn
+# the 'test' invocation into a syntax error.
+if test "$LOCALE_FR_UTF8" = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no french Unicode locale is installed"
+  else
+    echo "Skipping test: no french Unicode locale is supported"
+  fi
+  exit 77
+fi
+
+LC_ALL=$LOCALE_FR_UTF8 \
+./test-mbscasestr2${EXEEXT}
--- /dev/null
+/* Test of case-insensitive searching in a string.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <locale.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) if (!(expr)) abort ();
+
+/* Exercise mbscasestr() on multibyte strings in the locale selected by the
+   environment.  The expected results are byte offsets into the haystack.
+   Returns 1 if the locale cannot be set and 0 when every check passes; a
+   failed check calls abort().  */
+int
+main ()
+{
+  /* configure is expected to have verified that the locale is usable.  */
+  if (!setlocale (LC_ALL, ""))
+    return 1;
+
+  /* Needle is a character < 0x30: present ...  */
+  {
+    const char haystack[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    ASSERT (mbscasestr (haystack, " ") == haystack + 4);
+  }
+
+  /* ... and absent.  */
+  {
+    const char haystack[] = "\312\276\300\375"; /* "示例" */
+    ASSERT (mbscasestr (haystack, " ") == NULL);
+  }
+
+  /* Needle is a character >= 0x30.  */
+  {
+    const char haystack[] = "\272\305123\324\313\320\320\241\243"; /* "号123运行。" */
+    ASSERT (mbscasestr (haystack, "2") == haystack + 3);
+  }
+
+  /* From here on, the checks show where mbscasestr() differs from
+     strcasestr().  */
+
+  /* The byte pair \320\320 first appears at offset 1, but the expected
+     match is the one at offset 2.  */
+  {
+    const char haystack[] = "\313\320\320\320"; /* "诵行" */
+    ASSERT (mbscasestr (haystack, "\320\320") == haystack + 2); /* "行" */
+  }
+
+  /* The byte \062 ('2') inside the leading multibyte sequence must be
+     skipped; the expected match is the digit at offset 5.  */
+  {
+    const char haystack[] = "\203\062\332\066123\324\313\320\320\241\243"; /* "씋123运行。" */
+    ASSERT (mbscasestr (haystack, "2") == haystack + 5);
+  }
+
+  /* The bytes \276\300 occur in the haystack, yet no match is expected.  */
+  {
+    const char haystack[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    ASSERT (mbscasestr (haystack, "\276\300") == NULL); /* "纠" */
+  }
+
+  /* The needle is an invalid multibyte sequence whose bytes do occur in
+     the haystack; no match is expected.  */
+  {
+    const char haystack[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    ASSERT (mbscasestr (haystack, "\375 ") == NULL);
+  }
+
+  return 0;
+}
--- /dev/null
+#!/bin/sh
+
+# Run test-mbscasestr3 in the locale named by LOCALE_ZH_CN.
+# Test whether a specific GB18030 locale is installed.
+# LOCALE_ZH_CN defaults to zh_CN.GB18030 when unset; the value "none" makes
+# this script print a "Skipping test" message and exit with status 77.
+: ${LOCALE_ZH_CN=zh_CN.GB18030}
+# The expansion is quoted so that an empty or multi-word value cannot turn
+# the 'test' invocation into a syntax error.
+if test "$LOCALE_ZH_CN" = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no chinese GB18030 locale is installed"
+  else
+    echo "Skipping test: no chinese GB18030 locale is supported"
+  fi
+  exit 77
+fi
+
+LC_ALL=$LOCALE_ZH_CN \
+./test-mbscasestr3${EXEEXT}
--- /dev/null
+/* Test of case-insensitive searching in a string.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <locale.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) if (!(expr)) abort ();
+
+/* Exercise mbscasestr() in the locale selected by the environment, using
+   ASCII text and UTF-8 encoded text.
+   Returns 1 if the locale cannot be set and 0 when every check passes; a
+   failed check calls abort().  */
+int
+main ()
+{
+  /* configure is expected to have verified that the locale is usable.  */
+  if (!setlocale (LC_ALL, ""))
+    return 1;
+
+  /* Plain ASCII: a mixed-case needle finds the first upper-case match.  */
+  {
+    const char text[] = "GOLD NEEDLE BEATS TIN NEEDLE";
+    const char *hit = mbscasestr (text, "Needle");
+    ASSERT (hit == text + 5);
+  }
+
+  /* From here on, the checks show where mbscasestr() differs from
+     strcasestr().  */
+
+  {
+    const char text[] = "s\303\266zc\303\274k"; /* sözcük */
+    const char *hit;
+
+    hit = mbscasestr (text, "\303\266z");
+    ASSERT (hit == text + 1);
+    hit = mbscasestr (text, "\303\266c");
+    ASSERT (hit == NULL);
+  }
+
+  /* A needle that is longer in bytes than the haystack can still be found
+     in it.  */
+  {
+    const char text[] = "*Tbilisi imini*";
+    const char *hit =
+      mbscasestr (text, "TB\304\260L\304\260S\304\260 \304\260m\304\260n\304\260"); /* TBİLİSİ İmİnİ */
+    ASSERT (hit == text + 1);
+  }
+
+  return 0;
+}
--- /dev/null
+#!/bin/sh
+
+# Run test-mbscasestr4 in the locale named by LOCALE_TR_UTF8.
+# Test whether a specific Turkish locale is installed.
+# LOCALE_TR_UTF8 defaults to tr_TR.UTF-8 when unset; the value "none" makes
+# this script print a "Skipping test" message and exit with status 77.
+: ${LOCALE_TR_UTF8=tr_TR.UTF-8}
+# The expansion is quoted so that an empty or multi-word value cannot turn
+# the 'test' invocation into a syntax error.
+if test "$LOCALE_TR_UTF8" = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no turkish Unicode locale is installed"
+  else
+    echo "Skipping test: no turkish Unicode locale is supported"
+  fi
+  exit 77
+fi
+
+LC_ALL=$LOCALE_TR_UTF8 \
+./test-mbscasestr4${EXEEXT}