+2007-02-04 Bruno Haible <bruno@clisp.org>
+
+ New module mbscasestr, reduced goal of strcasestr.
+ * modules/mbscasestr: New file.
+ * lib/mbscasestr.c: New file, copied from lib/strcasestr.c.
+ (mbscasestr): Renamed from strcasestr.
+ * lib/strcasestr.c: Don't include mbuiter.h.
+ (strcasestr): Remove support for multibyte locales.
+ * lib/string_.h (strcasestr): Don't rename. Declare only if missing.
+ Change the conditional link warning.
+ (mbscasestr): New declaration.
+ * m4/mbscasestr.m4: New file.
+ * m4/strcasestr.m4 (gl_FUNC_STRCASESTR): Enable the replacement only if
+ the system does not have strcasestr. Set HAVE_STRCASESTR instead of
+ REPLACE_STRCASESTR.
+ * m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Initialize
+ HAVE_STRCASESTR instead of REPLACE_STRCASESTR.
+ (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSCASESTR.
+ * modules/string (string.h): Also substitute GNULIB_MBSCASESTR.
+ Substitute HAVE_STRCASESTR instead of REPLACE_STRCASESTR.
+ * MODULES.html.sh (Internationalization functions): Add mbscasestr.
+
2007-02-04 Bruno Haible <bruno@clisp.org>
Simplify handling of strncasecmp.
func_module mbsrchr
func_module mbsstr
func_module mbscasecmp
+ func_module mbscasestr
func_module mbswidth
func_module memcasecmp
func_module memcoll
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
-/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
- comparison.
+/* Find the first occurrence of the character string NEEDLE in the character
+ string HAYSTACK, using case-insensitive comparison.
Note: This function may, in multibyte locales, return success even if
strlen (haystack) < strlen (needle) ! */
char *
-strcasestr (const char *haystack, const char *needle)
+mbscasestr (const char *haystack, const char *needle)
{
/* Be careful not to look at the entire extent of haystack or needle
until needed. This is useful because of these two cases:
#include <ctype.h>
#include <stddef.h> /* for NULL, in case a nonstandard string.h lacks it */
-#if HAVE_MBRTOWC
-# include "mbuiter.h"
-#endif
-
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
char *
strcasestr (const char *haystack, const char *needle)
{
- /* Be careful not to look at the entire extent of haystack or needle
- until needed. This is useful because of these two cases:
- - haystack may be very long, and a match of needle found early,
- - needle may be very long, and not even a short initial segment of
- needle may be found in haystack. */
-#if HAVE_MBRTOWC
- if (MB_CUR_MAX > 1)
+ if (*needle != '\0')
{
- mbui_iterator_t iter_needle;
+ /* Speed up the following searches of needle by caching its first
+ character. */
+ unsigned char b = TOLOWER ((unsigned char) *needle);
- mbui_init (iter_needle, needle);
- if (mbui_avail (iter_needle))
+ needle++;
+ for (;; haystack++)
{
- mbchar_t b;
- mbui_iterator_t iter_haystack;
-
- mb_copy (&b, &mbui_cur (iter_needle));
- if (b.wc_valid)
- b.wc = towlower (b.wc);
-
- mbui_init (iter_haystack, haystack);
- for (;; mbui_advance (iter_haystack))
+ if (*haystack == '\0')
+ /* No match. */
+ return NULL;
+ if (TOLOWER ((unsigned char) *haystack) == b)
+ /* The first character matches. */
{
- mbchar_t c;
+ const char *rhaystack = haystack + 1;
+ const char *rneedle = needle;
- if (!mbui_avail (iter_haystack))
- /* No match. */
- return NULL;
-
- mb_copy (&c, &mbui_cur (iter_haystack));
- if (c.wc_valid)
- c.wc = towlower (c.wc);
- if (mb_equal (c, b))
- /* The first character matches. */
+ for (;; rhaystack++, rneedle++)
{
- mbui_iterator_t rhaystack;
- mbui_iterator_t rneedle;
-
- memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
- mbui_advance (rhaystack);
-
- mbui_init (rneedle, needle);
- if (!mbui_avail (rneedle))
- abort ();
- mbui_advance (rneedle);
-
- for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
- {
- if (!mbui_avail (rneedle))
- /* Found a match. */
- return (char *) mbui_cur_ptr (iter_haystack);
- if (!mbui_avail (rhaystack))
- /* No match. */
- return NULL;
- if (!mb_caseequal (mbui_cur (rhaystack),
- mbui_cur (rneedle)))
- /* Nothing in this round. */
- break;
- }
+ if (*rneedle == '\0')
+ /* Found a match. */
+ return (char *) haystack;
+ if (*rhaystack == '\0')
+ /* No match. */
+ return NULL;
+ if (TOLOWER ((unsigned char) *rhaystack)
+ != TOLOWER ((unsigned char) *rneedle))
+ /* Nothing in this round. */
+ break;
}
}
}
- else
- return (char *) haystack;
}
else
-#endif
- {
- if (*needle != '\0')
- {
- /* Speed up the following searches of needle by caching its first
- character. */
- unsigned char b = TOLOWER ((unsigned char) *needle);
-
- needle++;
- for (;; haystack++)
- {
- if (*haystack == '\0')
- /* No match. */
- return NULL;
- if (TOLOWER ((unsigned char) *haystack) == b)
- /* The first character matches. */
- {
- const char *rhaystack = haystack + 1;
- const char *rneedle = needle;
-
- for (;; rhaystack++, rneedle++)
- {
- if (*rneedle == '\0')
- /* Found a match. */
- return (char *) haystack;
- if (*rhaystack == '\0')
- /* No match. */
- return NULL;
- if (TOLOWER ((unsigned char) *rhaystack)
- != TOLOWER ((unsigned char) *rneedle))
- /* Nothing in this round. */
- break;
- }
- }
- }
- }
- else
- return (char *) haystack;
- }
+ return (char *) haystack;
}
#endif
/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
- comparison.
- Note: This function may, in multibyte locales, return success even if
- strlen (haystack) < strlen (needle) ! */
-#if @GNULIB_STRCASESTR@
-# if @REPLACE_STRCASESTR@
-# undef strcasestr
-# define strcasestr rpl_strcasestr
+ comparison. */
+#if ! @HAVE_STRCASESTR@
extern char *strcasestr (const char *haystack, const char *needle);
-# endif
-#elif defined GNULIB_POSIXCHECK
+#endif
+#if defined GNULIB_POSIXCHECK
+/* strcasestr() does not work with multibyte strings:
+ It is a glibc extension, and glibc implements it only for unibyte
+ locales. */
# undef strcasestr
# define strcasestr(a,b) \
- (GL_LINK_WARNING ("strcasestr is often incorrectly implemented for multibyte locales - use gnulib module 'strcasestr' for correct and portable internationalization"), \
+ (GL_LINK_WARNING ("strcasestr does not work correctly on character strings in multibyte locales - use mbscasestr if you care about internationalization, or use c-strcasestr if you want a locale independent function"), \
strcasestr (a, b))
#endif
extern int mbscasecmp (const char *s1, const char *s2);
#endif
+#if @GNULIB_MBSCASESTR@
+/* Find the first occurrence of the character string NEEDLE in the character
+ string HAYSTACK, using case-insensitive comparison.
+ Note: This function may, in multibyte locales, return success even if
+ strlen (haystack) < strlen (needle) !
+ Unlike strcasestr(), this function works correctly in multibyte locales. */
+extern char * mbscasestr (const char *haystack, const char *needle);
+#endif
+
#ifdef __cplusplus
}
-# strcasestr.m4 serial 4
+# strcasestr.m4 serial 5
dnl Copyright (C) 2005, 2007 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
AC_DEFUN([gl_FUNC_STRCASESTR],
[
AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS])
- dnl No known system has a strcasestr() function that works correctly in
- dnl multibyte locales. Therefore we use our version always.
- AC_LIBOBJ(strcasestr)
- REPLACE_STRCASESTR=1
- gl_PREREQ_STRCASESTR
+ AC_REPLACE_FUNCS(strcasestr)
+ if test $ac_cv_func_strcasestr = no; then
+ HAVE_STRCASESTR=0
+ gl_PREREQ_STRCASESTR
+ fi
])
# Prerequisites of lib/strcasestr.c.
HAVE_DECL_STRNLEN=1; AC_SUBST([HAVE_DECL_STRNLEN])
HAVE_STRPBRK=1; AC_SUBST([HAVE_STRPBRK])
HAVE_STRSEP=1; AC_SUBST([HAVE_STRSEP])
+ HAVE_STRCASESTR=1; AC_SUBST([HAVE_STRCASESTR])
HAVE_DECL_STRTOK_R=1; AC_SUBST([HAVE_DECL_STRTOK_R])
- REPLACE_STRCASESTR=0; AC_SUBST([REPLACE_STRCASESTR])
])
AC_DEFUN([gl_STRING_MODULE_INDICATOR],
GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR])
GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR])
GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP])
+ GNULIB_MBSCASESTR=0; AC_SUBST([GNULIB_MBSCASESTR])
])
-e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
-e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
-e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
+ -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \
-e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
-e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
-e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \
-e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \
-e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \
-e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \
+ -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \
-e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \
- -e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \
< $(srcdir)/string_.h; \
} > $@-t
mv $@-t $@