strings.
+2007-02-04 Bruno Haible <bruno@clisp.org>
+
+ New module mbscasecmp, reduced goal of strcasecmp.
+ * modules/mbscasecmp: New file.
+ * lib/mbscasecmp.c: New file, copied from lib/strcasecmp.c.
+ (mbscasecmp): Renamed from strcasecmp.
+ * lib/strcasecmp.c: Don't include mbuiter.h.
+ (strcasecmp): Remove support for multibyte locales.
+ * lib/string_.h (strcasecmp): Don't rename. Declare only if missing.
+ Change the conditional link warning.
+ (mbscasecmp): New declaration.
+ * m4/mbscasecmp.m4: New file.
+ * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
+ GNULIB_MBSCASECMP.
+ * modules/string (string.h): Also substitute GNULIB_MBSCASECMP.
+ * MODULES.html.sh (Internationalization functions): Add mbscasecmp.
+
2007-02-04 Bruno Haible <bruno@clisp.org>
New module mbsstr. Remove module strstr.
func_module mbschr
func_module mbsrchr
func_module mbsstr
+ func_module mbscasecmp
func_module mbswidth
func_module memcasecmp
func_module memcoll
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
-/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
- greater than zero if S1 is lexicographically less than, equal to or greater
- than S2.
+/* Compare the character strings S1 and S2, ignoring case, returning less than,
+ equal to or greater than zero if S1 is lexicographically less than, equal to
+ or greater than S2.
Note: This function may, in multibyte locales, return 0 for strings of
different lengths! */
int
-strcasecmp (const char *s1, const char *s2)
+mbscasecmp (const char *s1, const char *s2)
{
if (s1 == s2)
return 0;
/* Case-insensitive string comparison function.
Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc.
- Written by Bruno Haible <bruno@clisp.org>, 2005,
- based on earlier glibc code.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <ctype.h>
#include <limits.h>
-#if HAVE_MBRTOWC
-# include "mbuiter.h"
-#endif
-
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
greater than zero if S1 is lexicographically less than, equal to or greater
than S2.
- Note: This function may, in multibyte locales, return 0 for strings of
- different lengths! */
+ Note: This function does not work with multibyte strings! */
+
int
strcasecmp (const char *s1, const char *s2)
{
- if (s1 == s2)
+ const unsigned char *p1 = (const unsigned char *) s1;
+ const unsigned char *p2 = (const unsigned char *) s2;
+ unsigned char c1, c2;
+
+ if (p1 == p2)
return 0;
- /* Be careful not to look at the entire extent of s1 or s2 until needed.
- This is useful because when two strings differ, the difference is
- most often already in the very few first characters. */
-#if HAVE_MBRTOWC
- if (MB_CUR_MAX > 1)
+ do
{
- mbui_iterator_t iter1;
- mbui_iterator_t iter2;
+ c1 = TOLOWER (*p1);
+ c2 = TOLOWER (*p2);
- mbui_init (iter1, s1);
- mbui_init (iter2, s2);
+ if (c1 == '\0')
+ break;
- while (mbui_avail (iter1) && mbui_avail (iter2))
- {
- int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2));
-
- if (cmp != 0)
- return cmp;
-
- mbui_advance (iter1);
- mbui_advance (iter2);
- }
- if (mbui_avail (iter1))
- /* s2 terminated before s1. */
- return 1;
- if (mbui_avail (iter2))
- /* s1 terminated before s2. */
- return -1;
- return 0;
+ ++p1;
+ ++p2;
}
- else
-#endif
- {
- const unsigned char *p1 = (const unsigned char *) s1;
- const unsigned char *p2 = (const unsigned char *) s2;
- unsigned char c1, c2;
-
- do
- {
- c1 = TOLOWER (*p1);
- c2 = TOLOWER (*p2);
-
- if (c1 == '\0')
- break;
+ while (c1 == c2);
- ++p1;
- ++p2;
- }
- while (c1 == c2);
-
- if (UCHAR_MAX <= INT_MAX)
- return c1 - c2;
- else
- /* On machines where 'char' and 'int' are types of the same size, the
- difference of two 'unsigned char' values - including the sign bit -
- doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
- }
+ if (UCHAR_MAX <= INT_MAX)
+ return c1 - c2;
+ else
+ /* On machines where 'char' and 'int' are types of the same size, the
+ difference of two 'unsigned char' values - including the sign bit -
+ doesn't fit in an 'int'. */
+ return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
}
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
greater than zero if S1 is lexicographically less than, equal to or greater
than S2.
- Note: This function may, in multibyte locales, return 0 for strings of
- different lengths!
- No known system has a strcasecmp() function that works correctly in
- multibyte locales. Therefore use our version always, if the
- strcase module is available. */
-#if @GNULIB_STRCASE@
-# if @REPLACE_STRCASECMP@
-# define strcasecmp rpl_strcasecmp
-extern int strcasecmp (char const *__s1, char const *__s2);
-# endif
-#elif defined GNULIB_POSIXCHECK
+ Note: This function does not work in multibyte locales. */
+#if ! @HAVE_STRCASECMP@
+extern int strcasecmp (char const *s1, char const *s2);
+#endif
+#if defined GNULIB_POSIXCHECK
+/* strcasecmp() does not work with multibyte strings:
+ POSIX says that it operates on "strings", and "string" in POSIX is defined
+ as a sequence of bytes, not of characters. */
# undef strcasecmp
# define strcasecmp(a,b) \
- (GL_LINK_WARNING ("strcasecmp is often incorrectly implemented for multibyte locales - use gnulib module 'strcase' for correct and portable internationalization"), \
+ (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings in multibyte locales - use mbscasecmp if you care about internationalization, or use c_strcasecmp (from gnulib module c-strcase) if you want a locale independent function"), \
strcasecmp (a, b))
#endif
extern char * mbsstr (const char *haystack, const char *needle);
#endif
+#if @GNULIB_MBSCASECMP@
+/* Compare the character strings S1 and S2, ignoring case, returning less than,
+ equal to or greater than zero if S1 is lexicographically less than, equal to
+ or greater than S2.
+ Note: This function may, in multibyte locales, return 0 for strings of
+ different lengths!
+ Unlike strcasecmp(), this function works correctly in multibyte locales. */
+extern int mbscasecmp (const char *s1, const char *s2);
+#endif
+
#ifdef __cplusplus
}
GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR])
GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR])
GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR])
+ GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP])
])
-e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \
-e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
-e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
+ -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
-e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
-e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
-e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \