-/* strcasecmp.c -- case insensitive string comparator
- Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+/* Case-insensitive string comparison function.
+ Copyright (C) 1998, 1999, 2005 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2005,
+ based on earlier glibc code.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
-#ifdef LENGTH_LIMIT
-# define STRXCASECMP_FUNCTION strncasecmp
-# define STRXCASECMP_DECLARE_N , size_t n
-# define LENGTH_LIMIT_EXPR(Expr) Expr
-#else
-# define STRXCASECMP_FUNCTION strcasecmp
-# define STRXCASECMP_DECLARE_N /* empty */
-# define LENGTH_LIMIT_EXPR(Expr) 0
-#endif
+/* Specification. */
+#include "strcase.h"
-#include <stddef.h>
#include <ctype.h>
-#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+#if HAVE_MBRTOWC
-/* Compare {{no more than N characters of }}strings S1 and S2,
- ignoring case, returning less than, equal to or
- greater than zero if S1 is lexicographically less
- than, equal to or greater than S2. */
+#include "strnlen1.h"
-int
-STRXCASECMP_FUNCTION (const char *s1, const char *s2 STRXCASECMP_DECLARE_N)
+/* Like mbiter.h, except it doesn't look at the entire string. */
+
+#include "mbchar.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
+
+/* State of an iteration over a NUL-terminated multibyte string.
+ The iterator looks at one character at a time: mbiter_multi_next decodes
+ the character at cur.ptr on demand and records in next_done that the
+ decoded fields of cur are up to date. */
+struct mbiter_multi
+{
+ bool at_end; /* true if the end of the string has been reached */
+ bool in_shift; /* true if next byte may not be interpreted as ASCII */
+ mbstate_t state; /* if in_shift: current shift state */
+ bool next_done; /* true if mbi_avail has already filled the following */
+ struct mbchar cur; /* the current character:
+ const char *cur.ptr pointer to current character
+ The following are only valid after mbi_avail.
+ size_t cur.bytes number of bytes of current character
+ bool cur.wc_valid true if wc is a valid wide character
+ wchar_t cur.wc if wc_valid: the current character
+ */
+};
+
+/* Decode the multibyte character at ITER->cur.ptr and cache the result.
+
+   On return ITER->cur.bytes holds the length of the character in bytes and
+   ITER->cur.wc_valid tells whether ITER->cur.wc holds its wide-character
+   value.  The work is done at most once per position: ITER->next_done is
+   set on exit, and the function returns immediately while it is set.  */
+static inline void
+mbiter_multi_next (struct mbiter_multi *iter)
{
- register const unsigned char *p1 = (const unsigned char *) s1;
- register const unsigned char *p2 = (const unsigned char *) s2;
- unsigned char c1, c2;
+  if (iter->next_done)
+    return;
+  /* While a shift state is pending, the fast path below must be skipped.  */
+  if (iter->in_shift)
+    goto with_shift;
+  /* Handle most ASCII characters quickly, without calling mbrtowc().  */
+  if (is_basic (*iter->cur.ptr))
+    {
+      /* These characters are part of the basic character set.  ISO C 99
+         guarantees that their wide character code is identical to their
+         char code.  */
+      iter->cur.bytes = 1;
+      iter->cur.wc = *iter->cur.ptr;
+      iter->cur.wc_valid = true;
+    }
+  else
+    {
+      assert (mbsinit (&iter->state));
+      iter->in_shift = true;
+    with_shift:
+      /* Hand mbrtowc at most MB_CUR_MAX bytes so that it never looks at
+         more of the string than one character can occupy.  */
+      iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr,
+                                 strnlen1 (iter->cur.ptr, MB_CUR_MAX),
+                                 &iter->state);
+      if (iter->cur.bytes == (size_t) -1)
+        {
+          /* An invalid multibyte sequence was encountered.  */
+          iter->cur.bytes = 1;
+          iter->cur.wc_valid = false;
+          /* Whether to set iter->in_shift = false and reset iter->state
+             or not is not very important; the string is bogus anyway.  */
+        }
+      else if (iter->cur.bytes == (size_t) -2)
+        {
+          /* An incomplete multibyte character at the end.  It consists of
+             every remaining byte up to, but not including, the terminating
+             NUL.  Counting the terminator as well would make mbi_advance
+             step past the end of the string, and the next decode would
+             read out of bounds.  */
+          iter->cur.bytes = strlen (iter->cur.ptr);
+          iter->cur.wc_valid = false;
+          /* Drop the partial conversion state so that the terminating NUL
+             is decoded from the initial state and ends the iteration.  */
+          memset (&iter->state, '\0', sizeof (mbstate_t));
+          iter->in_shift = false;
+        }
+      else
+        {
+          if (iter->cur.bytes == 0)
+            {
+              /* A null wide character was encountered.  */
+              iter->cur.bytes = 1;
+              assert (*iter->cur.ptr == '\0');
+              assert (iter->cur.wc == 0);
+            }
+          iter->cur.wc_valid = true;
+
+          /* When in the initial state, we can go back treating ASCII
+             characters more quickly.  */
+          if (mbsinit (&iter->state))
+            iter->in_shift = false;
+        }
+    }
+  iter->next_done = true;
+}
+
+/* Move ITER's current-character pointer by PTRDIFF bytes.  Only cur.ptr is
+   modified; every other field of ITER is left as it was.  */
+static inline void
+mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff)
+{
+  iter->cur.ptr = iter->cur.ptr + ptrdiff;
+}
- if (p1 == p2 || LENGTH_LIMIT_EXPR (n == 0))
+/* Iteration macros.
+ Each macro expands its ITER argument more than once, so ITER must be an
+ lvalue expression without side effects. */
+/* Iterator type operated on by the mbi_* macros. */
+typedef struct mbiter_multi mbi_iterator_t;
+/* mbi_init (iter, startptr): point ITER at STARTPTR and clear at_end,
+ in_shift, the conversion state and next_done. */
+#define mbi_init(iter, startptr) \
+ ((iter).cur.ptr = (startptr), (iter).at_end = false, \
+ (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \
+ (iter).next_done = false)
+/* mbi_avail (iter): false once at_end is set; otherwise decode the current
+ character through mbiter_multi_next and yield true. */
+#define mbi_avail(iter) \
+ (!(iter).at_end && (mbiter_multi_next (&(iter)), true))
+/* mbi_advance (iter): set at_end when mb_isnul reports the current character
+ as the NUL character, then step cur.ptr over the current character and
+ clear next_done so that the next mbi_avail decodes afresh. */
+#define mbi_advance(iter) \
+ ((mb_isnul ((iter).cur) ? ((iter).at_end = true) : 0), \
+ (iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)
+
+/* Access to the current character. */
+/* mbi_cur yields the whole struct mbchar, mbi_cur_ptr only its start pointer. */
+#define mbi_cur(iter) (iter).cur
+#define mbi_cur_ptr(iter) (iter).cur.ptr
+
+#endif
+
+/* Yield the tolower mapping of Ch when isupper accepts it, and Ch unchanged
+ otherwise. Ch is expanded more than once, so it must not have side
+ effects. */
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+
+/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
+ greater than zero if S1 is lexicographically less than, equal to or greater
+ than S2.
+ When HAVE_MBRTOWC is set and MB_CUR_MAX > 1, the strings are decoded and
+ compared one multibyte character at a time after towlower; otherwise they
+ are compared byte by byte after TOLOWER.
+ Note: This function may, in multibyte locales, return 0 for strings of
+ different lengths! */
+int
+strcasecmp (const char *s1, const char *s2)
+{
+ /* The same pointer always compares equal; there is nothing to scan. */
+ if (s1 == s2)
return 0;
- do
+ /* Be careful not to look at the entire extent of s1 or s2 until needed.
+ This is useful because when two strings differ, the difference is
+ most often already in the very few first characters. */
+#if HAVE_MBRTOWC
+ /* Multibyte locale: walk both strings character by character. */
+ if (MB_CUR_MAX > 1)
{
- c1 = TOLOWER (*p1);
- c2 = TOLOWER (*p2);
+ mbi_iterator_t iter1;
+ mbi_iterator_t iter2;
- if (LENGTH_LIMIT_EXPR (--n == 0) || c1 == '\0')
- break;
+ mbi_init (iter1, s1);
+ mbi_init (iter2, s2);
- ++p1;
- ++p2;
+ while (mbi_avail (iter1) && mbi_avail (iter2))
+ {
+ /* Sort invalid characters after all valid ones. */
+ if (!mbi_cur (iter1).wc_valid)
+ {
+ if (!mbi_cur (iter2).wc_valid)
+ {
+ /* Compare two invalid characters. */
+ /* The longer sequence sorts later; equal lengths are ordered
+ by their raw bytes. */
+ int cmp;
+
+ if (mbi_cur (iter1).bytes > mbi_cur (iter2).bytes)
+ return 1;
+ if (mbi_cur (iter1).bytes < mbi_cur (iter2).bytes)
+ return -1;
+ cmp = memcmp (mbi_cur_ptr (iter1), mbi_cur_ptr (iter2),
+ mbi_cur (iter1).bytes);
+ if (cmp != 0)
+ return cmp;
+ }
+ else
+ /* mbi_cur (iter1) invalid, mbi_cur (iter2) valid. */
+ return 1;
+ }
+ else
+ {
+ if (!mbi_cur (iter2).wc_valid)
+ /* mbi_cur (iter1) valid, mbi_cur (iter2) invalid. */
+ return -1;
+ else
+ {
+ /* Compare two valid characters. */
+ wchar_t c1 = towlower (mbi_cur (iter1).wc);
+ wchar_t c2 = towlower (mbi_cur (iter2).wc);
+
+ if (c1 > c2)
+ return 1;
+ if (c1 < c2)
+ return -1;
+ }
+ }
+ /* The current characters compare equal; move both iterators on. */
+ mbi_advance (iter1);
+ mbi_advance (iter2);
+ }
+ /* At least one iterator is at its end; an iterator that still has a
+ character available belongs to the longer string. */
+ if (mbi_avail (iter1))
+ /* s2 terminated before s1. */
+ return 1;
+ if (mbi_avail (iter2))
+ /* s1 terminated before s2. */
+ return -1;
+ return 0;
}
- while (c1 == c2);
+ else
+#endif
+ {
+ /* Byte-by-byte comparison; the bytes are read as unsigned char so
+ that the values handed to isupper and tolower are never negative. */
+ const unsigned char *p1 = (const unsigned char *) s1;
+ const unsigned char *p2 = (const unsigned char *) s2;
+ unsigned char c1, c2;
- return c1 - c2;
+ do
+ {
+ c1 = TOLOWER (*p1);
+ c2 = TOLOWER (*p2);
+
+ /* Stop at the end of s1; if s2 ends first, c1 != c2 ends the loop. */
+ if (c1 == '\0')
+ break;
+
+ ++p1;
+ ++p2;
+ }
+ while (c1 == c2);
+
+ /* Difference of the last pair of lowercased bytes examined. */
+ return c1 - c2;
+ }
}
-#define LENGTH_LIMIT
-#include "strcasecmp.c"
+/* strncasecmp.c -- case insensitive string comparator
+ Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification. */
+#include "strcase.h"
+
+#include <ctype.h>
+
+/* Yield the tolower mapping of Ch when isupper accepts it, and Ch unchanged
+ otherwise. Ch is expanded more than once, so it must not have side
+ effects. */
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+
+/* Case-insensitively compare at most N bytes of the strings S1 and S2.
+   The comparison stops at the first differing pair of bytes, at the end
+   of S1, or after N bytes, whichever comes first.  The result is negative,
+   zero or positive according to whether S1 sorts before, equal to or
+   after S2.  */
+
+int
+strncasecmp (const char *s1, const char *s2, size_t n)
+{
+  const unsigned char *left = (const unsigned char *) s1;
+  const unsigned char *right = (const unsigned char *) s2;
+
+  /* Nothing to compare, or both arguments are the very same string.  */
+  if (left == right || n == 0)
+    return 0;
+
+  for (;; ++left, ++right)
+    {
+      unsigned char a = TOLOWER (*left);
+      unsigned char b = TOLOWER (*right);
+
+      /* Finish once the byte budget is used up, S1 has ended, or the
+         lowercased bytes differ; the result is their difference.  */
+      if (--n == 0 || a == '\0' || a != b)
+        return a - b;
+    }
+}