1 /* Case-insensitive string comparison function.
2 Copyright (C) 1998, 1999, 2005 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2005,
4 based on earlier glibc code.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
33 /* Like mbiter.h, except it doesn't look at the entire string. */
45 bool at_end; /* true if the end of the string has been reached */
46 bool in_shift; /* true if next byte may not be interpreted as ASCII */
47 mbstate_t state; /* if in_shift: current shift state */
48 bool next_done; /* true if mbi_avail has already filled the following */
49 struct mbchar cur; /* the current character:
50 const char *cur.ptr pointer to current character
51 The following are only valid after mbi_avail.
52 size_t cur.bytes number of bytes of current character
53 bool cur.wc_valid true if wc is a valid wide character
54 wchar_t cur.wc if wc_valid: the current character */
59 mbiter_multi_next (struct mbiter_multi *iter)
65 /* Handle most ASCII characters quickly, without calling mbrtowc(). */
66 if (is_basic (*iter->cur.ptr))
68 /* These characters are part of the basic character set. ISO C 99
69 guarantees that their wide character code is identical to their char code. */
72 iter->cur.wc = *iter->cur.ptr;
73 iter->cur.wc_valid = true;
77 assert (mbsinit (&iter->state));
78 iter->in_shift = true;
80 iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr,
81 strnlen1 (iter->cur.ptr, MB_CUR_MAX),
83 if (iter->cur.bytes == (size_t) -1)
85 /* An invalid multibyte sequence was encountered. */
87 iter->cur.wc_valid = false;
88 /* Whether to set iter->in_shift = false and reset iter->state
89 or not is not very important; the string is bogus anyway. */
91 else if (iter->cur.bytes == (size_t) -2)
93 /* An incomplete multibyte character at the end. */
94 iter->cur.bytes = strlen (iter->cur.ptr) + 1;
95 iter->cur.wc_valid = false;
96 /* Whether to set iter->in_shift = false and reset iter->state
97 or not is not important; the string end is reached anyway. */
101 if (iter->cur.bytes == 0)
103 /* A null wide character was encountered. */
105 assert (*iter->cur.ptr == '\0');
106 assert (iter->cur.wc == 0);
108 iter->cur.wc_valid = true;
110 /* When in the initial state, we can go back treating ASCII
111 characters more quickly. */
112 if (mbsinit (&iter->state))
113 iter->in_shift = false;
116 iter->next_done = true;
120 mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff)
122 iter->cur.ptr += ptrdiff;
125 /* Iteration macros. */
/* Iterator type operated on by the mbi_* macros below; an alias for
   struct mbiter_multi. */
126 typedef struct mbiter_multi mbi_iterator_t;
/* mbi_init (iter, startptr): prepare ITER (an lvalue of type mbi_iterator_t)
   for iterating over the string that begins at STARTPTR.
   Sets cur.ptr to STARTPTR, clears the at_end, in_shift and next_done flags,
   and zero-fills the conversion state.  No character is decoded here.
   ITER is expanded several times, so it must have no side effects.  */
127 #define mbi_init(iter, startptr) \
128 ((iter).cur.ptr = (startptr), (iter).at_end = false, \
129 (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \
130 (iter).next_done = false)
/* mbi_avail (iter): test whether another character is available.
   Evaluates to false, without touching ITER, once at_end has been set.
   Otherwise calls mbiter_multi_next on ITER (which fills in the fields of
   cur for the character at cur.ptr) and evaluates to true.
   ITER is expanded more than once, so it must have no side effects.  */
131 #define mbi_avail(iter) \
132 (!(iter).at_end && (mbiter_multi_next (&(iter)), true))
/* mbi_advance (iter): step past the current character.
   If mb_isnul reports true for the current character, at_end is set so that
   later mbi_avail calls yield false.  (mb_isnul is defined outside this
   view; presumably it tests for the terminating null character -- confirm
   in mbchar.h.)
   Then cur.ptr is moved forward by cur.bytes and next_done is cleared,
   presumably so that the next mbi_avail decodes the following character.
   cur.bytes must already be valid, i.e. mbi_avail must have been called
   for the current position.  ITER is expanded several times.  */
133 #define mbi_advance(iter) \
134 ((mb_isnul ((iter).cur) ? ((iter).at_end = true) : 0), \
135 (iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)
137 /* Access to the current character. */
/* mbi_cur (iter): the current character as a struct mbchar.  Fields other
   than ptr are only meaningful after mbi_avail has been called.  */
138 #define mbi_cur(iter) (iter).cur
/* mbi_cur_ptr (iter): pointer to the first byte of the current character.  */
139 #define mbi_cur_ptr(iter) (iter).cur.ptr
/* TOLOWER (Ch): lowercase equivalent of Ch if isupper (Ch) holds, otherwise
   Ch unchanged.  Ch is expanded up to twice, so it must have no side
   effects, and its value must be acceptable to isupper/tolower (an
   unsigned char value or EOF).  */
143 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
145 /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
146 greater than zero if S1 is lexicographically less than, equal to or greater than S2.
148 Note: This function may, in multibyte locales, return 0 for strings of
149 different lengths! */
151 strcasecmp (const char *s1, const char *s2)
156 /* Be careful not to look at the entire extent of s1 or s2 until needed.
157 This is useful because when two strings differ, the difference is
158 most often already in the very few first characters. */
162 mbi_iterator_t iter1;
163 mbi_iterator_t iter2;
165 mbi_init (iter1, s1);
166 mbi_init (iter2, s2);
168 while (mbi_avail (iter1) && mbi_avail (iter2))
170 /* Sort invalid characters after all valid ones. */
171 if (!mbi_cur (iter1).wc_valid)
173 if (!mbi_cur (iter2).wc_valid)
175 /* Compare two invalid characters. */
178 if (mbi_cur (iter1).bytes > mbi_cur (iter2).bytes)
180 if (mbi_cur (iter1).bytes < mbi_cur (iter2).bytes)
182 cmp = memcmp (mbi_cur_ptr (iter1), mbi_cur_ptr (iter2),
183 mbi_cur (iter1).bytes);
188 /* mbi_cur (iter1) invalid, mbi_cur (iter2) valid. */
193 if (!mbi_cur (iter2).wc_valid)
194 /* mbi_cur (iter1) valid, mbi_cur (iter2) invalid. */
198 /* Compare two valid characters. */
199 wchar_t c1 = towlower (mbi_cur (iter1).wc);
200 wchar_t c2 = towlower (mbi_cur (iter2).wc);
211 if (mbi_avail (iter1))
212 /* s2 terminated before s1. */
214 if (mbi_avail (iter2))
215 /* s1 terminated before s2. */
222 const unsigned char *p1 = (const unsigned char *) s1;
223 const unsigned char *p2 = (const unsigned char *) s2;
224 unsigned char c1, c2;