From def1af84d3544757a7024228df9ca52f22a58e9c Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Wed, 11 Mar 2009 00:25:15 +0100 Subject: [PATCH] New module 'mbmemcasecoll'. --- ChangeLog | 5 ++ lib/mbmemcasecoll.c | 186 ++++++++++++++++++++++++++++++++++++++++++ lib/mbmemcasecoll.h | 58 +++++++++++++ modules/mbmemcasecoll | 30 +++++++ 4 files changed, 279 insertions(+) create mode 100644 lib/mbmemcasecoll.c create mode 100644 lib/mbmemcasecoll.h create mode 100644 modules/mbmemcasecoll diff --git a/ChangeLog b/ChangeLog index f27f63c754..2d12463147 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2009-03-10 Bruno Haible + New module 'mbmemcasecoll'. + * lib/mbmemcasecoll.h: New file. + * lib/mbmemcasecoll.c: New file. + * modules/mbmemcasecoll: New file. + * tests/test-mbmemcasecmp.h: New file, extracted from tests/test-mbmemcasecmp.c. * tests/test-mbmemcasecmp.c: Include test-mbmemcasecmp.h. diff --git a/lib/mbmemcasecoll.c b/lib/mbmemcasecoll.c new file mode 100644 index 0000000000..ed0a84f3ff --- /dev/null +++ b/lib/mbmemcasecoll.c @@ -0,0 +1,186 @@ +/* Locale-specific case-ignoring memory comparison. + Copyright (C) 2001, 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "mbmemcasecoll.h" + +#include +#include +#include + +/* Get tolower(). 
*/
+#include <ctype.h>
+
+/* Get mbstate_t, mbrtowc(), wcrtomb().  */
+#include <wchar.h>
+
+/* Get towlower().  */
+#include <wctype.h>
+
+#include "malloca.h"
+#include "memcmp2.h"
+#include "memcoll.h"
+
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+
+/* Apply towlower() to the multibyte character sequence in INBUF, storing the
+   result as a multibyte character sequence in OUTBUF.
+
+   INBUF holds INBUFSIZE bytes.  Each character is decoded with mbrtowc()
+   starting from a zeroed conversion state.  A character whose lower-case
+   form differs and can be re-encoded by wcrtomb() is replaced; any other
+   character is copied unchanged.  A byte that does not start a valid
+   character is copied as is, and so is a trailing incomplete sequence.
+
+   OUTBUFSIZE is the capacity of OUTBUF.  It is only verified after the
+   conversion; abort() is called if more bytes than that were produced, so
+   the caller must supply a buffer that is large enough.
+
+   Return the number of bytes stored in OUTBUF.  */
+static size_t
+apply_towlower (const char *inbuf, size_t inbufsize,
+                char *outbuf, size_t outbufsize)
+{
+  char *outbuf_orig = outbuf;
+  size_t remaining;
+
+  remaining = inbufsize;
+  while (remaining > 0)
+    {
+      wchar_t wc1;
+      size_t n1;
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      n1 = mbrtowc (&wc1, inbuf, remaining, &state);
+      if (n1 == (size_t)(-2))
+        break;
+      if (n1 != (size_t)(-1))
+        {
+          wint_t wc2;
+
+          /* mbrtowc() returns 0 when it decodes the NUL character.  That
+             character still occupies one input byte; without this
+             adjustment neither INBUF nor REMAINING would advance and the
+             loop would never terminate on an embedded NUL.  */
+          if (n1 == 0)
+            n1 = 1;
+
+          wc2 = towlower (wc1);
+          if (wc2 != wc1)
+            {
+              size_t n2;
+
+              memset (&state, '\0', sizeof (mbstate_t));
+              n2 = wcrtomb (outbuf, wc2, &state);
+              if (n2 != (size_t)(-1))
+                {
+                  /* Store the translated multibyte character.  */
+                  inbuf += n1;
+                  remaining -= n1;
+                  outbuf += n2;
+                  continue;
+                }
+            }
+
+          /* Nothing to translate.  */
+          memcpy (outbuf, inbuf, n1);
+          inbuf += n1;
+          remaining -= n1;
+          outbuf += n1;
+          continue;
+        }
+
+      /* Invalid multibyte character on input.
+         Copy one byte without modification.  */
+      *outbuf++ = *inbuf++;
+      remaining -= 1;
+    }
+  /* Incomplete multibyte sequence on input.
+     Pass it through unmodified.  */
+  while (remaining > 0)
+    {
+      *outbuf++ = *inbuf++;
+      remaining -= 1;
+    }
+
+  /* Verify the output buffer was large enough.  OUTBUF never moves
+     backwards, so the pointer difference is non-negative and can be
+     compared as an unsigned quantity.  */
+  if ((size_t) (outbuf - outbuf_orig) > outbufsize)
+    abort ();
+
+  /* Return the number of written output bytes.  */
+  return outbuf - outbuf_orig;
+}
+
+/* Apply tolower() to the unibyte character sequence in INBUF, storing the
+   result as a unibyte character sequence in OUTBUF.
*/
+static void
+apply_tolower (const char *inbuf, char *outbuf, size_t bufsize)
+{
+  for (; bufsize > 0; bufsize--)
+    {
+      *outbuf = TOLOWER ((unsigned char) *inbuf);
+      inbuf++;
+      outbuf++;
+    }
+}
+
+/* Compare the memory regions [S1, S1+S1LEN) and [S2, S2+S2LEN) ignoring
+   case.  Both regions are first converted to lower case into temporary
+   buffers: with towlower() when MB_CUR_MAX > 1, with tolower() otherwise.
+   The converted copies are then compared with memcoll() if HARD_LC_COLLATE
+   is true, and with memcmp2() otherwise.
+
+   Return the comparison result.  If the temporary buffers cannot be
+   allocated, or their combined size is not representable as a size_t,
+   set errno to ENOMEM and return 0.  On the memcmp2() path errno is set
+   to 0; on the memcoll() path errno is left as memcoll() set it.  */
+int
+mbmemcasecoll (const char *s1, size_t s1len, const char *s2, size_t s2len,
+               bool hard_LC_COLLATE)
+{
+  const size_t size_max = (size_t)(-1);
+  char *t1;
+  size_t t1len;
+  char *t2;
+  size_t t2len;
+  char *memory;
+  int cmp;
+
+  if (MB_CUR_MAX > 1)
+    {
+      /* Application of towlower grows each character by a factor 2
+         at most.  Refuse lengths whose doubling would wrap around;
+         a wrapped size would yield a buffer that is too small.  */
+      if (s1len > size_max / 2 || s2len > size_max / 2)
+        {
+          errno = ENOMEM;
+          return 0;
+        }
+      t1len = 2 * s1len;
+      t2len = 2 * s2len;
+    }
+  else
+    {
+      /* Application of tolower doesn't change the size.  */
+      t1len = s1len;
+      t2len = s2len;
+    }
+  /* The allocation below needs t1len + 1 + t2len + 1 bytes; make sure
+     that sum does not overflow either.  */
+  if (t1len > size_max - 2 || t2len > size_max - 2 - t1len)
+    {
+      errno = ENOMEM;
+      return 0;
+    }
+  /* Allocate memory for t1 and t2.  */
+  memory = (char *) malloca (t1len + 1 + t2len + 1);
+  if (memory == NULL)
+    {
+      errno = ENOMEM;
+      return 0;
+    }
+  t1 = memory;
+  t2 = memory + t1len + 1;
+
+  /* Case-fold the two argument strings.  */
+  if (MB_CUR_MAX > 1)
+    {
+      t1len = apply_towlower (s1, s1len, t1, t1len);
+      t2len = apply_towlower (s2, s2len, t2, t2len);
+    }
+  else
+    {
+      apply_tolower (s1, t1, s1len);
+      apply_tolower (s2, t2, s2len);
+    }
+
+  /* Compare the two case-folded strings.  */
+  if (hard_LC_COLLATE)
+    cmp = memcoll (t1, t1len, t2, t2len);
+  else
+    {
+      cmp = memcmp2 (t1, t1len, t2, t2len);
+      errno = 0;
+    }
+
+  /* Release the temporary buffers without disturbing the errno value
+     that is reported to the caller.  */
+  {
+    int saved_errno = errno;
+    freea (memory);
+    errno = saved_errno;
+  }
+
+  return cmp;
+}
diff --git a/lib/mbmemcasecoll.h b/lib/mbmemcasecoll.h
new file mode 100644
index 0000000000..ed774fb3be
--- /dev/null
+++ b/lib/mbmemcasecoll.h
@@ -0,0 +1,58 @@
+/* Locale-specific case-ignoring memory comparison.
+   Copyright (C) 2001, 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible, 2001.  */
+
+#ifndef MBMEMCASECOLL_H
+#define MBMEMCASECOLL_H
+
+/* Get bool.  */
+#include <stdbool.h>
+/* Get size_t.  */
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Compare the memory regions S1 = [s1..s1+s1len-1], S2 = [s2..s2+s2len-1],
+   that contain character sequences, using the rules of the current locale,
+   ignoring case.
+   HARD_LC_COLLATE is false if the LC_COLLATE category of the current locale
+   is equivalent to the "C" locale.
+
+   This function's result is locale dependent.  Unlike memcasecmp(), it works
+   correctly in multibyte locales and also handles Turkish i / dotless i.
+   Unlike ulc_casecmp(), it does not handle the German sharp s and the Greek
+   final sigma.  Like memcoll() and ulc_casecoll(), it uses collation order.
+
+   Return a negative number if S1 < S2, a positive number if S1 > S2, 0 if
+   S1 and S2 have the same contents, or an unspecified value if there is an
+   error.
+   Set errno to an error number if there is an error, and to zero otherwise.
+
+   Note: This function may, in multibyte locales, return 0 for strings of
+   different lengths!  */
+
+extern int mbmemcasecoll (const char *s1, size_t s1len,
+                          const char *s2, size_t s2len,
+                          bool hard_LC_COLLATE);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MBMEMCASECOLL_H */
diff --git a/modules/mbmemcasecoll b/modules/mbmemcasecoll
new file mode 100644
index 0000000000..9820097118
--- /dev/null
+++ b/modules/mbmemcasecoll
@@ -0,0 +1,30 @@
+Description:
+mbmemcasecoll() function: locale dependent case-insensitive memory area
+comparison.
+ +Files: +lib/mbmemcasecoll.h +lib/mbmemcasecoll.c + +Depends-on: +stdbool +malloca +mbrtowc +wcrtomb +memcmp2 +memcoll + +configure.ac: + +Makefile.am: +lib_SOURCES += mbmemcasecoll.c + +Include: +"mbmemcasecoll.h" + +License: +GPL + +Maintainer: +Bruno Haible + -- 2.30.2