From: Bruno Haible <bruno@clisp.org> Date: Tue, 9 Jan 2007 14:08:43 +0000 (+0000) Subject: New modules 'unistr/u8-cmp', 'unistr/u16-cmp', 'unistr/u32-cmp'. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f3df4c99750b1debd07c30d60770b06a6c0be30e;p=pspp New modules 'unistr/u8-cmp', 'unistr/u16-cmp', 'unistr/u32-cmp'. --- diff --git a/lib/unistr/u16-cmp.c b/lib/unistr/u16-cmp.c new file mode 100644 index 0000000000..3b2b4c856d --- /dev/null +++ b/lib/unistr/u16-cmp.c @@ -0,0 +1,56 @@ +/* Compare pieces of UTF-16 strings. + Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2002. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#include <config.h> + +/* Specification. */ +#include "unistr.h" + +int +u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n) +{ + /* Note that the UTF-16 encoding does NOT preserve lexicographic order. + Namely, if uc1 is a 16-bit character and [uc2a,uc2b] is a surrogate pair, + we must enforce uc1 < [uc2a,uc2b], even if uc1 > uc2a. */ + for (; n > 0;) + { + uint16_t c1 = *s1++; + uint16_t c2 = *s2++; + if (c1 == c2) + { + n--; + continue; + } + if (c1 < 0xd800 || c1 >= 0xe000) + { + if (!(c2 < 0xd800 || c2 >= 0xe000)) + /* c2 is a surrogate, but c1 is not. 
*/ + return -1; + } + else + { + if (c2 < 0xd800 || c2 >= 0xe000) + /* c1 is a surrogate, but c2 is not. */ + return 1; + } + return (int)c1 - (int)c2; + /* > 0 if c1 > c2, < 0 if c1 < c2. */ + } + return 0; +} diff --git a/lib/unistr/u32-cmp.c b/lib/unistr/u32-cmp.c new file mode 100644 index 0000000000..4cdbd32d57 --- /dev/null +++ b/lib/unistr/u32-cmp.c @@ -0,0 +1,42 @@ +/* Compare pieces of UTF-32 strings. + Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2002. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#include <config.h> + +/* Specification. */ +#include "unistr.h" + +int +u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n) +{ + for (; n > 0;) + { + uint32_t uc1 = *s1++; + uint32_t uc2 = *s2++; + if (uc1 == uc2) + { + n--; + continue; + } + /* Note that uc1 and uc2 each have at most 31 bits. */ + return (int)uc1 - (int)uc2; + /* > 0 if uc1 > uc2, < 0 if uc1 < uc2. */ + } + return 0; +} diff --git a/lib/unistr/u8-cmp.c b/lib/unistr/u8-cmp.c new file mode 100644 index 0000000000..0e3395bd08 --- /dev/null +++ b/lib/unistr/u8-cmp.c @@ -0,0 +1,32 @@ +/* Compare pieces of UTF-8 strings. + Copyright (C) 2002, 2006 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2002. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#include <config.h> + +/* Specification. */ +#include "unistr.h" + +#include <string.h> + +int +u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n) +{ + /* Use the fact that the UTF-8 encoding preserves lexicographic order. */ + return memcmp ((const char *) s1, (const char *) s2, n); +} diff --git a/modules/unistr/u16-cmp b/modules/unistr/u16-cmp new file mode 100644 index 0000000000..f9c27542b1 --- /dev/null +++ b/modules/unistr/u16-cmp @@ -0,0 +1,23 @@ +Description: +Compare pieces of UTF-16 strings. + +Files: +lib/unistr/u16-cmp.c + +Depends-on: +unistr/base + +configure.ac: + +Makefile.am: +lib_SOURCES += unistr/u16-cmp.c + +Include: +"unistr.h" + +License: +LGPL + +Maintainer: +Bruno Haible + diff --git a/modules/unistr/u32-cmp b/modules/unistr/u32-cmp new file mode 100644 index 0000000000..594b445db6 --- /dev/null +++ b/modules/unistr/u32-cmp @@ -0,0 +1,23 @@ +Description: +Compare pieces of UTF-32 strings. 
+ +Files: +lib/unistr/u32-cmp.c + +Depends-on: +unistr/base + +configure.ac: + +Makefile.am: +lib_SOURCES += unistr/u32-cmp.c + +Include: +"unistr.h" + +License: +LGPL + +Maintainer: +Bruno Haible + diff --git a/modules/unistr/u8-cmp b/modules/unistr/u8-cmp new file mode 100644 index 0000000000..806306d8ad --- /dev/null +++ b/modules/unistr/u8-cmp @@ -0,0 +1,23 @@ +Description: +Compare pieces of UTF-8 strings. + +Files: +lib/unistr/u8-cmp.c + +Depends-on: +unistr/base + +configure.ac: + +Makefile.am: +lib_SOURCES += unistr/u8-cmp.c + +Include: +"unistr.h" + +License: +LGPL + +Maintainer: +Bruno Haible +