/* Elementary Unicode string functions.
- Copyright (C) 2002, 2005-2007 Free Software Foundation, Inc.
+ Copyright (C) 2001-2002, 2005-2009 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU Library General Public License as published
- by the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
- USA. */
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef _UNISTR_H
#define _UNISTR_H
/* Return the length (number of units) of the first character in S, putting
its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
- and an appropriate number of units is returned. */
-/* Similar to mbtowc(), except that puc and s must not be NULL, and the NUL
- character is not treated specially. */
+ and an appropriate number of units is returned.
+ The number of available units, N, must be > 0. */
+/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
+ and the NUL character is not treated specially. */
/* The variants with _safe suffix are safe, even if the library is compiled
without --enable-safety. */
extern int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
# else
-# include "utf8-ucs4-unsafe.h"
+extern int
+ u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
+static inline int
+u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return 1;
+ }
+ else
+ return u8_mbtouc_unsafe_aux (puc, s, n);
+}
# endif
#endif
-#ifdef GNULIB_UNISTR_U16_MBTOUC
+#ifdef GNULIB_UNISTR_U16_MBTOUC_UNSAFE
# if !HAVE_INLINE
extern int
- u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
+ u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
# else
-# include "utf16-ucs4.h"
+extern int
+ u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
+static inline int
+u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ *puc = c;
+ return 1;
+ }
+ else
+ return u16_mbtouc_unsafe_aux (puc, s, n);
+}
# endif
#endif
-#ifdef GNULIB_UNISTR_U32_MBTOUC
+#ifdef GNULIB_UNISTR_U32_MBTOUC_UNSAFE
# if !HAVE_INLINE
extern int
- u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
+ u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
# else
static inline int
-u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n)
+u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_)
{
uint32_t c = *s;
extern int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
# else
-# include "utf8-ucs4.h"
+extern int
+ u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
+static inline int
+u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return 1;
+ }
+ else
+ return u8_mbtouc_aux (puc, s, n);
+}
# endif
#endif
-#ifdef GNULIB_UNISTR_U16_MBTOUC_SAFE
+#ifdef GNULIB_UNISTR_U16_MBTOUC
# if !HAVE_INLINE
extern int
- u16_mbtouc_safe (ucs4_t *puc, const uint16_t *s, size_t n);
+ u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
# else
-# include "utf16-ucs4-safe.h"
+extern int
+ u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
+static inline int
+u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ *puc = c;
+ return 1;
+ }
+ else
+ return u16_mbtouc_aux (puc, s, n);
+}
# endif
#endif
-#ifdef GNULIB_UNISTR_U32_MBTOUC_SAFE
+#ifdef GNULIB_UNISTR_U32_MBTOUC
# if !HAVE_INLINE
extern int
- u32_mbtouc_safe (ucs4_t *puc, const uint32_t *s, size_t n);
+ u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
# else
static inline int
-u32_mbtouc_safe (ucs4_t *puc, const uint32_t *s, size_t n)
+u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_)
{
uint32_t c = *s;
# endif
#endif
+/* Return the length (number of units) of the first character in S, putting
+ its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
+ and -1 is returned for an invalid sequence of units, -2 is returned for an
+ incomplete sequence of units.
+ The number of available units, N, must be > 0. */
+/* Similar to u*_mbtouc(), except that the return value gives more details
+ about the failure, similar to mbrtowc(). */
+
+#ifdef GNULIB_UNISTR_U8_MBTOUCR
+extern int
+ u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
+#endif
+
+#ifdef GNULIB_UNISTR_U16_MBTOUCR
+extern int
+ u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
+#endif
+
+#ifdef GNULIB_UNISTR_U32_MBTOUCR
+extern int
+ u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
+#endif
+
/* Put the multibyte character represented by UC in S, returning its
length. Return -1 upon failure, -2 if the number of available units, N,
is too small. The latter case cannot occur if N >= 6/2/1, respectively. */
must be specified. */
#ifdef GNULIB_UNISTR_U8_UCTOMB
+/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */
+extern int
+ u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
# if !HAVE_INLINE
extern int
u8_uctomb (uint8_t *s, ucs4_t uc, int n);
# else
-# include "ucs4-utf8.h"
+static inline int
+u8_uctomb (uint8_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0x80 && n > 0)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ else
+ return u8_uctomb_aux (s, uc, n);
+}
# endif
#endif
#ifdef GNULIB_UNISTR_U16_UCTOMB
+/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */
+extern int
+ u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
# if !HAVE_INLINE
extern int
u16_uctomb (uint16_t *s, ucs4_t uc, int n);
# else
-# include "ucs4-utf16.h"
+static inline int
+u16_uctomb (uint16_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0xd800 && n > 0)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ else
+ return u16_uctomb_aux (s, uc, n);
+}
# endif
#endif
extern uint32_t *
u32_chr (const uint32_t *s, size_t n, ucs4_t uc);
+/* Count the number of Unicode characters in the N units from S. */
+/* Similar to mbsnlen(). */
+extern size_t
+ u8_mbsnlen (const uint8_t *s, size_t n);
+extern size_t
+ u16_mbsnlen (const uint16_t *s, size_t n);
+extern size_t
+ u32_mbsnlen (const uint32_t *s, size_t n);
+
/* Elementary string functions with memory allocation. */
/* Make a freshly allocated copy of S, of length N. */
extern int
u32_strcmp (const uint32_t *s1, const uint32_t *s2);
+/* Compare S1 and S2 using the collation rules of the current locale.
+ Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
+ Upon failure, set errno and return any value. */
+/* Similar to strcoll(), wcscoll(). */
+extern int
+ u8_strcoll (const uint8_t *s1, const uint8_t *s2);
+extern int
+ u16_strcoll (const uint16_t *s1, const uint16_t *s2);
+extern int
+ u32_strcoll (const uint32_t *s1, const uint32_t *s2);
+
/* Compare no more than N units of S1 and S2. */
/* Similar to strncmp(), wcsncmp(). */
extern int