Use unitypes.h.
+2007-01-08 Bruno Haible <bruno@clisp.org>
+
+ * modules/utf8-ucs4 (Files, lib_SOURCES): Add unistr/utf8-ucs4.c.
+ (Depends-on): Add unitypes.
+ * lib/utf8-ucs4.h: Add double-inclusion guard. Include unitypes.h.
+ (u8_mbtouc_aux): Move out to separate file.
+ (u8_mbtouc): Use ucs4_t, uint8_t types.
+ * lib/unistr/utf8-ucs4.c: New file.
+
+ * modules/utf16-ucs4 (Files, lib_SOURCES): Add unistr/utf16-ucs4.c.
+ (Depends-on): Add unitypes.
+ * lib/utf16-ucs4.h: Add double-inclusion guard. Include unitypes.h.
+ (u16_mbtouc_aux): Move out to separate file.
+ (u16_mbtouc): Use ucs4_t, uint16_t types.
+ * lib/unistr/utf16-ucs4.c: New file.
+
+ * modules/ucs4-utf8 (Files, lib_SOURCES): Add unistr/ucs4-utf8.c.
+ (Depends-on): Add unitypes.
+ * lib/ucs4-utf8.h: Add double-inclusion guard. Include unitypes.h.
+ (u8_uctomb_aux): Move out to separate file.
+ (u8_uctomb): Use ucs4_t, uint8_t types.
+ * lib/unistr/ucs4-utf8.c: New file.
+
+ * modules/ucs4-utf16 (Files, lib_SOURCES): Add unistr/ucs4-utf16.c.
+ (Depends-on): Add unitypes.
+ * lib/ucs4-utf16.h: Add double-inclusion guard. Include unitypes.h.
+ (u16_uctomb_aux): Move out to separate file.
+ (u16_uctomb): Use ucs4_t, uint16_t types.
+ * lib/unistr/ucs4-utf16.c: New file.
+
2007-01-08 Bruno Haible <bruno@clisp.org>
Avoid variable names that conflict with those from libtool.
/* Conversion UCS-4 to UTF-16.
- Copyright (C) 2002 Free Software Foundation, Inc.
- Written by Bruno Haible <haible@clisp.cons.org>, 2002.
+ Copyright (C) 2002, 2005-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+#ifndef _UCS4_UTF16_H
+#define _UCS4_UTF16_H
#include <stddef.h>
+#include "unitypes.h"
+
+extern int u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
/* Return the length (number of units) of the UTF-16 representation of uc,
after storing it at S. Return -1 upon failure, -2 if the number of
available units, N, is too small. */
-static int
-u16_uctomb_aux (unsigned short *s, unsigned int uc, int n)
-{
- if (uc >= 0x10000)
- {
- if (uc < 0x110000)
- {
- if (n >= 2)
- {
- s[0] = 0xd800 + ((uc - 0x10000) >> 10);
- s[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff);
- return 2;
- }
- }
- else
- return -1;
- }
- return -2;
-}
-
static inline int
-u16_uctomb (unsigned short *s, unsigned int uc, int n)
+u16_uctomb (uint16_t *s, ucs4_t uc, int n)
{
- if (uc < 0x10000 && n > 0)
+ if (uc < 0xd800 && n > 0)
{
s[0] = uc;
return 1;
else
return u16_uctomb_aux (s, uc, n);
}
+
+#endif /* _UCS4_UTF16_H */
/* Conversion UCS-4 to UTF-8.
- Copyright (C) 2002 Free Software Foundation, Inc.
- Written by Bruno Haible <haible@clisp.cons.org>, 2002.
+ Copyright (C) 2002, 2005-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+#ifndef _UCS4_UTF8_H
+#define _UCS4_UTF8_H
#include <stddef.h>
+#include "unitypes.h"
+
+extern int u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
/* Return the length (number of units) of the UTF-8 representation of uc,
after storing it at S. Return -1 upon failure, -2 if the number of
available units, N, is too small. */
-static int
-u8_uctomb_aux (unsigned char *s, unsigned int uc, int n)
-{
- int count;
-
- if (uc < 0x80)
- count = 1;
- else if (uc < 0x800)
- count = 2;
- else if (uc < 0x10000)
- count = 3;
-#if 0
- else if (uc < 0x200000)
- count = 4;
- else if (uc < 0x4000000)
- count = 5;
- else if (uc <= 0x7fffffff)
- count = 6;
-#else
- else if (uc < 0x110000)
- count = 4;
-#endif
- else
- return -1;
-
- if (n < count)
- return -2;
-
- switch (count) /* note: code falls through cases! */
- {
-#if 0
- case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
- case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
-#endif
- case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
- case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
- case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
- case 1: s[0] = uc;
- }
- return count;
-}
-
static inline int
-u8_uctomb (unsigned char *s, unsigned int uc, int n)
+u8_uctomb (uint8_t *s, ucs4_t uc, int n)
{
if (uc < 0x80 && n > 0)
{
else
return u8_uctomb_aux (s, uc, n);
}
+
+#endif /* _UCS4_UTF8_H */
--- /dev/null
+/* Conversion UCS-4 to UTF-16.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+
+#include <config.h>
+
+/* Specification. */
+#include "ucs4-utf16.h"
+/* Out-of-line part of u16_uctomb (the inline function in ucs4-utf16.h).
+   Store the UTF-16 representation of UC at S and return the number of
+   units written (1 or 2).  Return -1 if UC lies in 0xd800..0xdfff or is
+   >= 0x110000, and -2 if N, the number of available units, is too small.
+   For UC < 0xd800 nothing is stored and -2 is returned.  */
+int
+u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0xd800)
+ {
+ /* When called from u16_uctomb, the case n >= 1 has already been handled
+    there, so nothing is stored here and control reaches the final
+    'return -2'. */
+ }
+ else if (uc < 0x10000)
+ {
+ if (uc >= 0xe000) /* 0xe000..0xffff: stored as a single unit */
+ {
+ if (n >= 1)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ }
+ else /* 0xd800..0xdfff: rejected */
+ return -1;
+ }
+ else
+ {
+ if (uc < 0x110000) /* 0x10000..0x10ffff: stored as two units */
+ {
+ if (n >= 2)
+ {
+ s[0] = 0xd800 + ((uc - 0x10000) >> 10); /* upper 10 bits of the offset */
+ s[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff); /* lower 10 bits of the offset */
+ return 2;
+ }
+ }
+ else
+ return -1;
+ }
+ return -2; /* nothing stored: N too small, or uc < 0xd800 (left to the caller) */
+}
--- /dev/null
+/* Conversion UCS-4 to UTF-8.
+ Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+
+#include <config.h>
+
+/* Specification. */
+#include "ucs4-utf8.h"
+/* Out-of-line part of u8_uctomb (the inline function in ucs4-utf8.h).
+   Store the UTF-8 representation of UC at S and return the number of
+   bytes written (2, 3 or 4).  Return -1 if UC lies in 0xd800..0xdfff or
+   is >= 0x110000, and -2 if N, the number of available bytes, is too
+   small.  For UC < 0x80 nothing is stored and -2 is returned.  */
+int
+u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n)
+{
+ int count;
+
+ if (uc < 0x80)
+ /* The case n >= 1 is already handled by the caller (u8_uctomb stores
+    the single byte itself when uc < 0x80 and n > 0). */
+ return -2;
+ else if (uc < 0x800)
+ count = 2;
+ else if (uc < 0x10000)
+ {
+ if (uc < 0xd800 || uc >= 0xe000)
+ count = 3;
+ else /* 0xd800..0xdfff: rejected */
+ return -1;
+ }
+#if 0
+ else if (uc < 0x200000)
+ count = 4;
+ else if (uc < 0x4000000)
+ count = 5;
+ else if (uc <= 0x7fffffff)
+ count = 6;
+#else
+ else if (uc < 0x110000)
+ count = 4;
+#endif
+ else
+ return -1;
+
+ if (n < count)
+ return -2;
+
+ switch (count) /* note: code falls through cases!
+    Each case stores the low 6 bits of uc as a byte of the form 0x80..0xbf,
+    shifts uc right by 6, and ORs in a constant; after the remaining shifts
+    those constants end up as the prefix bits of the first byte s[0]
+    (0xc0 for count 2, 0xe0 for count 3, 0xf0 for count 4). */
+ {
+#if 0
+ case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
+ case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
+#endif
+ case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
+ case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
+ case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
+ /*case 1:*/ s[0] = uc;
+ }
+ return count;
+}
--- /dev/null
+/* Conversion UTF-16 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+
+#include <config.h>
+
+/* Specification. */
+#include "utf16-ucs4.h"
+/* Out-of-line part of u16_mbtouc (the inline function in utf16-ucs4.h),
+   which calls it when the first unit *S is in the range 0xd800..0xdfff.
+   If a two-unit sequence is accepted, store the combined value in *PUC
+   and return 2; otherwise store 0xfffd in *PUC and return 1.
+   N is the number of available units.  The range tests on the two units
+   are compiled only when CONFIG_UNICODE_SAFETY is nonzero.  */
+int
+u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00) /* with c >= 0xd800 from u16_mbtouc, this leaves 0xd800..0xdbff */
+#endif
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if (s[1] >= 0xdc00 && s[1] < 0xe000) /* second unit must be 0xdc00..0xdfff */
+#endif
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character: only one unit is available, so
+    control falls through to the 0xfffd / 'return 1' code below */
+ }
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
--- /dev/null
+/* Conversion UTF-8 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+
+#include <config.h>
+
+/* Specification. */
+#include "utf8-ucs4.h"
+/* Out-of-line part of u8_mbtouc (the inline function in utf8-ucs4.h),
+   which calls it when the first byte *S is >= 0x80.
+   Decode the multibyte sequence starting at S, of which N bytes are
+   available.  On success store the decoded value in *PUC and return the
+   sequence length (2, 3 or 4).  If fewer bytes are available than the
+   first byte calls for, store 0xfffd in *PUC and return N.  In all other
+   cases store 0xfffd in *PUC and return 1.
+   The tests on the bytes after the first one are compiled only when
+   CONFIG_UNICODE_SAFETY is nonzero.  */
+int
+u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c >= 0xc2) /* first bytes below 0xc2 go straight to the invalid case at the end */
+ {
+ if (c < 0xe0) /* 0xc2..0xdf: two-byte sequence */
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40) /* true exactly when s[1] is in 0x80..0xbf */
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xf0) /* 0xe0..0xef: three-byte sequence */
+ {
+ if (n >= 3)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0) /* reject encodings of values < 0x800 */
+ && (c != 0xed || s[1] < 0xa0)) /* reject encodings of 0xd800..0xdfff */
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xf8) /* 0xf0..0xf7: four-byte sequence */
+ {
+ if (n >= 4)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90) /* reject encodings of values < 0x10000 */
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) /* reject values >= 0x110000 */
+#endif
+ )
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character: reached for a first byte outside the
+    ranges handled above, or when one of the tests above failed */
+ *puc = 0xfffd;
+ return 1;
+}
/* Conversion UTF-16 to UCS-4.
- Copyright (C) 2001-2002 Free Software Foundation, Inc.
- Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+ Copyright (C) 2001-2002, 2005-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+#ifndef _UTF16_UCS4_H
+#define _UTF16_UCS4_H
#include <stddef.h>
+#include "unitypes.h"
-/* Return the length (number of units) of the first character in S, putting
- its 'ucs4_t' representation in *PUC. */
-static int
-u16_mbtouc_aux (unsigned int *puc, const unsigned short *s, size_t n)
-{
- unsigned short c = *s;
+extern int u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
- if (c < 0xdc00)
- {
- if (n >= 2)
- {
- if (s[1] >= 0xdc00 && s[1] < 0xe000)
- {
- *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
- return 2;
- }
- /* invalid multibyte character */
- }
- else
- {
- /* incomplete multibyte character */
- *puc = 0xfffd;
- return n;
- }
- }
- /* invalid multibyte character */
- *puc = 0xfffd;
- return 1;
-}
+/* Return the length (number of units) of the first character in S, putting
+ its 'ucs4_t' representation in *PUC.
+ The number of available units, N, must be > 0. */
static inline int
-u16_mbtouc (unsigned int *puc, const unsigned short *s, size_t n)
+u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
{
- unsigned short c = *s;
+ uint16_t c = *s;
if (c < 0xd800 || c >= 0xe000)
{
else
return u16_mbtouc_aux (puc, s, n);
}
+
+#endif /* _UTF16_UCS4_H */
/* Conversion UTF-8 to UCS-4.
- Copyright (C) 2001-2002 Free Software Foundation, Inc.
- Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+ Copyright (C) 2001-2002, 2005-2007 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+#ifndef _UTF8_UCS4_H
+#define _UTF8_UCS4_H
#include <stddef.h>
+#include "unitypes.h"
-/* Return the length (number of units) of the first character in S, putting
- its 'ucs4_t' representation in *PUC. */
-static int
-u8_mbtouc_aux (unsigned int *puc, const unsigned char *s, size_t n)
-{
- unsigned char c = *s;
+extern int u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
- if (c >= 0xc2)
- {
- if (c < 0xe0)
- {
- if (n >= 2)
- {
- if ((s[1] ^ 0x80) < 0x40)
- {
- *puc = ((unsigned int) (c & 0x1f) << 6)
- | (unsigned int) (s[1] ^ 0x80);
- return 2;
- }
- /* invalid multibyte character */
- }
- else
- {
- /* incomplete multibyte character */
- *puc = 0xfffd;
- return n;
- }
- }
- else if (c < 0xf0)
- {
- if (n >= 3)
- {
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (c >= 0xe1 || s[1] >= 0xa0))
- {
- *puc = ((unsigned int) (c & 0x0f) << 12)
- | ((unsigned int) (s[1] ^ 0x80) << 6)
- | (unsigned int) (s[2] ^ 0x80);
- return 3;
- }
- /* invalid multibyte character */
- }
- else
- {
- /* incomplete multibyte character */
- *puc = 0xfffd;
- return n;
- }
- }
- else if (c < 0xf8)
- {
- if (n >= 4)
- {
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40
- && (c >= 0xf1 || s[1] >= 0x90)
-#if 1
- && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
-#endif
- )
- {
- *puc = ((unsigned int) (c & 0x07) << 18)
- | ((unsigned int) (s[1] ^ 0x80) << 12)
- | ((unsigned int) (s[2] ^ 0x80) << 6)
- | (unsigned int) (s[3] ^ 0x80);
- return 4;
- }
- /* invalid multibyte character */
- }
- else
- {
- /* incomplete multibyte character */
- *puc = 0xfffd;
- return n;
- }
- }
-#if 0
- else if (c < 0xfc)
- {
- if (n >= 5)
- {
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (c >= 0xf9 || s[1] >= 0x88))
- {
- *puc = ((unsigned int) (c & 0x03) << 24)
- | ((unsigned int) (s[1] ^ 0x80) << 18)
- | ((unsigned int) (s[2] ^ 0x80) << 12)
- | ((unsigned int) (s[3] ^ 0x80) << 6)
- | (unsigned int) (s[4] ^ 0x80);
- return 5;
- }
- /* invalid multibyte character */
- }
- else
- {
- /* incomplete multibyte character */
- *puc = 0xfffd;
- return n;
- }
- }
- else if (c < 0xfe)
- {
- if (n >= 6)
- {
- if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
- && (s[5] ^ 0x80) < 0x40
- && (c >= 0xfd || s[1] >= 0x84))
- {
- *puc = ((unsigned int) (c & 0x01) << 30)
- | ((unsigned int) (s[1] ^ 0x80) << 24)
- | ((unsigned int) (s[2] ^ 0x80) << 18)
- | ((unsigned int) (s[3] ^ 0x80) << 12)
- | ((unsigned int) (s[4] ^ 0x80) << 6)
- | (unsigned int) (s[5] ^ 0x80);
- return 6;
- }
- /* invalid multibyte character */
- }
- else
- {
- /* incomplete multibyte character */
- *puc = 0xfffd;
- return n;
- }
- }
-#endif
- }
- /* invalid multibyte character */
- *puc = 0xfffd;
- return 1;
-}
+/* Return the length (number of units) of the first character in S, putting
+ its 'ucs4_t' representation in *PUC.
+ The number of available units, N, must be > 0. */
static inline int
-u8_mbtouc (unsigned int *puc, const unsigned char *s, size_t n)
+u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
- unsigned char c = *s;
+ uint8_t c = *s;
if (c < 0x80)
{
else
return u8_mbtouc_aux (puc, s, n);
}
+
+#endif /* _UTF8_UCS4_H */
Files:
lib/ucs4-utf16.h
+lib/unistr/ucs4-utf16.c
m4/ucs4-utf.m4
Depends-on:
+unitypes
configure.ac:
gl_UCS4_UTF
Makefile.am:
-lib_SOURCES += ucs4-utf16.h
+lib_SOURCES += ucs4-utf16.h unistr/ucs4-utf16.c
Include:
Files:
lib/ucs4-utf8.h
+lib/unistr/ucs4-utf8.c
m4/ucs4-utf.m4
Depends-on:
+unitypes
configure.ac:
gl_UCS4_UTF
Makefile.am:
-lib_SOURCES += ucs4-utf8.h
+lib_SOURCES += ucs4-utf8.h unistr/ucs4-utf8.c
Include:
Files:
lib/utf16-ucs4.h
+lib/unistr/utf16-ucs4.c
m4/utf-ucs4.m4
Depends-on:
+unitypes
configure.ac:
gl_UTF_UCS4
Makefile.am:
-lib_SOURCES += utf16-ucs4.h
+lib_SOURCES += utf16-ucs4.h unistr/utf16-ucs4.c
Include:
+"utf16-ucs4.h"
License:
LGPL
Files:
lib/utf8-ucs4.h
+lib/unistr/utf8-ucs4.c
m4/utf-ucs4.m4
Depends-on:
+unitypes
configure.ac:
gl_UTF_UCS4
Makefile.am:
-lib_SOURCES += utf8-ucs4.h
+lib_SOURCES += utf8-ucs4.h unistr/utf8-ucs4.c
Include:
+"utf8-ucs4.h"
License:
LGPL