Add context arguments to u*_casemap functions.

author Bruno Haible <bruno@clisp.org>

Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)

committer Bruno Haible <bruno@clisp.org>

Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)
author Bruno Haible <bruno@clisp.org>
Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)
committer Bruno Haible <bruno@clisp.org>
Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)
diff --git a/ChangeLog b/ChangeLog

index 204a0d1a766aa9065bd177e2584a2f6831a1dafb..e5624fd4357c95acfaef053124431bdc6640f905 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
  2009-06-29  Bruno Haible  <bruno@clisp.org>
  
+       Add context arguments to u*_casemap functions.
+       * lib/unicase/unicasemap.h: Include unicase.h.
+       (u8_casemap, u16_casemap, u32_casemap): Add prefix_context and
+       suffix_context arguments.
+       * lib/unicase/u-casemap.h (is_cased, is_case_ignorable): Remove
+       functions.
+       (FUNC): Add prefix_context and suffix_context arguments. Use
+       uc_is_cased and uc_is_case_ignorable.
+       * lib/unicase/u8-casemap.c: Include caseprop.h and context.h.
+       * lib/unicase/u16-casemap.c: Likewise.
+       * lib/unicase/u32-casemap.c: Likewise.
+       * modules/unicase/u8-casemap (Files): Add lib/unicase/context.h.
+       (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+       * modules/unicase/u16-casemap (Files): Add lib/unicase/context.h.
+       (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+       * modules/unicase/u32-casemap (Files): Add lib/unicase/context.h.
+       (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+
         New module 'unicase/u32-suffix-context'.
         * lib/unicase/u32-suffix-context.c: New file.
         * modules/unicase/u32-suffix-context: New file.
diff --git a/lib/unicase/u-casemap.h b/lib/unicase/u-casemap.h

index 760fca71daa335556cdbfd0aac6e79496d7c34e4..d904eb432e051df0a8c8eb1f7f09b54825035a66 100644 (file)
--- a/lib/unicase/u-casemap.h
+++ b/lib/unicase/u-casemap.h
@@ -15,40 +15,11 @@
     You should have received a copy of the GNU Lesser General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  
-/* Quoting the Unicode standard:
-     Definition: A character is defined to be "cased" if it has the Lowercase or
-     Uppercase property or has a General_Category value of Titlecase_Letter.  */
-static inline bool
-is_cased (ucs4_t uc)
-{
-  return (uc_is_property_lowercase (uc)
-         || uc_is_property_uppercase (uc)
-         || uc_is_general_category (uc, UC_TITLECASE_LETTER));
-}
-
-/* Quoting the Unicode standard:
-     Definition: A character is defined to be "case-ignorable" if it has the
-     value MidLetter {or the value MidNumLet} for the Word_Break property or
-     its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
-     Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
-   The text marked in braces was added in Unicode 5.1.0, see
-   <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of
-   Definition of case-ignorable".   */
-static inline bool
-is_case_ignorable (ucs4_t uc)
-{
-  int wbp = uc_wordbreak_property (uc);
-
-  return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
-         || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn
-                                                  | UC_CATEGORY_MASK_Me
-                                                  | UC_CATEGORY_MASK_Cf
-                                                  | UC_CATEGORY_MASK_Lm
-                                                  | UC_CATEGORY_MASK_Sk));
-}
-
  UNIT *
-FUNC (const UNIT *s, size_t n, const char *iso639_language,
+FUNC (const UNIT *s, size_t n,
+      casing_prefix_context_t prefix_context,
+      casing_suffix_context_t suffix_context,
+      const char *iso639_language,
        ucs4_t (*single_character_map) (ucs4_t),
        size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
        uninorm_t nf,
@@ -77,11 +48,13 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
  
      /* Helper for evaluating the FINAL_SIGMA condition:
         Last character that was not case-ignorable.  */
-    ucs4_t last_char_except_ignorable = 0xFFFD;
+    ucs4_t last_char_except_ignorable =
+      prefix_context.last_char_except_ignorable;
  
      /* Helper for evaluating the AFTER_SOFT_DOTTED and AFTER_I conditions:
         Last character that was of combining class 230 ("Above") or 0.  */
-    ucs4_t last_char_normal_or_above = 0xFFFD;
+    ucs4_t last_char_normal_or_above =
+      prefix_context.last_char_normal_or_above;
  
      while (s < s_end)
        {
@@ -134,23 +107,31 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
                            consisting of a case-ignorable sequence and then a
                            cased letter.  */
                         /* Test the "before" condition.  */
-                       applies = is_cased (last_char_except_ignorable);
+                       applies = uc_is_cased (last_char_except_ignorable);
                         /* Test the "after" condition.  */
                         if (applies)
                           {
                             const UNIT *s2 = s + count;
-                           while (s2 < s_end)
+                           for (;;)
                               {
-                               ucs4_t uc2;
-                               int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-                               if (is_cased (uc2))
+                               if (s2 < s_end)
                                   {
-                                   applies = false;
+                                   ucs4_t uc2;
+                                   int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+                                   if (uc_is_cased (uc2))
+                                     {
+                                       applies = false;
+                                       break;
+                                     }
+                                   if (!uc_is_case_ignorable (uc2))
+                                     break;
+                                   s2 += count2;
+                                 }
+                               else
+                                 {
+                                   applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0);
                                     break;
                                   }
-                               if (!is_case_ignorable (uc2))
-                                 break;
-                               s2 += count2;
                               }
                           }
                         break;
@@ -171,19 +152,27 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
                         {
                           const UNIT *s2 = s + count;
                           applies = false;
-                         while (s2 < s_end)
+                         for (;;)
                             {
-                             ucs4_t uc2;
-                             int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-                             int ccc = uc_combining_class (uc2);
-                             if (ccc == UC_CCC_A)
+                             if (s2 < s_end)
                                 {
-                                 applies = true;
+                                 ucs4_t uc2;
+                                 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+                                 int ccc = uc_combining_class (uc2);
+                                 if (ccc == UC_CCC_A)
+                                   {
+                                     applies = true;
+                                     break;
+                                   }
+                                 if (ccc == UC_CCC_NR)
+                                   break;
+                                 s2 += count2;
+                               }
+                             else
+                               {
+                                 applies = ((suffix_context.bits & SCC_MORE_ABOVE_MASK) != 0);
                                   break;
                                 }
-                             if (ccc == UC_CCC_NR)
-                               break;
-                             s2 += count2;
                             }
                         }
                         break;
@@ -198,21 +187,29 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
                         {
                           const UNIT *s2 = s + count;
                           applies = false;
-                         while (s2 < s_end)
+                         for (;;)
                             {
-                             ucs4_t uc2;
-                             int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-                             if (uc2 == 0x0307) /* COMBINING DOT ABOVE */
+                             if (s2 < s_end)
                                 {
-                                 applies = true;
-                                 break;
+                                 ucs4_t uc2;
+                                 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+                                 if (uc2 == 0x0307) /* COMBINING DOT ABOVE */
+                                   {
+                                     applies = true;
+                                     break;
+                                   }
+                                 {
+                                   int ccc = uc_combining_class (uc2);
+                                   if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
+                                     break;
+                                 }
+                                 s2 += count2;
                                 }
-                             {
-                               int ccc = uc_combining_class (uc2);
-                               if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
+                             else
+                               {
+                                 applies = ((suffix_context.bits & SCC_BEFORE_DOT_MASK) != 0);
                                   break;
-                             }
-                             s2 += count2;
+                               }
                             }
                         }
                         break;
@@ -354,7 +351,7 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
             }
         }
  
-       if (!is_case_ignorable (uc))
+       if (!uc_is_case_ignorable (uc))
           last_char_except_ignorable = uc;
  
         {
diff --git a/lib/unicase/u16-casemap.c b/lib/unicase/u16-casemap.c

index 4221aaafd2a80c935d728a487ce4df82415a8abe..1b1952e3c04cae8c02c79e711d75cf94254e00e8 100644 (file)
--- a/lib/unicase/u16-casemap.c
+++ b/lib/unicase/u16-casemap.c
@@ -28,6 +28,8 @@
  #include "unictype.h"
  #include "uniwbrk.h"
  #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
  #include "special-casing.h"
  
  #define FUNC u16_casemap
diff --git a/lib/unicase/u32-casemap.c b/lib/unicase/u32-casemap.c

index 084f8f6eb93bf973a5617991a5a3754cf3ee64b3..f6284931021005beff395fad4dbcdc465311c78c 100644 (file)
--- a/lib/unicase/u32-casemap.c
+++ b/lib/unicase/u32-casemap.c
@@ -28,6 +28,8 @@
  #include "unictype.h"
  #include "uniwbrk.h"
  #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
  #include "special-casing.h"
  
  #define FUNC u32_casemap
diff --git a/lib/unicase/u8-casemap.c b/lib/unicase/u8-casemap.c

index 96268a3749ddd5ebb20b52e51d1dd452ebd46baf..52c8f45aadfd86fa7eccbf759baeaa24c168bc0e 100644 (file)
--- a/lib/unicase/u8-casemap.c
+++ b/lib/unicase/u8-casemap.c
@@ -28,6 +28,8 @@
  #include "unictype.h"
  #include "uniwbrk.h"
  #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
  #include "special-casing.h"
  
  #define FUNC u8_casemap
diff --git a/lib/unicase/unicasemap.h b/lib/unicase/unicasemap.h

index 4581cd6ddea3d58b58038cb6bd317beda6f38a9c..8da8c51d28b271e229c86d2808427cf738647891 100644 (file)
--- a/lib/unicase/unicasemap.h
+++ b/lib/unicase/unicasemap.h
@@ -18,24 +18,34 @@
  #include <stddef.h>
  
  #include "unitypes.h"
+#include "unicase.h"
  #include "uninorm.h"
  
  extern uint8_t *
-       u8_casemap (const uint8_t *s, size_t n, const char *iso639_language,
+       u8_casemap (const uint8_t *s, size_t n,
+                  casing_prefix_context_t prefix_context,
+                  casing_suffix_context_t suffix_context,
+                  const char *iso639_language,
                    ucs4_t (*single_character_map) (ucs4_t),
                    size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
                    uninorm_t nf,
                    uint8_t *resultbuf, size_t *lengthp);
  
  extern uint16_t *
-       u16_casemap (const uint16_t *s, size_t n, const char *iso639_language,
+       u16_casemap (const uint16_t *s, size_t n,
+                   casing_prefix_context_t prefix_context,
+                   casing_suffix_context_t suffix_context,
+                   const char *iso639_language,
                     ucs4_t (*single_character_map) (ucs4_t),
                     size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
                     uninorm_t nf,
                     uint16_t *resultbuf, size_t *lengthp);
  
  extern uint32_t *
-       u32_casemap (const uint32_t *s, size_t n, const char *iso639_language,
+       u32_casemap (const uint32_t *s, size_t n,
+                   casing_prefix_context_t prefix_context,
+                   casing_suffix_context_t suffix_context,
+                   const char *iso639_language,
                     ucs4_t (*single_character_map) (ucs4_t),
                     size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
                     uninorm_t nf,
diff --git a/modules/unicase/u16-casemap b/modules/unicase/u16-casemap

index 5e3910c9ba4aef0c09b3f08eeedd356e2fdc4347..cb2e6c432538c33cf676557636bec8a434cfce69 100644 (file)
--- a/modules/unicase/u16-casemap
+++ b/modules/unicase/u16-casemap
@@ -5,17 +5,14 @@ Files:
  lib/unicase/unicasemap.h
  lib/unicase/u16-casemap.c
  lib/unicase/u-casemap.h
+lib/unicase/context.h
  
  Depends-on:
  unicase/base
+unicase/cased
+unicase/ignorable
  unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
  unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
  unictype/property-soft-dotted
  unistr/u16-mbtouc-unsafe
  unistr/u16-uctomb
diff --git a/modules/unicase/u32-casemap b/modules/unicase/u32-casemap

index f2b634548a26ca9a4eae910f4ccdfe0f35f871dc..4285d1ce31acc7f73fd608f3a6832ecbf7aa29df 100644 (file)
--- a/modules/unicase/u32-casemap
+++ b/modules/unicase/u32-casemap
@@ -5,17 +5,14 @@ Files:
  lib/unicase/unicasemap.h
  lib/unicase/u32-casemap.c
  lib/unicase/u-casemap.h
+lib/unicase/context.h
  
  Depends-on:
  unicase/base
+unicase/cased
+unicase/ignorable
  unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
  unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
  unictype/property-soft-dotted
  unistr/u32-mbtouc-unsafe
  unistr/u32-uctomb
diff --git a/modules/unicase/u8-casemap b/modules/unicase/u8-casemap

index a84e479ce1fb8b9204c29e9a567555529783ab6d..3c482da02d7516b41a2deffa88c070f00798f6c7 100644 (file)
--- a/modules/unicase/u8-casemap
+++ b/modules/unicase/u8-casemap
@@ -5,17 +5,14 @@ Files:
  lib/unicase/unicasemap.h
  lib/unicase/u8-casemap.c
  lib/unicase/u-casemap.h
+lib/unicase/context.h
  
  Depends-on:
  unicase/base
+unicase/cased
+unicase/ignorable
  unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
  unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
  unictype/property-soft-dotted
  unistr/u8-mbtouc-unsafe
  unistr/u8-uctomb
author	Bruno Haible <bruno@clisp.org>
	Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)
ChangeLog		patch | blob | history
lib/unicase/u-casemap.h		patch | blob | history
lib/unicase/u16-casemap.c		patch | blob | history
lib/unicase/u32-casemap.c		patch | blob | history
lib/unicase/u8-casemap.c		patch | blob | history
lib/unicase/unicasemap.h		patch | blob | history
modules/unicase/u16-casemap		patch | blob | history
modules/unicase/u32-casemap		patch | blob | history
modules/unicase/u8-casemap		patch | blob | history