From 84c23b94c66ee83ff3843a50ef948e9556746ef1 Mon Sep 17 00:00:00 2001 From: Bruno Haible <bruno@clisp.org> Date: Wed, 1 Jul 2009 01:06:50 +0200 Subject: [PATCH] Fix evaluation of "Before C" condition of FINAL_SIGMA. --- ChangeLog | 23 ++ lib/gen-uni-tables.c | 58 ++++ lib/unicase/cased.c | 28 +- lib/unicase/cased.h | 343 +++++++++++++++++++ lib/unicase/ignorable.c | 47 ++- lib/unicase/ignorable.h | 544 +++++++++++++++++++++++++++++++ modules/unicase/cased | 7 +- modules/unicase/ignorable | 5 +- tests/unicase/test-u16-tolower.c | 61 ++++ tests/unicase/test-u32-tolower.c | 61 ++++ tests/unicase/test-u8-tolower.c | 61 ++++ 11 files changed, 1216 insertions(+), 22 deletions(-) create mode 100644 lib/unicase/cased.h create mode 100644 lib/unicase/ignorable.h diff --git a/ChangeLog b/ChangeLog index 659a289c47..108e19b737 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +2009-06-30 Bruno Haible <bruno@clisp.org> + + Fix evaluation of "Before C" condition of FINAL_SIGMA. + * lib/gen-uni-tables.c (is_cased, is_case_ignorable): New functions. + (output_casing_properties): New function. + (main): Call it. + * lib/unicase/cased.h: New file, generated by gen-uni-tables. + * lib/unicase/cased.c: Include unictype/bitmap.h. + (uc_is_cased): Define through a bitmap lookup. + * lib/unicase/ignorable.h: New file, generated by gen-uni-tables. + * lib/unicase/ignorable.c: Include unictype/bitmap.h. + (uc_is_case_ignorable): Define through a bitmap lookup. + * modules/unicase/cased (Files): Add lib/unicase/cased.h, + lib/unictype/bitmap.h. + (Depends-on): Add inline. Clean up. + * modules/unicase/ignorable (Files): Add lib/unicase/ignorable.h, + lib/unictype/bitmap.h. + (Depends-on): Add inline. Clean up. + * tests/unicase/test-u8-tolower.c (main): Add more tests of FINAL_SIGMA + recognition. + * tests/unicase/test-u16-tolower.c (main): Likewise. + * tests/unicase/test-u32-tolower.c (main): Likewise. + 2009-06-30 Bruno Haible <bruno@clisp.org> * lib/unicase/u8-casemap.c: Don't include uniwbrk.h. 
diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c index a50751758c..94752b2519 100644 --- a/lib/gen-uni-tables.c +++ b/lib/gen-uni-tables.c @@ -8210,6 +8210,63 @@ output_casing_rules (const char *filename, const char *version) /* ========================================================================= */ +/* Quoting the Unicode standard: + Definition: A character is defined to be "cased" if it has the Lowercase + or Uppercase property or has a General_Category value of + Titlecase_Letter. */ +static bool +is_cased (unsigned int ch) +{ + return (is_property_lowercase (ch) + || is_property_uppercase (ch) + || is_category_Lt (ch)); +} + +/* Quoting the Unicode standard: + Definition: A character is defined to be "case-ignorable" if it has the + value MidLetter {or the value MidNumLet} for the Word_Break property or + its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), + Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). + The text marked in braces was added in Unicode 5.1.0, see + <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of + Definition of case-ignorable". */ +/* Since this predicate is only used for the "Before C" and "After C" + conditions of FINAL_SIGMA, we exclude the "cased" characters here. + This simplifies the evaluation of the regular expressions + \p{cased} (\p{case-ignorable})* C + and + C (\p{case-ignorable})* \p{cased} + */ +static bool +is_case_ignorable (unsigned int ch) +{ + return (unicode_org_wbp[ch] == WBP_MIDLETTER + || unicode_org_wbp[ch] == WBP_MIDNUMLET + || is_category_Mn (ch) + || is_category_Me (ch) + || is_category_Cf (ch) + || is_category_Lm (ch) + || is_category_Sk (ch)) + && !is_cased (ch); +} + +/* ------------------------------------------------------------------------- */ + +/* Output all case related properties. 
*/ +static void +output_casing_properties (const char *version) +{ +#define PROPERTY(FN,P) \ + debug_output_predicate ("unicase/" #FN ".txt", is_ ## P); \ + output_predicate_test ("../tests/unicase/test-" #FN ".c", is_ ## P, "uc_is_" #P " (c)"); \ + output_predicate ("unicase/" #FN ".h", is_ ## P, "u_casing_property_" #P, "Casing Properties", version); + PROPERTY(cased, cased) + PROPERTY(ignorable, case_ignorable) +#undef PROPERTY +} + +/* ========================================================================= */ + int main (int argc, char * argv[]) { @@ -8302,6 +8359,7 @@ main (int argc, char * argv[]) output_simple_mapping ("unicase/totitle.h", to_title, version); output_simple_mapping ("unicase/tocasefold.h", to_casefold, version); output_casing_rules ("unicase/special-casing-table.gperf", version); + output_casing_properties (version); return 0; } diff --git a/lib/unicase/cased.c b/lib/unicase/cased.c index 1dc19c5001..5fbf4cb745 100644 --- a/lib/unicase/cased.c +++ b/lib/unicase/cased.c @@ -1,5 +1,5 @@ /* Test whether a Unicode character is cased. - Copyright (C) 2009 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it @@ -20,16 +20,34 @@ /* Specification. */ #include "caseprop.h" +/* Quoting the Unicode standard: + Definition: A character is defined to be "cased" if it has the Lowercase + or Uppercase property or has a General_Category value of + Titlecase_Letter. */ + +#if 0 + #include "unictype.h" bool uc_is_cased (ucs4_t uc) { - /* Quoting the Unicode standard: - Definition: A character is defined to be "cased" if it has the Lowercase - or Uppercase property or has a General_Category value of - Titlecase_Letter. 
*/ return (uc_is_property_lowercase (uc) || uc_is_property_uppercase (uc) || uc_is_general_category (uc, UC_TITLECASE_LETTER)); } + +#else + +#include "unictype/bitmap.h" + +/* Define u_casing_property_cased table. */ +#include "cased.h" + +bool +uc_is_cased (ucs4_t uc) +{ + return bitmap_lookup (&u_casing_property_cased, uc); +} + +#endif diff --git a/lib/unicase/cased.h b/lib/unicase/cased.h new file mode 100644 index 0000000000..ea2c2eaa8d --- /dev/null +++ b/lib/unicase/cased.h @@ -0,0 +1,343 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Casing Properties of Unicode characters. */ +/* Generated automatically by gen-ctype.c for Unicode 5.1.0. */ +#define header_0 16 +#define header_2 9 +#define header_3 127 +#define header_4 15 +static const +struct + { + int header[1]; + int level1[2]; + short level2[2 << 7]; + /*unsigned*/ int level3[15 << 4]; + } +u_casing_property_cased = +{ + { 2 }, + { + 3 * sizeof (int) / sizeof (short) + 0, + 3 * sizeof (int) / sizeof (short) + 128 + }, + { + 3 + 256 * sizeof (short) / sizeof (int) + 0, + 3 + 256 * sizeof (short) / sizeof (int) + 16, + 3 + 256 * sizeof (short) / sizeof (int) + 32, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 48, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 64, + 3 + 256 * sizeof (short) / sizeof (int) + 80, + 3 + 256 * sizeof (short) / sizeof (int) + 96, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 112, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 128, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 144, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 
-1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 160, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 176, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 192, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 3 + 256 * sizeof (short) / sizeof (int) + 208, + 3 + 256 * sizeof (short) / sizeof (int) + 224, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1 + }, + { + 0x00000000, 0x00000000, 0x07FFFFFE, 0x07FFFFFE, + 0x00000000, 0x04200400, 0xFF7FFFFF, 0xFF7FFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xF7FFFFFF, 0xFFFFFFF0, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFEFFFFF, 0x01FFFFFF, 0x00000003, 0x0000001F, + 0x00000000, 0x00000000, 0x00000020, 0x3CCF0000, + 0xFFFFD740, 0xFFFFFFFB, 0xFFFFFFFF, 0xFFBFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFC03, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFE000F, 0x007FFFFF, 0xFFFFFFFE, + 0x000000FF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0x0000003F, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 
0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, + 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, + 0x00000000, 0x00000000, 0x00000000, 0x80020000, + 0x001F0000, 0x00000000, 0x00000000, 0x00000000, + 0x3E2FFC84, 0xF21FBD50, 0x000043E0, 0xFFFFFFFF, + 0x00000018, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFC00000, 0xFFFFFFFF, 0x000003FF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFF7FFF, 0x7FFFFFFF, 0x3FFEFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000001F, + 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00003FFC, + 0x00FFFFFF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, + 0x000018FF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00F8007F, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x07FFFFFE, 0x07FFFFFE, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF, + 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF, + 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFF3F, 0xF7FFFFFD, 0xF7FFFFFF, + 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF, + 0xFFFFFDFF, 0xFFFFFDFF, 0x00000FF7, 0x00000000 + } +}; diff --git a/lib/unicase/ignorable.c b/lib/unicase/ignorable.c index 609c3d394f..84b2634ccb 100644 --- a/lib/unicase/ignorable.c +++ b/lib/unicase/ignorable.c @@ -1,5 +1,5 @@ /* Test whether a Unicode character is case-ignorable. - Copyright (C) 2009 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it @@ -20,21 +20,30 @@ /* Specification. */ #include "caseprop.h" +/* Quoting the Unicode standard: + Definition: A character is defined to be "case-ignorable" if it has the + value MidLetter {or the value MidNumLet} for the Word_Break property or + its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), + Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). + The text marked in braces was added in Unicode 5.1.0, see + <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of + Definition of case-ignorable". */ +/* Since this predicate is only used for the "Before C" and "After C" + conditions of FINAL_SIGMA, we exclude the "cased" characters here. + This simplifies the evaluation of the regular expressions + \p{cased} (\p{case-ignorable})* C + and + C (\p{case-ignorable})* \p{cased} + */ + +#if 0 + #include "unictype.h" #include "uniwbrk.h" bool uc_is_case_ignorable (ucs4_t uc) { - /* Quoting the Unicode standard: - Definition: A character is defined to be "case-ignorable" if it has the - value MidLetter {or the value MidNumLet} for the Word_Break property or - its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), - Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). - The text marked in braces was added in Unicode 5.1.0, see - <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of - Definition of case-ignorable". 
*/ - int wbp = uc_wordbreak_property (uc); return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET @@ -42,5 +51,21 @@ uc_is_case_ignorable (ucs4_t uc) | UC_CATEGORY_MASK_Me | UC_CATEGORY_MASK_Cf | UC_CATEGORY_MASK_Lm - | UC_CATEGORY_MASK_Sk)); + | UC_CATEGORY_MASK_Sk)) + && !uc_is_cased (uc); +} + +#else + +#include "unictype/bitmap.h" + +/* Define u_casing_property_case_ignorable table. */ +#include "ignorable.h" + +bool +uc_is_case_ignorable (ucs4_t uc) +{ + return bitmap_lookup (&u_casing_property_case_ignorable, uc); } + +#endif diff --git a/lib/unicase/ignorable.h b/lib/unicase/ignorable.h new file mode 100644 index 0000000000..3b29290f82 --- /dev/null +++ b/lib/unicase/ignorable.h @@ -0,0 +1,544 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Casing Properties of Unicode characters. */ +/* Generated automatically by gen-ctype.c for Unicode 5.1.0. */ +#define header_0 16 +#define header_2 9 +#define header_3 127 +#define header_4 15 +static const +struct + { + int header[1]; + int level1[15]; + short level2[3 << 7]; + /*unsigned*/ int level3[30 << 4]; + } +u_casing_property_case_ignorable = +{ + { 15 }, + { + 16 * sizeof (int) / sizeof (short) + 0, + 16 * sizeof (int) / sizeof (short) + 128, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 * sizeof (int) / sizeof (short) + 256 + }, + { + 16 + 384 * sizeof (short) / sizeof (int) + 0, + 16 + 384 * sizeof (short) / sizeof (int) + 16, + 16 + 384 * sizeof (short) / sizeof (int) + 32, + 16 + 384 * sizeof (short) / sizeof (int) + 48, + 16 + 384 * sizeof (short) / sizeof (int) + 64, + 16 + 384 * sizeof (short) / sizeof (int) + 80, + 16 + 384 * sizeof (short) / sizeof (int) + 96, + 16 + 384 * sizeof (short) / sizeof (int) + 112, + 16 + 384 * sizeof (short) / sizeof (int) + 128, + 16 + 384 * sizeof (short) / sizeof (int) + 144, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 160, + 16 + 384 * sizeof (short) / sizeof (int) + 176, + 16 + 384 * sizeof (short) / sizeof (int) + 192, + 16 + 384 
* sizeof (short) / sizeof (int) + 208, + 16 + 384 * sizeof (short) / sizeof (int) + 224, + 16 + 384 * sizeof (short) / sizeof (int) + 240, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 256, + 16 + 384 * sizeof (short) / sizeof (int) + 272, + 16 + 384 * sizeof (short) / sizeof (int) + 288, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 304, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 320, + 16 + 384 * sizeof (short) / sizeof (int) + 336, + 16 + 384 * sizeof (short) / sizeof (int) + 352, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 368, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 384, + 16 + 384 * sizeof (short) / sizeof (int) + 400, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 416, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 432, + 16 + 384 * sizeof (short) / sizeof 
(int) + 448, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + 16 + 384 * sizeof (short) / sizeof (int) + 464, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1 + }, + { + 0x00000000, 0x04004080, 0x40000000, 0x00000001, + 0x00000000, 0x0190A100, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFE000000, 0xFFFFFFFC, 0xFFFFFFE0, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFDF, 0x0030FFFF, + 0x000000B0, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x000003F8, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x02000000, 0x00000000, + 0xFFFE0000, 0xBFFFFFFF, 0x000000B6, 0x00100000, + 0x07FF000F, 0x00000000, 0x7FFFF801, 0x00010000, + 0x00000000, 0x00000000, 0xFFC00000, 0x00003DFF, + 0x00028000, 0xFFFF0000, 0x000007FF, 0x00000000, + 0x00000000, 0x0001FFC0, 0x00000000, 0x043FF800, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000006, 0x10000000, 0x001E21FE, 0x0002000C, + 0x00000002, 0x10000000, 0x0000201E, 0x0000000C, + 0x00000006, 0x10000000, 
0x00023986, 0x00230000, + 0x00000006, 0x10000000, 0x000021BE, 0x0000000C, + 0x00000002, 0x90000000, 0x0040201E, 0x0000000C, + 0x00000004, 0x00000000, 0x00002001, 0x00000000, + 0x00000000, 0xC0000000, 0x00603DC1, 0x0000000C, + 0x00000000, 0x90000000, 0x00003040, 0x0000000C, + 0x00000000, 0x00000000, 0x0000201E, 0x0000000C, + 0x00000000, 0x00000000, 0x005C0400, 0x00000000, + 0x00000000, 0x07F20000, 0x00007FC0, 0x00000000, + 0x00000000, 0x1BF20000, 0x00003F40, 0x00000000, + 0x03000000, 0x02A00000, 0x00000000, 0x7FFE0000, + 0xFEFF00DF, 0x1FFFFFFF, 0x00000040, 0x00000000, + 0x00000000, 0x66FDE000, 0xC3000000, 0x001E0001, + 0x00002064, 0x00000000, 0x00000000, 0x10000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x80000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x001C0000, 0x001C0000, 0x000C0000, 0x000C0000, + 0x00000000, 0x3FB00000, 0x208FFE40, 0x00000000, + 0x00003800, 0x00000000, 0x00000008, 0x00000000, + 0x00000000, 0x00000200, 0x00000000, 0x00000000, + 0x00000000, 0x0E040187, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x01800000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000000F, 0x17D00000, 0x00000004, 0x000FF800, + 0x00000003, 0x0000033C, 0x00000000, 0x00000000, + 0x00000000, 0x00CFF000, 0x00000000, 0x3F000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFFFF, 0xC000007F, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xA0000000, 0xE000E003, 0x6000E000, + 0x0300F800, 0x00007C90, 
0x00000000, 0x0000FC1F, + 0x00000000, 0x00000000, 0xFFFF0000, 0x0001FFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00008000, + 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, + 0x00000000, 0x00008000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000020, 0x083EFC00, 0x00000000, 0x00000000, + 0x7E000000, 0x00000000, 0x00000000, 0x70000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00200000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00001000, 0x00000000, 0x00000000, 0xB0078000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0x00000003, 0x00000000, 0x00000000, + 0x00000700, 0x00000000, 0x00000000, 0x00000000, + 0x00000844, 0x00000060, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000010, 0x00000000, + 0x00000000, 0x00003FC0, 0x0003FF80, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00667E00, 0x00001008, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x40000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0008FFFF, 0x0000007F, 0x00240000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x80000000, + 0x04004080, 0x40000000, 0x00000001, 0x00010000, + 0xC0000000, 0x00000000, 0x00000000, 0x0E000008, + 0x00000000, 0x00000000, 
0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x20000000, + 0x0000F06E, 0x87000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0xFFF80380, + 0x00000FE7, 0x00003C00, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x0000001C, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000002, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF + } +}; diff --git a/modules/unicase/cased b/modules/unicase/cased index e679235b9c..f2b1843647 100644 --- a/modules/unicase/cased +++ b/modules/unicase/cased @@ -4,14 +4,13 @@ Test whether a Unicode character is "cased". Files: lib/unicase/caseprop.h lib/unicase/cased.c +lib/unicase/cased.h +lib/unictype/bitmap.h Depends-on: -unictype/category-test -unictype/category-Lt -unictype/property-lowercase -unictype/property-uppercase unitypes stdbool +inline configure.ac: diff --git a/modules/unicase/ignorable b/modules/unicase/ignorable index 7ea017fd64..5d316823d1 100644 --- a/modules/unicase/ignorable +++ b/modules/unicase/ignorable @@ -4,12 +4,13 @@ Test whether a Unicode character is "case-ignorable". 
Files: lib/unicase/caseprop.h lib/unicase/ignorable.c +lib/unicase/ignorable.h +lib/unictype/bitmap.h Depends-on: -uniwbrk/wordbreak-property -unictype/category-of unitypes stdbool +inline configure.ac: diff --git a/tests/unicase/test-u16-tolower.c b/tests/unicase/test-u16-tolower.c index 406e45a70c..5ec764c4ad 100644 --- a/tests/unicase/test-u16-tolower.c +++ b/tests/unicase/test-u16-tolower.c @@ -185,6 +185,67 @@ main () }; ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); } + { /* "Σ" -> "σ" */ + static const uint16_t input[] = { 0x03A3 }; + static const uint16_t casemapped[] = { 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ" -> "ας" */ + static const uint16_t input[] = { 0x0391, 0x03A3 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if not followed by a case-ignorable sequence and + then a cased letter. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* "ΑΣΑ" -> "ασα" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x0391 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:" -> "ας:" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C2, 0x003A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:Α" -> "ασ:α" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x0391 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ" -> "ασ:ͺ" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ " -> "ασ:ͺ " */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A, 0x0020 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A, 0x0020 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if preceded by a case-ignorable sequence and + a cased letter before it. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* ":Σ" -> ":σ" */ + static const uint16_t input[] = { 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x003A, 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Α:Σ" -> "α:ς" */ + static const uint16_t input[] = { 0x0391, 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x03B1, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ͺ:Σ" -> "ͺ:ς" */ + static const uint16_t input[] = { 0x037A, 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* " ͺ:Σ" -> " ͺ:ς" */ + static const uint16_t input[] = { 0x0020, 0x037A, 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x0020, 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } return 0; } diff --git a/tests/unicase/test-u32-tolower.c b/tests/unicase/test-u32-tolower.c index a4318787b2..7f348da484 100644 --- a/tests/unicase/test-u32-tolower.c +++ b/tests/unicase/test-u32-tolower.c @@ -185,6 +185,67 @@ main () }; ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); } + { /* "Σ" -> "σ" */ + static const uint32_t input[] = { 0x03A3 }; + static const uint32_t casemapped[] = { 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ" -> "ας" */ + static const uint32_t input[] = { 0x0391, 0x03A3 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if not followed by a case-ignorable sequence and + then a cased letter. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* "ΑΣΑ" -> "ασα" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x0391 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:" -> "ας:" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C2, 0x003A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:Α" -> "ασ:α" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x0391 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ" -> "ασ:ͺ" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ " -> "ασ:ͺ " */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A, 0x0020 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A, 0x0020 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if preceded by a case-ignorable sequence and + a cased letter before it. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* ":Σ" -> ":σ" */ + static const uint32_t input[] = { 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x003A, 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Α:Σ" -> "α:ς" */ + static const uint32_t input[] = { 0x0391, 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x03B1, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ͺ:Σ" -> "ͺ:ς" */ + static const uint32_t input[] = { 0x037A, 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* " ͺ:Σ" -> " ͺ:ς" */ + static const uint32_t input[] = { 0x0020, 0x037A, 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x0020, 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } return 0; } diff --git a/tests/unicase/test-u8-tolower.c b/tests/unicase/test-u8-tolower.c index 1b43a1b746..6c0a5df610 100644 --- a/tests/unicase/test-u8-tolower.c +++ b/tests/unicase/test-u8-tolower.c @@ -191,6 +191,67 @@ main () }; ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); } + { /* "Σ" -> "σ" */ + static const uint8_t input[] = { 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCF, 0x83 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ" -> "ας" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if not followed by a case-ignorable sequence and + then a cased letter. 
Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). */ + { /* "ΑΣΑ" -> "ασα" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0xCE, 0x91 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0xCE, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:" -> "ας:" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x82, 0x3A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:Α" -> "ασ:α" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCE, 0x91 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCE, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ" -> "ασ:ͺ" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCD, 0xBA }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCD, 0xBA }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ " -> "ασ:ͺ " */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCD, 0xBA, 0x20 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCD, 0xBA, 0x20 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if preceded by a case-ignorable sequence and + a cased letter before it. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* ":Σ" -> ":σ" */ + static const uint8_t input[] = { 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0x3A, 0xCF, 0x83 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Α:Σ" -> "α:ς" */ + static const uint8_t input[] = { 0xCE, 0x91, 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0x3A, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ͺ:Σ" -> "ͺ:ς" */ + static const uint8_t input[] = { 0xCD, 0xBA, 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCD, 0xBA, 0x3A, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* " ͺ:Σ" -> " ͺ:ς" */ + static const uint8_t input[] = { 0x20, 0xCD, 0xBA, 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0x20, 0xCD, 0xBA, 0x3A, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } return 0; } -- 2.30.2