+2009-02-22 Bruno Haible <bruno@clisp.org>
+
+ Implement new clarified decomposition of Hangul syllables.
+ * lib/uninorm/decomposition.c (uc_decomposition): For Hangul syllables
+ of type LTV, return only a pairwise decomposition.
+ * lib/uninorm/canonical-decomposition.c (uc_canonical_decomposition):
+ Likewise.
+ * tests/uninorm/test-decomposition.c (main): Update expected result.
+ * tests/uninorm/test-canonical-decomposition.c (main): Likewise.
+ * tests/uninorm/test-compat-decomposition.c (main): Likewise.
+
2009-02-22 Bruno Haible <bruno@clisp.org>
* lib/uninorm/u-normalize-internal.h (FUNC): At the end, handle
{
if (uc >= 0xAC00 && uc < 0xD7A4)
{
- /* Hangul syllable. See Unicode standard, chapter 3,
- section "Hangul Syllable Decomposition". */
- unsigned int t, v, l;
+ /* Hangul syllable. See Unicode standard, chapter 3, section
+ "Hangul Syllable Decomposition". See also the clarification at
+ <http://www.unicode.org/versions/Unicode5.1.0/>, section
+ "Clarification of Hangul Jamo Handling". */
+ unsigned int t;
uc -= 0xAC00;
t = uc % 28;
- uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
- decomposition[0] = 0x1100 + l;
- decomposition[1] = 0x1161 + v;
if (t == 0)
- return 2;
+ {
+ unsigned int v, l;
+
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
+
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
+ return 2;
+ }
else
{
+#if 1 /* Return the pairwise decomposition, not the full decomposition. */
+ decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
+ decomposition[1] = 0x11A7 + t;
+ return 2;
+#else
+ unsigned int v, l;
+
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
+
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
decomposition[2] = 0x11A7 + t;
return 3;
+#endif
}
}
else if (uc < 0x110000)
{
if (uc >= 0xAC00 && uc < 0xD7A4)
{
- /* Hangul syllable. See Unicode standard, chapter 3,
- section "Hangul Syllable Decomposition". */
- unsigned int t, v, l;
+ /* Hangul syllable. See Unicode standard, chapter 3, section
+ "Hangul Syllable Decomposition". See also the clarification at
+ <http://www.unicode.org/versions/Unicode5.1.0/>, section
+ "Clarification of Hangul Jamo Handling". */
+ unsigned int t;
uc -= 0xAC00;
t = uc % 28;
- uc = uc / 28;
- v = uc % 21;
- l = uc / 21;
*decomp_tag = UC_DECOMP_CANONICAL;
- decomposition[0] = 0x1100 + l;
- decomposition[1] = 0x1161 + v;
if (t == 0)
- return 2;
+ {
+ unsigned int v, l;
+
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
+
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
+ return 2;
+ }
else
{
+#if 1 /* Return the pairwise decomposition, not the full decomposition. */
+ decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
+ decomposition[1] = 0x11A7 + t;
+ return 2;
+#else
+ unsigned int v, l;
+
+ uc = uc / 28;
+ v = uc % 21;
+ l = uc / 21;
+
+ decomposition[0] = 0x1100 + l;
+ decomposition[1] = 0x1161 + v;
decomposition[2] = 0x11A7 + t;
return 3;
+#endif
}
}
else if (uc < 0x110000)
/* HANGUL SYLLABLE GEUL */
ret = uc_canonical_decomposition (0xAE00, decomposed);
+ /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>,
+ section "Clarification of Hangul Jamo Handling". */
+#if 1
+ ASSERT (ret == 2);
+ ASSERT (decomposed[0] == 0xADF8);
+ ASSERT (decomposed[1] == 0x11AF);
+#else
ASSERT (ret == 3);
ASSERT (decomposed[0] == 0x1100);
ASSERT (decomposed[1] == 0x1173);
ASSERT (decomposed[2] == 0x11AF);
+#endif
/* HANGUL SYLLABLE GEU */
ret = uc_canonical_decomposition (0xADF8, decomposed);
/* HANGUL SYLLABLE GEUL */
ret = uc_compat_decomposition (0xAE00, decomposed);
+ /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>,
+ section "Clarification of Hangul Jamo Handling". */
+#if 1
+ ASSERT (ret == 2);
+ ASSERT (decomposed[0] == 0xADF8);
+ ASSERT (decomposed[1] == 0x11AF);
+#else
ASSERT (ret == 3);
ASSERT (decomposed[0] == 0x1100);
ASSERT (decomposed[1] == 0x1173);
ASSERT (decomposed[2] == 0x11AF);
+#endif
/* HANGUL SYLLABLE GEU */
ret = uc_compat_decomposition (0xADF8, decomposed);
/* HANGUL SYLLABLE GEUL */
ret = uc_decomposition (0xAE00, &tag, decomposed);
+ /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>,
+ section "Clarification of Hangul Jamo Handling". */
+#if 1
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_CANONICAL);
+ ASSERT (decomposed[0] == 0xADF8);
+ ASSERT (decomposed[1] == 0x11AF);
+#else
ASSERT (ret == 3);
ASSERT (tag == UC_DECOMP_CANONICAL);
ASSERT (decomposed[0] == 0x1100);
ASSERT (decomposed[1] == 0x1173);
ASSERT (decomposed[2] == 0x11AF);
+#endif
/* HANGUL SYLLABLE GEU */
ret = uc_decomposition (0xADF8, &tag, decomposed);