From 5ccf18f30a760b348cd07ebac61946e694d11462 Mon Sep 17 00:00:00 2001
From: Bruno Haible <bruno@clisp.org>
Date: Sun, 8 Feb 2009 03:02:06 +0100
Subject: [PATCH] Merge gen-ctype and gen-lbrk into gen-uni-tables.

---
 ChangeLog                                     |   12 +
 .../gen-ctype.c => gen-uni-tables.c}          | 1315 ++++++++++++++-
 lib/unilbrk/gen-lbrk.c                        | 1497 -----------------
 .../{unictype/gen-ctype => gen-uni-tables}    |    4 +-
 modules/unilbrk/gen-lbrk                      |   23 -
 5 files changed, 1287 insertions(+), 1564 deletions(-)
 rename lib/{unictype/gen-ctype.c => gen-uni-tables.c} (75%)
 delete mode 100644 lib/unilbrk/gen-lbrk.c
 rename modules/{unictype/gen-ctype => gen-uni-tables} (70%)
 delete mode 100644 modules/unilbrk/gen-lbrk

diff --git a/ChangeLog b/ChangeLog
index 6e79309c54..0232e3aeef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2009-02-07  Bruno Haible  <bruno@clisp.org>
+
+	Merge gen-ctype and gen-lbrk into a single program.
+	* lib/gen-uni-tables.c: New file, incorporating
+	lib/unictype/gen-ctype.c and lib/unilbrk/gen-lbrk.c.
+	Add directory prefixes to the names of the generated files.
+	* lib/unictype/gen-ctype.c: Remove file.
+	* lib/unilbrk/gen-lbrk.c: Remove file.
+	* modules/gen-uni-tables: New file.
+	* modules/unictype/gen-ctype: Remove file.
+	* modules/unilbrk/gen-lbrk: Remove file.
+
 2009-02-07  Bruno Haible  <bruno@clisp.org>
 
 	* lib/unistr.h (u8_strcoll, u16_strcoll, u32_strcoll): New declations.
diff --git a/lib/unictype/gen-ctype.c b/lib/gen-uni-tables.c
similarity index 75%
rename from lib/unictype/gen-ctype.c
rename to lib/gen-uni-tables.c
index c1c43c734c..b1149fd190 100644
--- a/lib/unictype/gen-ctype.c
+++ b/lib/gen-uni-tables.c
@@ -1,6 +1,6 @@
-/* Generate Unicode conforming character classification tables from a
-   UnicodeData file.
-   Copyright (C) 2000-2002, 2007-2009 Free Software Foundation, Inc.
+/* Generate Unicode conforming character classification tables and
+   Line Break Properties tables from a UnicodeData file.
+   Copyright (C) 2000-2002, 2004, 2007-2009 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2000-2002.
 
    This program is free software: you can redistribute it and/or modify
@@ -17,13 +17,15 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 /* Usage example:
-     $ gen-ctype /usr/local/share/Unidata/UnicodeData.txt \
-                 /usr/local/share/Unidata/PropList.txt \
-                 /usr/local/share/Unidata/DerivedCoreProperties.txt \
-                 /usr/local/share/Unidata/Scripts.txt \
-                 /usr/local/share/Unidata/Blocks.txt \
-                 /usr/local/share/Unidata/PropList-3.0.1.txt \
-                 5.0.0
+     $ gen-uni-tables /usr/local/share/Unidata/UnicodeData.txt \
+                      /usr/local/share/Unidata/PropList.txt \
+                      /usr/local/share/Unidata/DerivedCoreProperties.txt \
+                      /usr/local/share/Unidata/Scripts.txt \
+                      /usr/local/share/Unidata/Blocks.txt \
+                      /usr/local/share/Unidata/PropList-3.0.1.txt \
+                      /usr/local/share/Unidata/EastAsianWidth.txt \
+                      /usr/local/share/Unidata/LineBreak.txt \
+                      5.0.0
  */
 
 #include <stdbool.h>
@@ -791,9 +793,9 @@ static void
 output_categories (const char *version)
 {
 #define CATEGORY(C) \
-  debug_output_predicate ("categ_" #C ".txt", is_category_ ## C); \
-  output_predicate_test ("test-categ_" #C ".c", is_category_ ## C, "uc_is_general_category (c, UC_CATEGORY_" #C ")"); \
-  output_predicate ("categ_" #C ".h", is_category_ ## C, "u_categ_" #C, "Categories", version);
+  debug_output_predicate ("unictype/categ_" #C ".txt", is_category_ ## C); \
+  output_predicate_test ("../tests/unictype/test-categ_" #C ".c", is_category_ ## C, "uc_is_general_category (c, UC_CATEGORY_" #C ")"); \
+  output_predicate ("unictype/categ_" #C ".h", is_category_ ## C, "u_categ_" #C, "Categories", version);
   CATEGORY (L)
   CATEGORY (Lu)
   CATEGORY (Ll)
@@ -3441,9 +3443,9 @@ static void
 output_properties (const char *version)
 {
 #define PROPERTY(P) \
-  debug_output_predicate ("pr_" #P ".txt", is_property_ ## P); \
-  output_predicate_test ("test-pr_" #P ".c", is_property_ ## P, "uc_is_property_" #P " (c)"); \
-  output_predicate ("pr_" #P ".h", is_property_ ## P, "u_property_" #P, "Properties", version);
+  debug_output_predicate ("unictype/pr_" #P ".txt", is_property_ ## P); \
+  output_predicate_test ("../tests/unictype/test-pr_" #P ".c", is_property_ ## P, "uc_is_property_" #P " (c)"); \
+  output_predicate ("unictype/pr_" #P ".h", is_property_ ## P, "u_property_" #P, "Properties", version);
   PROPERTY(white_space)
   PROPERTY(alphabetic)
   PROPERTY(other_alphabetic)
@@ -3619,7 +3621,7 @@ fill_scripts (const char *scripts_filename)
 static void
 output_scripts (const char *version)
 {
-  const char *filename = "scripts.h";
+  const char *filename = "unictype/scripts.h";
   FILE *stream;
   unsigned int ch, s, i;
   struct script_table t;
@@ -3805,7 +3807,7 @@ output_scripts (const char *version)
 static void
 output_scripts_byname (const char *version)
 {
-  const char *filename = "scripts_byname.gperf";
+  const char *filename = "unictype/scripts_byname.gperf";
   FILE *stream;
   unsigned int s;
 
@@ -3942,7 +3944,7 @@ block_last_index (unsigned int ch)
 static void
 output_blocks (const char *version)
 {
-  const char *filename = "blocks.h";
+  const char *filename = "unictype/blocks.h";
   const unsigned int shift = 8; /* bits to shift away for array access */
   const unsigned int threshold = 0x30000; /* cut-off table here to save space */
   FILE *stream;
@@ -4491,15 +4493,15 @@ static void
 output_ident_properties (const char *version)
 {
 #define PROPERTY(P) \
-  debug_output_predicate ("sy_" #P ".txt", is_ ## P); \
-  output_predicate_test ("test-sy_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
-  output_predicate ("sy_" #P ".h", is_ ## P, "u_" #P, "Language syntax properties", version);
+  debug_output_predicate ("unictype/sy_" #P ".txt", is_ ## P); \
+  output_predicate_test ("../tests/unictype/test-sy_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
+  output_predicate ("unictype/sy_" #P ".h", is_ ## P, "u_" #P, "Language syntax properties", version);
   PROPERTY(c_whitespace)
   PROPERTY(java_whitespace)
 #undef PROPERTY
 
-  output_ident_category ("sy_c_ident.h", c_ident_category, "u_c_ident", version);
-  output_ident_category ("sy_java_ident.h", java_ident_category, "u_java_ident", version);
+  output_ident_category ("unictype/sy_c_ident.h", c_ident_category, "u_c_ident", version);
+  output_ident_category ("unictype/sy_java_ident.h", java_ident_category, "u_java_ident", version);
 }
 
 /* ========================================================================= */
@@ -4724,9 +4726,9 @@ static void
 output_old_ctype (const char *version)
 {
 #define PROPERTY(P) \
-  debug_output_predicate ("ctype_" #P ".txt", is_ ## P); \
-  output_predicate_test ("test-ctype_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
-  output_predicate ("ctype_" #P ".h", is_ ## P, "u_is_" #P, "ISO C <ctype.h> like properties", version);
+  debug_output_predicate ("unictype/ctype_" #P ".txt", is_ ## P); \
+  output_predicate_test ("../tests/unictype/test-ctype_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
+  output_predicate ("unictype/ctype_" #P ".h", is_ ## P, "u_is_" #P, "ISO C <ctype.h> like properties", version);
   PROPERTY(alnum)
   PROPERTY(alpha)
   PROPERTY(cntrl)
@@ -5061,6 +5063,1223 @@ output_tables (const char *filename, const char *version)
 
 #endif
 
+/* ========================================================================= */
+
+/* The width property from the EastAsianWidth.txt file.
+   Each is NULL (unassigned) or "N", "A", "H", "W", "F", "Na".  */
+const char * unicode_width[0x110000];
+
+/* Stores in unicode_width[] the width property from the EastAsianWidth.txt
+   file.  */
+static void
+fill_width (const char *width_filename)
+{
+  unsigned int i, j;
+  FILE *stream;
+  char field0[FIELDLEN];
+  char field1[FIELDLEN];
+  char field2[FIELDLEN];
+  int lineno = 0;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_width[i] = (unicode_attributes[i].name != NULL ? "N" : NULL);
+
+  stream = fopen (width_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", width_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      int n;
+      int c;
+
+      lineno++;
+      c = getc (stream);
+      if (c == EOF)
+	break;
+      if (c == '#')
+	{
+	  do c = getc (stream); while (c != EOF && c != '\n');
+	  continue;
+	}
+      ungetc (c, stream);
+      n = getfield (stream, field0, ';');
+      n += getfield (stream, field1, ' ');
+      n += getfield (stream, field2, '\n');
+      if (n == 0)
+	break;
+      if (n != 3)
+	{
+	  fprintf (stderr, "short line in '%s':%d\n", width_filename, lineno);
+	  exit (1);
+	}
+      i = strtoul (field0, NULL, 16);
+      if (strstr (field0, "..") != NULL)
+	{
+	  /* Deal with a range.  */
+	  j = strtoul (strstr (field0, "..") + 2, NULL, 16);
+	  for (; i <= j; i++)
+	    unicode_width[i] = strdup (field1);
+	}
+      else
+	{
+	  /* Single character line.  */
+	  unicode_width[i] = strdup (field1);
+	}
+    }
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", width_filename);
+      exit (1);
+    }
+}
+
+/* Line breaking classification.  */
+
+enum
+{
+  /* Values >= 24 are resolved at run time. */
+  LBP_BK = 24, /* mandatory break */
+/*LBP_CR,         carriage return - not used here because it's a DOSism */
+/*LBP_LF,         line feed - not used here because it's a DOSism */
+  LBP_CM = 25, /* attached characters and combining marks */
+/*LBP_NL,         next line - not used here because it's equivalent to LBP_BK */
+/*LBP_SG,         surrogates - not used here because they are not characters */
+  LBP_WJ =  0, /* word joiner */
+  LBP_ZW = 26, /* zero width space */
+  LBP_GL =  1, /* non-breaking (glue) */
+  LBP_SP = 27, /* space */
+  LBP_B2 =  2, /* break opportunity before and after */
+  LBP_BA =  3, /* break opportunity after */
+  LBP_BB =  4, /* break opportunity before */
+  LBP_HY =  5, /* hyphen */
+  LBP_CB = 28, /* contingent break opportunity */
+  LBP_CL =  6, /* closing punctuation */
+  LBP_EX =  7, /* exclamation/interrogation */
+  LBP_IN =  8, /* inseparable */
+  LBP_NS =  9, /* non starter */
+  LBP_OP = 10, /* opening punctuation */
+  LBP_QU = 11, /* ambiguous quotation */
+  LBP_IS = 12, /* infix separator (numeric) */
+  LBP_NU = 13, /* numeric */
+  LBP_PO = 14, /* postfix (numeric) */
+  LBP_PR = 15, /* prefix (numeric) */
+  LBP_SY = 16, /* symbols allowing breaks */
+  LBP_AI = 29, /* ambiguous (alphabetic or ideograph) */
+  LBP_AL = 17, /* ordinary alphabetic and symbol characters */
+  LBP_H2 = 18, /* Hangul LV syllable */
+  LBP_H3 = 19, /* Hangul LVT syllable */
+  LBP_ID = 20, /* ideographic */
+  LBP_JL = 21, /* Hangul L Jamo */
+  LBP_JV = 22, /* Hangul V Jamo */
+  LBP_JT = 23, /* Hangul T Jamo */
+  LBP_SA = 30, /* complex context (South East Asian) */
+  LBP_XX = 31  /* unknown */
+};
+
+/* Returns the line breaking classification for ch, as a bit mask.  */
+static int
+get_lbp (unsigned int ch)
+{
+  int attr = 0;
+
+  if (unicode_attributes[ch].name != NULL)
+    {
+      /* mandatory break */
+      if (ch == 0x000A || ch == 0x000D || ch == 0x0085 /* newline */
+	  || ch == 0x000C /* form feed */
+	  || ch == 0x000B /* line tabulation */
+	  || ch == 0x2028 /* LINE SEPARATOR */
+	  || ch == 0x2029 /* PARAGRAPH SEPARATOR */)
+	attr |= 1 << LBP_BK;
+
+      if (ch == 0x2060 /* WORD JOINER */
+	  || ch == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */)
+	attr |= 1 << LBP_WJ;
+
+      /* zero width space */
+      if (ch == 0x200B /* ZERO WIDTH SPACE */)
+	attr |= 1 << LBP_ZW;
+
+      /* non-breaking (glue) */
+      if (ch == 0x00A0 /* NO-BREAK SPACE */
+	  || ch == 0x202F /* NARROW NO-BREAK SPACE */
+	  || ch == 0x180E /* MONGOLIAN VOWEL SEPARATOR */
+	  || ch == 0x034F /* COMBINING GRAPHEME JOINER */
+	  || ch == 0x2007 /* FIGURE SPACE */
+	  || ch == 0x2011 /* NON-BREAKING HYPHEN */
+	  || ch == 0x0F08 /* TIBETAN MARK SBRUL SHAD */
+	  || ch == 0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR */
+	  || ch == 0x0F12 /* TIBETAN MARK RGYA GRAM SHAD */
+	  || (ch >= 0x035C && ch <= 0x0362) /* COMBINING DOUBLE ... */)
+	attr |= 1 << LBP_GL;
+
+      /* space */
+      if (ch == 0x0020 /* SPACE */)
+	attr |= 1 << LBP_SP;
+
+      /* break opportunity before and after */
+      if (ch == 0x2014 /* EM DASH */)
+	attr |= 1 << LBP_B2;
+
+      /* break opportunity after */
+      if (ch == 0x1680 /* OGHAM SPACE MARK */
+	  || ch == 0x2000 /* EN QUAD */
+	  || ch == 0x2001 /* EM QUAD */
+	  || ch == 0x2002 /* EN SPACE */
+	  || ch == 0x2003 /* EM SPACE */
+	  || ch == 0x2004 /* THREE-PER-EM SPACE */
+	  || ch == 0x2005 /* FOUR-PER-EM SPACE */
+	  || ch == 0x2006 /* SIX-PER-EM SPACE */
+	  || ch == 0x2008 /* PUNCTUATION SPACE */
+	  || ch == 0x2009 /* THIN SPACE */
+	  || ch == 0x200A /* HAIR SPACE */
+	  || ch == 0x205F /* MEDIUM MATHEMATICAL SPACE */
+	  || ch == 0x0009 /* tab */
+	  || ch == 0x00AD /* SOFT HYPHEN */
+	  || ch == 0x058A /* ARMENIAN HYPHEN */
+	  || ch == 0x2010 /* HYPHEN */
+	  || ch == 0x2012 /* FIGURE DASH */
+	  || ch == 0x2013 /* EN DASH */
+	  || ch == 0x05BE /* HEBREW PUNCTUATION MAQAF */
+	  || ch == 0x0F0B /* TIBETAN MARK INTERSYLLABIC TSHEG */
+	  || ch == 0x1361 /* ETHIOPIC WORDSPACE */
+	  || ch == 0x17D8 /* KHMER SIGN BEYYAL */
+	  || ch == 0x17DA /* KHMER SIGN KOOMUUT */
+	  || ch == 0x2027 /* HYPHENATION POINT */
+	  || ch == 0x007C /* VERTICAL LINE */
+	  || ch == 0x16EB /* RUNIC SINGLE PUNCTUATION */
+	  || ch == 0x16EC /* RUNIC MULTIPLE PUNCTUATION */
+	  || ch == 0x16ED /* RUNIC CROSS PUNCTUATION */
+	  || ch == 0x2056 /* THREE DOT PUNCTUATION */
+	  || ch == 0x2058 /* FOUR DOT PUNCTUATION */
+	  || ch == 0x2059 /* FIVE DOT PUNCTUATION */
+	  || ch == 0x205A /* TWO DOT PUNCTUATION */
+	  || ch == 0x205B /* FOUR DOT MARK */
+	  || ch == 0x205D /* TRICOLON */
+	  || ch == 0x205E /* VERTICAL FOUR DOTS */
+	  || ch == 0x2E19 /* PALM BRANCH */
+	  || ch == 0x2E2A /* TWO DOTS OVER ONE DOT PUNCTUATION */
+	  || ch == 0x2E2B /* ONE DOT OVER TWO DOTS PUNCTUATION */
+	  || ch == 0x2E2C /* SQUARED FOUR DOT PUNCTUATION */
+	  || ch == 0x2E2D /* FIVE DOT PUNCTUATION */
+	  || ch == 0x2E30 /* RING POINT */
+	  || ch == 0x10100 /* AEGEAN WORD SEPARATOR LINE */
+	  || ch == 0x10101 /* AEGEAN WORD SEPARATOR DOT */
+	  || ch == 0x10102 /* AEGEAN CHECK MARK */
+	  || ch == 0x1039F /* UGARITIC WORD DIVIDER */
+	  || ch == 0x103D0 /* OLD PERSIAN WORD DIVIDER */
+	  || ch == 0x1091F /* PHOENICIAN WORD SEPARATOR */
+	  || ch == 0x12470 /* CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER */
+	  || ch == 0x0964 /* DEVANAGARI DANDA */
+	  || ch == 0x0965 /* DEVANAGARI DOUBLE DANDA */
+	  || ch == 0x0E5A /* THAI CHARACTER ANGKHANKHU */
+	  || ch == 0x0E5B /* THAI CHARACTER KHOMUT */
+	  || ch == 0x104A /* MYANMAR SIGN LITTLE SECTION */
+	  || ch == 0x104B /* MYANMAR SIGN SECTION */
+	  || ch == 0x1735 /* PHILIPPINE SINGLE PUNCTUATION */
+	  || ch == 0x1736 /* PHILIPPINE DOUBLE PUNCTUATION */
+	  || ch == 0x17D4 /* KHMER SIGN KHAN */
+	  || ch == 0x17D5 /* KHMER SIGN BARIYOOSAN */
+	  || ch == 0x1B5E /* BALINESE CARIK SIKI */
+	  || ch == 0x1B5F /* BALINESE CARIK PAREREN */
+	  || ch == 0xA8CE /* SAURASHTRA DANDA */
+	  || ch == 0xA8CF /* SAURASHTRA DOUBLE DANDA */
+	  || ch == 0xAA5D /* CHAM PUNCTUATION DANDA */
+	  || ch == 0xAA5E /* CHAM PUNCTUATION DOUBLE DANDA */
+	  || ch == 0xAA5F /* CHAM PUNCTUATION TRIPLE DANDA */
+	  || ch == 0x10A56 /* KHAROSHTHI PUNCTUATION DANDA */
+	  || ch == 0x10A57 /* KHAROSHTHI PUNCTUATION DOUBLE DANDA */
+	  || ch == 0x0F34 /* TIBETAN MARK BSDUS RTAGS */
+	  || ch == 0x0F7F /* TIBETAN SIGN RNAM BCAD */
+	  || ch == 0x0F85 /* TIBETAN MARK PALUTA */
+	  || ch == 0x0FBE /* TIBETAN KU RU KHA */
+	  || ch == 0x0FBF /* TIBETAN KU RU KHA BZHI MIG CAN */
+	  || ch == 0x0FD2 /* TIBETAN MARK NYIS TSHEG */
+#if !REVISION_22
+	  || ch == 0x1802 /* MONGOLIAN COMMA */
+	  || ch == 0x1803 /* MONGOLIAN FULL STOP */
+#endif
+	  || ch == 0x1804 /* MONGOLIAN COLON */
+	  || ch == 0x1805 /* MONGOLIAN FOUR DOTS */
+#if !REVISION_22
+	  || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
+	  || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
+#endif
+	  || ch == 0x1B5A /* BALINESE PANTI */
+	  || ch == 0x1B5B /* BALINESE PAMADA */
+	  || ch == 0x1B5C /* BALINESE WINDU */
+	  || ch == 0x1B5D /* BALINESE CARIK PAMUNGKAH */
+	  || ch == 0x1B60 /* BALINESE PAMENENG */
+	  || ch == 0x1C3B /* LEPCHA PUNCTUATION TA-ROL */
+	  || ch == 0x1C3C /* LEPCHA PUNCTUATION NYET THYOOM TA-ROL */
+	  || ch == 0x1C3D /* LEPCHA PUNCTUATION CER-WA */
+	  || ch == 0x1C3E /* LEPCHA PUNCTUATION TSHOOK CER-WA */
+	  || ch == 0x1C3F /* LEPCHA PUNCTUATION TSHOOK */
+	  || ch == 0x1C7E /* OL CHIKI PUNCTUATION MUCAAD */
+	  || ch == 0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD */
+#if !REVISION_22
+	  || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
+#endif
+	  || ch == 0x2CFA /* COPTIC OLD NUBIAN DIRECT QUESTION MARK */
+	  || ch == 0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK */
+	  || ch == 0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER */
+#if !REVISION_22
+	  || ch == 0x2CFE /* COPTIC FULL STOP */
+#endif
+	  || ch == 0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER */
+	  || (ch >= 0x2E0E && ch <= 0x2E15) /* EDITORIAL CORONIS .. UPWARDS ANCORA */
+	  || ch == 0x2E17 /* DOUBLE OBLIQUE HYPHEN */
+	  || ch == 0xA60D /* VAI COMMA */
+	  || ch == 0xA60F /* VAI QUESTION MARK */
+	  || ch == 0xA92E /* KAYAH LI SIGN CWI */
+	  || ch == 0xA92F /* KAYAH LI SIGN SHYA */
+	  || ch == 0x10A50 /* KHAROSHTHI PUNCTUATION DOT */
+	  || ch == 0x10A51 /* KHAROSHTHI PUNCTUATION SMALL CIRCLE */
+	  || ch == 0x10A52 /* KHAROSHTHI PUNCTUATION CIRCLE */
+	  || ch == 0x10A53 /* KHAROSHTHI PUNCTUATION CRESCENT BAR */
+	  || ch == 0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM */
+	  || ch == 0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS */
+	  /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+#if !REVISION_22
+	  || ch == 0x1A1E /* BUGINESE PALLAWA */
+#endif
+	  || ch == 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON */
+	  || ch == 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON */
+	  || ch == 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON */)
+	attr |= 1 << LBP_BA;
+
+      /* break opportunity before */
+      if (ch == 0x00B4 /* ACUTE ACCENT */
+#if REVISION_22
+	  || ch == 0x1FFD /* GREEK OXIA */
+	  || ch == 0x02DF /* MODIFIER LETTER CROSS ACCENT */
+#endif
+	  || ch == 0x02C8 /* MODIFIER LETTER VERTICAL LINE */
+	  || ch == 0x02CC /* MODIFIER LETTER LOW VERTICAL LINE */
+	  || ch == 0x0F01 /* TIBETAN MARK GTER YIG MGO TRUNCATED A */
+	  || ch == 0x0F02 /* TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA */
+	  || ch == 0x0F03 /* TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA */
+	  || ch == 0x0F04 /* TIBETAN MARK INITIAL YIG MGO MDUN MA */
+	  || ch == 0x0F06 /* TIBETAN MARK CARET YIG MGO PHUR SHAD MA */
+	  || ch == 0x0F07 /* TIBETAN MARK YIG MGO TSHEG SHAD MA */
+	  || ch == 0x0F09 /* TIBETAN MARK BSKUR YIG MGO */
+	  || ch == 0x0F0A /* TIBETAN MARK BKA- SHOG YIG MGO */
+	  || ch == 0x0FD0 /* TIBETAN MARK BSKA- SHOG GI MGO RGYAN */
+	  || ch == 0x0FD1 /* TIBETAN MARK MNYAM YIG GI MGO RGYAN */
+	  || ch == 0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA */
+	  || ch == 0xA874 /* PHAGS-PA SINGLE HEAD MARK */
+	  || ch == 0xA875 /* PHAGS-PA DOUBLE HEAD MARK */
+	  || ch == 0x1806 /* MONGOLIAN TODO SOFT HYPHEN */)
+	attr |= 1 << LBP_BB;
+
+      /* hyphen */
+      if (ch == 0x002D /* HYPHEN-MINUS */)
+	attr |= 1 << LBP_HY;
+
+      /* contingent break opportunity */
+      if (ch == 0xFFFC /* OBJECT REPLACEMENT CHARACTER */)
+	attr |= 1 << LBP_CB;
+
+      /* closing punctuation */
+      if ((unicode_attributes[ch].category[0] == 'P'
+	   && unicode_attributes[ch].category[1] == 'e')
+	  || ch == 0x3001 /* IDEOGRAPHIC COMMA */
+	  || ch == 0x3002 /* IDEOGRAPHIC FULL STOP */
+	  || ch == 0xFE11 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA */
+	  || ch == 0xFE12 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP */
+	  || ch == 0xFE50 /* SMALL COMMA */
+	  || ch == 0xFE52 /* SMALL FULL STOP */
+	  || ch == 0xFF0C /* FULLWIDTH COMMA */
+	  || ch == 0xFF0E /* FULLWIDTH FULL STOP */
+	  || ch == 0xFF61 /* HALFWIDTH IDEOGRAPHIC FULL STOP */
+	  || ch == 0xFF64 /* HALFWIDTH IDEOGRAPHIC COMMA */)
+	attr |= 1 << LBP_CL;
+
+      /* exclamation/interrogation */
+      if (ch == 0x0021 /* EXCLAMATION MARK */
+	  || ch == 0x003F /* QUESTION MARK */
+	  || ch == 0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA */
+#if !REVISION_22
+	  || ch == 0x060C /* ARABIC COMMA */
+#endif
+	  || ch == 0x061B /* ARABIC SEMICOLON */
+	  || ch == 0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK */
+	  || ch == 0x061F /* ARABIC QUESTION MARK */
+#if !REVISION_22
+	  || ch == 0x066A /* ARABIC PERCENT SIGN */
+#endif
+	  || ch == 0x06D4 /* ARABIC FULL STOP */
+	  || ch == 0x07F9 /* NKO EXCLAMATION MARK */
+	  || ch == 0x0F0D /* TIBETAN MARK SHAD */
+	  || ch == 0x0F0E /* TIBETAN MARK NYIS SHAD */
+	  || ch == 0x0F0F /* TIBETAN MARK TSHEG SHAD */
+	  || ch == 0x0F10 /* TIBETAN MARK NYIS TSHEG SHAD */
+	  || ch == 0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD */
+	  || ch == 0x0F14 /* TIBETAN MARK GTER TSHEG */
+#if REVISION_22
+	  || ch == 0x1802 /* MONGOLIAN COMMA */
+	  || ch == 0x1803 /* MONGOLIAN FULL STOP */
+	  || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
+	  || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
+#endif
+	  || ch == 0x1944 /* LIMBU EXCLAMATION MARK */
+	  || ch == 0x1945 /* LIMBU QUESTION MARK */
+	  || ch == 0x2762 /* HEAVY EXCLAMATION MARK ORNAMENT */
+	  || ch == 0x2763 /* HEAVY HEART EXCLAMATION MARK ORNAMENT */
+#if REVISION_22
+	  || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
+	  || ch == 0x2CFE /* COPTIC FULL STOP */
+#endif
+	  || ch == 0x2E2E /* REVERSED QUESTION MARK */
+	  || ch == 0xA60C /* VAI SYLLABLE LENGTHENER */
+	  || ch == 0xA60E /* VAI FULL STOP */
+	  || ch == 0xA876 /* PHAGS-PA MARK SHAD */
+	  || ch == 0xA877 /* PHAGS-PA MARK DOUBLE SHAD */
+	  || ch == 0xFE15 /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
+	  || ch == 0xFE16 /* PRESENTATION FORM FOR VERTICAL QUESTION MARK */
+	  || ch == 0xFE56 /* SMALL QUESTION MARK */
+	  || ch == 0xFE57 /* SMALL EXCLAMATION MARK */
+	  || ch == 0xFF01 /* FULLWIDTH EXCLAMATION MARK */
+	  || ch == 0xFF1F /* FULLWIDTH QUESTION MARK */)
+	attr |= 1 << LBP_EX;
+
+      /* inseparable */
+      if (ch == 0x2024 /* ONE DOT LEADER */
+	  || ch == 0x2025 /* TWO DOT LEADER */
+	  || ch == 0x2026 /* HORIZONTAL ELLIPSIS */
+	  || ch == 0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */)
+	attr |= 1 << LBP_IN;
+
+      /* non starter */
+      if (ch == 0x17D6 /* KHMER SIGN CAMNUC PII KUUH */
+	  || ch == 0x203C /* DOUBLE EXCLAMATION MARK */
+	  || ch == 0x203D /* INTERROBANG */
+	  || ch == 0x2047 /* DOUBLE QUESTION MARK */
+	  || ch == 0x2048 /* QUESTION EXCLAMATION MARK */
+	  || ch == 0x2049 /* EXCLAMATION QUESTION MARK */
+	  || ch == 0x3005 /* IDEOGRAPHIC ITERATION MARK */
+	  || ch == 0x301C /* WAVE DASH */
+	  || ch == 0x303C /* MASU MARK */
+	  || ch == 0x303B /* VERTICAL IDEOGRAPHIC ITERATION MARK */
+	  || ch == 0x309B /* KATAKANA-HIRAGANA VOICED SOUND MARK */
+	  || ch == 0x309C /* KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
+	  || ch == 0x309D /* HIRAGANA ITERATION MARK */
+	  || ch == 0x309E /* HIRAGANA VOICED ITERATION MARK */
+	  || ch == 0x30A0 /* KATAKANA-HIRAGANA DOUBLE HYPHEN */
+	  || ch == 0x30FB /* KATAKANA MIDDLE DOT */
+	  || ch == 0x30FC /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
+	  || ch == 0x30FD /* KATAKANA ITERATION MARK */
+	  || ch == 0x30FE /* KATAKANA VOICED ITERATION MARK */
+	  || ch == 0xA015 /* YI SYLLABLE WU */
+	  || ch == 0xFE54 /* SMALL SEMICOLON */
+	  || ch == 0xFE55 /* SMALL COLON */
+	  || ch == 0xFF1A /* FULLWIDTH COLON */
+	  || ch == 0xFF1B /* FULLWIDTH SEMICOLON */
+	  || ch == 0xFF65 /* HALFWIDTH KATAKANA MIDDLE DOT */
+	  || ch == 0xFF70 /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
+	  || ch == 0xFF9E /* HALFWIDTH KATAKANA VOICED SOUND MARK */
+	  || ch == 0xFF9F /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
+	  || strstr (unicode_attributes[ch].name, "HIRAGANA LETTER SMALL ") != NULL
+	  || strstr (unicode_attributes[ch].name, "KATAKANA LETTER SMALL ") != NULL)
+	attr |= 1 << LBP_NS;
+
+      /* opening punctuation */
+      if ((unicode_attributes[ch].category[0] == 'P'
+	   && unicode_attributes[ch].category[1] == 's')
+#if REVISION_22
+	  || ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
+	  || ch == 0x00BF /* INVERTED QUESTION MARK */
+#endif
+	  || ch == 0x2E18 /* INVERTED INTERROBANG */)
+	attr |= 1 << LBP_OP;
+
+      /* ambiguous quotation */
+      if ((unicode_attributes[ch].category[0] == 'P'
+	   && (unicode_attributes[ch].category[1] == 'f'
+	       || unicode_attributes[ch].category[1] == 'i'))
+	  || ch == 0x0022 /* QUOTATION MARK */
+	  || ch == 0x0027 /* APOSTROPHE */
+	  || ch == 0x275B /* HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT */
+	  || ch == 0x275C /* HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT */
+	  || ch == 0x275D /* HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT */
+	  || ch == 0x275E /* HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT */
+	  || ch == 0x2E00 /* RIGHT ANGLE SUBSTITUTION MARKER */
+	  || ch == 0x2E01 /* RIGHT ANGLE DOTTED SUBSTITUTION MARKER */
+	  || ch == 0x2E06 /* RAISED INTERPOLATION MARKER */
+	  || ch == 0x2E07 /* RAISED DOTTED INTERPOLATION MARKER */
+	  || ch == 0x2E08 /* DOTTED TRANSPOSITION MARKER */
+	  || ch == 0x2E0B /* RAISED SQUARE */)
+	attr |= 1 << LBP_QU;
+
+      /* infix separator (numeric) */
+      if (ch == 0x002C /* COMMA */
+	  || ch == 0x002E /* FULL STOP */
+	  || ch == 0x003A /* COLON */
+	  || ch == 0x003B /* SEMICOLON */
+	  || ch == 0x037E /* GREEK QUESTION MARK */
+	  || ch == 0x0589 /* ARMENIAN FULL STOP */
+#if REVISION_22
+	  || ch == 0x060C /* ARABIC COMMA */
+#endif
+	  || ch == 0x060D /* ARABIC DATE SEPARATOR */
+	  || ch == 0x07F8 /* NKO COMMA */
+	  || ch == 0x2044 /* FRACTION SLASH */
+	  || ch == 0xFE10 /* PRESENTATION FORM FOR VERTICAL COMMA */
+	  || ch == 0xFE13 /* PRESENTATION FORM FOR VERTICAL COLON */
+	  || ch == 0xFE14 /* PRESENTATION FORM FOR VERTICAL SEMICOLON */)
+	attr |= 1 << LBP_IS;
+
+      /* numeric */
+      if ((unicode_attributes[ch].category[0] == 'N'
+	   && unicode_attributes[ch].category[1] == 'd'
+	   && strstr (unicode_attributes[ch].name, "FULLWIDTH") == NULL)
+	  || ch == 0x066B /* ARABIC DECIMAL SEPARATOR */
+	  || ch == 0x066C /* ARABIC THOUSANDS SEPARATOR */)
+	attr |= 1 << LBP_NU;
+
+      /* postfix (numeric) */
+      if (ch == 0x0025 /* PERCENT SIGN */
+	  || ch == 0x00A2 /* CENT SIGN */
+	  || ch == 0x00B0 /* DEGREE SIGN */
+	  || ch == 0x060B /* AFGHANI SIGN */
+#if REVISION_22
+	  || ch == 0x066A /* ARABIC PERCENT SIGN */
+#endif
+	  || ch == 0x2030 /* PER MILLE SIGN */
+	  || ch == 0x2031 /* PER TEN THOUSAND SIGN */
+	  || ch == 0x2032 /* PRIME */
+	  || ch == 0x2033 /* DOUBLE PRIME */
+	  || ch == 0x2034 /* TRIPLE PRIME */
+	  || ch == 0x2035 /* REVERSED PRIME */
+	  || ch == 0x2036 /* REVERSED DOUBLE PRIME */
+	  || ch == 0x2037 /* REVERSED TRIPLE PRIME */
+	  || ch == 0x20A7 /* PESETA SIGN */
+	  || ch == 0x2103 /* DEGREE CELSIUS */
+	  || ch == 0x2109 /* DEGREE FAHRENHEIT */
+	  || ch == 0xFDFC /* RIAL SIGN */
+	  || ch == 0xFE6A /* SMALL PERCENT SIGN */
+	  || ch == 0xFF05 /* FULLWIDTH PERCENT SIGN */
+	  || ch == 0xFFE0 /* FULLWIDTH DIGIT ZERO */)
+	attr |= 1 << LBP_PO;
+
+      /* prefix (numeric) */
+      if ((unicode_attributes[ch].category[0] == 'S'
+	   && unicode_attributes[ch].category[1] == 'c')
+	  || ch == 0x002B /* PLUS SIGN */
+	  || ch == 0x005C /* REVERSE SOLIDUS */
+	  || ch == 0x00B1 /* PLUS-MINUS SIGN */
+	  || ch == 0x2116 /* NUMERO SIGN */
+	  || ch == 0x2212 /* MINUS SIGN */
+	  || ch == 0x2213 /* MINUS-OR-PLUS SIGN */)
+	if (!(attr & (1 << LBP_PO)))
+	  attr |= 1 << LBP_PR;
+
+      /* symbols allowing breaks */
+      if (ch == 0x002F /* SOLIDUS */)
+	attr |= 1 << LBP_SY;
+
+      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) == 0)
+	attr |= 1 << LBP_H2;
+
+      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) != 0)
+	attr |= 1 << LBP_H3;
+
+      if ((ch >= 0x1100 && ch <= 0x1159) || ch == 0x115F)
+	attr |= 1 << LBP_JL;
+
+      if (ch >= 0x1160 && ch <= 0x11A2)
+	attr |= 1 << LBP_JV;
+
+      if (ch >= 0x11A8 && ch <= 0x11F9)
+	attr |= 1 << LBP_JT;
+
+      /* complex context (South East Asian) */
+      if (((unicode_attributes[ch].category[0] == 'C'
+	    && unicode_attributes[ch].category[1] == 'f')
+	   || (unicode_attributes[ch].category[0] == 'L'
+	       && (unicode_attributes[ch].category[1] == 'm'
+		   || unicode_attributes[ch].category[1] == 'o'))
+	   || (unicode_attributes[ch].category[0] == 'M'
+	       && (unicode_attributes[ch].category[1] == 'c'
+		   || unicode_attributes[ch].category[1] == 'n'))
+	   /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+	   || ch == 0x19DE /* NEW TAI LUE SIGN LAE */
+	   || ch == 0x19DF /* NEW TAI LUE SIGN LAEV */)
+	  && ((ch >= 0x0E00 && ch <= 0x0EFF)
+	      || (ch >= 0x1000 && ch <= 0x109F)
+	      || (ch >= 0x1780 && ch <= 0x17FF)
+	      || (ch >= 0x1950 && ch <= 0x19DF)))
+	attr |= 1 << LBP_SA;
+
+      /* attached characters and combining marks */
+      if ((unicode_attributes[ch].category[0] == 'M'
+	   && (unicode_attributes[ch].category[1] == 'c'
+	       || unicode_attributes[ch].category[1] == 'e'
+	       || unicode_attributes[ch].category[1] == 'n'))
+	  || (unicode_attributes[ch].category[0] == 'C'
+	      && (unicode_attributes[ch].category[1] == 'c'
+		  || unicode_attributes[ch].category[1] == 'f')))
+	if (!(attr & ((1 << LBP_BK) | (1 << LBP_BA) | (1 << LBP_GL) | (1 << LBP_SA) | (1 << LBP_WJ) | (1 << LBP_ZW))))
+	  attr |= 1 << LBP_CM;
+
+      /* ideographic */
+      if ((ch >= 0x2E80 && ch <= 0x2FFF) /* CJK RADICAL, KANGXI RADICAL, IDEOGRAPHIC DESCRIPTION */
+	  || ch == 0x3000 /* IDEOGRAPHIC SPACE */
+	  || (ch >= 0x3040 && ch <= 0x309F) /* HIRAGANA */
+	  || (ch >= 0x30A0 && ch <= 0x30FF) /* KATAKANA */
+	  || (ch >= 0x3400 && ch <= 0x4DB5) /* CJK Ideograph Extension A */
+	  || (ch >= 0x4E00 && ch <= 0x9FBB) /* CJK Ideograph */
+	  || (ch >= 0xF900 && ch <= 0xFAD9) /* CJK COMPATIBILITY IDEOGRAPH */
+	  || (ch >= 0xA000 && ch <= 0xA48F) /* YI SYLLABLE */
+	  || (ch >= 0xA490 && ch <= 0xA4CF) /* YI RADICAL */
+	  || ch == 0xFE62 /* SMALL PLUS SIGN */
+	  || ch == 0xFE63 /* SMALL HYPHEN-MINUS */
+	  || ch == 0xFE64 /* SMALL LESS-THAN SIGN */
+	  || ch == 0xFE65 /* SMALL GREATER-THAN SIGN */
+	  || ch == 0xFE66 /* SMALL EQUALS SIGN */
+	  || (ch >= 0xFF10 && ch <= 0xFF19) /* FULLWIDTH DIGIT */
+	  || (ch >= 0x20000 && ch <= 0x2A6D6) /* CJK Ideograph Extension B */
+	  || (ch >= 0x2F800 && ch <= 0x2FA1D) /* CJK COMPATIBILITY IDEOGRAPH */
+	  || strstr (unicode_attributes[ch].name, "FULLWIDTH LATIN ") != NULL
+	  || (ch >= 0x3000 && ch <= 0x33FF
+	      && !(attr & ((1 << LBP_CM) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_CL))))
+	  /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+	  || ch == 0xFE30 /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER */
+	  || ch == 0xFE31 /* PRESENTATION FORM FOR VERTICAL EM DASH */
+	  || ch == 0xFE32 /* PRESENTATION FORM FOR VERTICAL EN DASH */
+	  || ch == 0xFE33 /* PRESENTATION FORM FOR VERTICAL LOW LINE */
+	  || ch == 0xFE34 /* PRESENTATION FORM FOR VERTICAL WAVY LOW LINE */
+	  || ch == 0xFE45 /* SESAME DOT */
+	  || ch == 0xFE46 /* WHITE SESAME DOT */
+	  || ch == 0xFE49 /* DASHED OVERLINE */
+	  || ch == 0xFE4A /* CENTRELINE OVERLINE */
+	  || ch == 0xFE4B /* WAVY OVERLINE */
+	  || ch == 0xFE4C /* DOUBLE WAVY OVERLINE */
+	  || ch == 0xFE4D /* DASHED LOW LINE */
+	  || ch == 0xFE4E /* CENTRELINE LOW LINE */
+	  || ch == 0xFE4F /* WAVY LOW LINE */
+	  || ch == 0xFE51 /* SMALL IDEOGRAPHIC COMMA */
+	  || ch == 0xFE58 /* SMALL EM DASH */
+	  || ch == 0xFE5F /* SMALL NUMBER SIGN */
+	  || ch == 0xFE60 /* SMALL AMPERSAND */
+	  || ch == 0xFE61 /* SMALL ASTERISK */
+	  || ch == 0xFE68 /* SMALL REVERSE SOLIDUS */
+	  || ch == 0xFE6B /* SMALL COMMERCIAL AT */
+	  || ch == 0xFF02 /* FULLWIDTH QUOTATION MARK */
+	  || ch == 0xFF03 /* FULLWIDTH NUMBER SIGN */
+	  || ch == 0xFF06 /* FULLWIDTH AMPERSAND */
+	  || ch == 0xFF07 /* FULLWIDTH APOSTROPHE */
+	  || ch == 0xFF0A /* FULLWIDTH ASTERISK */
+	  || ch == 0xFF0B /* FULLWIDTH PLUS SIGN */
+	  || ch == 0xFF0D /* FULLWIDTH HYPHEN-MINUS */
+	  || ch == 0xFF0F /* FULLWIDTH SOLIDUS */
+	  || ch == 0xFF1C /* FULLWIDTH LESS-THAN SIGN */
+	  || ch == 0xFF1D /* FULLWIDTH EQUALS SIGN */
+	  || ch == 0xFF1E /* FULLWIDTH GREATER-THAN SIGN */
+	  || ch == 0xFF20 /* FULLWIDTH COMMERCIAL AT */
+	  || ch == 0xFF3C /* FULLWIDTH REVERSE SOLIDUS */
+	  || ch == 0xFF3E /* FULLWIDTH CIRCUMFLEX ACCENT */
+	  || ch == 0xFF3F /* FULLWIDTH LOW LINE */
+	  || ch == 0xFF40 /* FULLWIDTH GRAVE ACCENT */
+	  || ch == 0xFF5C /* FULLWIDTH VERTICAL LINE */
+	  || ch == 0xFF5E /* FULLWIDTH TILDE */
+	  || ch == 0xFFE2 /* FULLWIDTH NOT SIGN */
+	  || ch == 0xFFE3 /* FULLWIDTH MACRON */
+	  || ch == 0xFFE4 /* FULLWIDTH BROKEN BAR */)
+	if (!(attr & ((1 << LBP_NS) | (1 << LBP_CM))))
+	  {
+	    /* ambiguous (ideograph) ? */
+	    if ((unicode_width[ch] != NULL
+		 && unicode_width[ch][0] == 'A'
+		 && ch >= 0x2000)
+		|| ch == 0x24EA /* CIRCLED DIGIT ZERO */
+		|| (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */)
+	      attr |= 1 << LBP_AI;
+	    else
+	      attr |= 1 << LBP_ID;
+	  }
+
+      /* ordinary alphabetic and symbol characters */
+      if ((unicode_attributes[ch].category[0] == 'L'
+	   && (unicode_attributes[ch].category[1] == 'u'
+	       || unicode_attributes[ch].category[1] == 'l'
+	       || unicode_attributes[ch].category[1] == 't'
+	       || unicode_attributes[ch].category[1] == 'm'
+	       || unicode_attributes[ch].category[1] == 'o'))
+	  || (unicode_attributes[ch].category[0] == 'S'
+	      && (unicode_attributes[ch].category[1] == 'm'
+		  || unicode_attributes[ch].category[1] == 'k'
+		  || unicode_attributes[ch].category[1] == 'o'))
+	  || (unicode_attributes[ch].category[0] == 'N'
+	      && (unicode_attributes[ch].category[1] == 'l'
+		  || unicode_attributes[ch].category[1] == 'o'))
+	  || (unicode_attributes[ch].category[0] == 'P'
+	      && (unicode_attributes[ch].category[1] == 'c'
+		  || unicode_attributes[ch].category[1] == 'd'
+		  || unicode_attributes[ch].category[1] == 'o'))
+	  || ch == 0x0600 /* ARABIC NUMBER SIGN */
+	  || ch == 0x0601 /* ARABIC SIGN SANAH */
+	  || ch == 0x0602 /* ARABIC FOOTNOTE MARKER */
+	  || ch == 0x0603 /* ARABIC SIGN SAFHA */
+	  || ch == 0x06DD /* ARABIC END OF AYAH */
+	  || ch == 0x070F /* SYRIAC ABBREVIATION MARK */
+	  || ch == 0x2061 /* FUNCTION APPLICATION */
+	  || ch == 0x2062 /* INVISIBLE TIMES */
+	  || ch == 0x2063 /* INVISIBLE SEPARATOR */
+	  || ch == 0x2064 /* INVISIBLE PLUS */)
+	if (!(attr & ((1 << LBP_GL) | (1 << LBP_B2) | (1 << LBP_BA) | (1 << LBP_BB) | (1 << LBP_HY) | (1 << LBP_CB) | (1 << LBP_CL) | (1 << LBP_EX) | (1 << LBP_IN) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_QU) | (1 << LBP_IS) | (1 << LBP_NU) | (1 << LBP_PO) | (1 << LBP_PR) | (1 << LBP_SY) | (1 << LBP_H2) | (1 << LBP_H3) | (1 << LBP_JL) | (1 << LBP_JV) | (1 << LBP_JT) | (1 << LBP_SA) | (1 << LBP_ID))))
+	  {
+	    /* ambiguous (alphabetic) ? */
+	    if ((unicode_width[ch] != NULL
+		 && unicode_width[ch][0] == 'A'
+		 && ch >= 0x2000
+		 /* Extra exceptions for compatibility with Unicode LineBreak.txt.  */
+		 && ch != 0x2022 /* BULLET */
+		 && ch != 0x203E /* OVERLINE */
+		 && ch != 0x2126 /* OHM SIGN */
+		 && ch != 0x2153 /* VULGAR FRACTION ONE THIRD */
+		 && ch != 0x215C /* VULGAR FRACTION THREE EIGHTHS */
+		 && ch != 0x215D /* VULGAR FRACTION FIVE EIGHTHS */
+		 && ch != 0x21B8 /* NORTH WEST ARROW TO LONG BAR */
+		 && ch != 0x21B9 /* LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR */
+		 && ch != 0x21E7 /* UPWARDS WHITE ARROW */
+		 && ch != 0x24FF /* NEGATIVE CIRCLED DIGIT ZERO */
+		 && ch != 0x273D /* HEAVY TEARDROP-SPOKED ASTERISK */)
+#if !REVISION_22
+		|| ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
+		|| ch == 0x00A7 /* SECTION SIGN */
+		|| ch == 0x00A8 /* DIAERESIS */
+		|| ch == 0x00AA /* FEMININE ORDINAL INDICATOR */
+		|| ch == 0x00B2 /* SUPERSCRIPT TWO */
+		|| ch == 0x00B3 /* SUPERSCRIPT THREE */
+		|| ch == 0x00B6 /* PILCROW SIGN */
+		|| ch == 0x00B7 /* MIDDLE DOT */
+		|| ch == 0x00B8 /* CEDILLA */
+		|| ch == 0x00B9 /* SUPERSCRIPT ONE */
+		|| ch == 0x00BA /* MASCULINE ORDINAL INDICATOR */
+		|| ch == 0x00BC /* VULGAR FRACTION ONE QUARTER */
+		|| ch == 0x00BD /* VULGAR FRACTION ONE HALF */
+		|| ch == 0x00BE /* VULGAR FRACTION THREE QUARTERS */
+		|| ch == 0x00BF /* INVERTED QUESTION MARK */
+		|| ch == 0x00D7 /* MULTIPLICATION SIGN */
+		|| ch == 0x00F7 /* DIVISION SIGN */
+		|| ch == 0x02C7 /* CARON */
+		|| ch == 0x02C9 /* MODIFIER LETTER MACRON */
+		|| ch == 0x02CA /* MODIFIER LETTER ACUTE ACCENT */
+		|| ch == 0x02CB /* MODIFIER LETTER GRAVE ACCENT */
+		|| ch == 0x02CD /* MODIFIER LETTER LOW MACRON */
+		|| ch == 0x02D0 /* MODIFIER LETTER TRIANGULAR COLON */
+		|| ch == 0x02D8 /* BREVE */
+		|| ch == 0x02D9 /* DOT ABOVE */
+		|| ch == 0x02DA /* RING ABOVE */
+		|| ch == 0x02DB /* OGONEK */
+		|| ch == 0x02DD /* DOUBLE ACUTE ACCENT */
+#endif
+		|| ch == 0x24EA /* CIRCLED DIGIT ZERO */
+		|| (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */
+		/* Extra characters for compatibility with Unicode LineBreak.txt.  */
+		|| ch == 0x2155 /* VULGAR FRACTION ONE FIFTH */
+		|| ch == 0x2574 /* BOX DRAWINGS LIGHT LEFT */
+		|| ch == 0x2616 /* WHITE SHOGI PIECE */
+		|| ch == 0x2617 /* BLACK SHOGI PIECE */)
+	      attr |= 1 << LBP_AI;
+	    else
+	      attr |= 1 << LBP_AL;
+	    attr &= ~(1 << LBP_CM);
+	  }
+    }
+
+  if (attr == 0)
+    /* unknown */
+    attr |= 1 << LBP_XX;
+
+  return attr;
+}
+
+/* Output the line breaking properties in a human readable format.  */
+static void
+debug_output_lbp (FILE *stream)
+{
+  unsigned int i;
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = get_lbp (i);
+      if (attr != 1 << LBP_XX)
+	{
+	  fprintf (stream, "0x%04X", i);
+#define PRINT_BIT(attr,bit) \
+  if (attr & (1 << bit)) fprintf (stream, " " #bit);
+	  PRINT_BIT(attr,LBP_BK);
+	  PRINT_BIT(attr,LBP_CM);
+	  PRINT_BIT(attr,LBP_WJ);
+	  PRINT_BIT(attr,LBP_ZW);
+	  PRINT_BIT(attr,LBP_GL);
+	  PRINT_BIT(attr,LBP_SP);
+	  PRINT_BIT(attr,LBP_B2);
+	  PRINT_BIT(attr,LBP_BA);
+	  PRINT_BIT(attr,LBP_BB);
+	  PRINT_BIT(attr,LBP_HY);
+	  PRINT_BIT(attr,LBP_CB);
+	  PRINT_BIT(attr,LBP_CL);
+	  PRINT_BIT(attr,LBP_EX);
+	  PRINT_BIT(attr,LBP_IN);
+	  PRINT_BIT(attr,LBP_NS);
+	  PRINT_BIT(attr,LBP_OP);
+	  PRINT_BIT(attr,LBP_QU);
+	  PRINT_BIT(attr,LBP_IS);
+	  PRINT_BIT(attr,LBP_NU);
+	  PRINT_BIT(attr,LBP_PO);
+	  PRINT_BIT(attr,LBP_PR);
+	  PRINT_BIT(attr,LBP_SY);
+	  PRINT_BIT(attr,LBP_AI);
+	  PRINT_BIT(attr,LBP_AL);
+	  PRINT_BIT(attr,LBP_H2);
+	  PRINT_BIT(attr,LBP_H3);
+	  PRINT_BIT(attr,LBP_ID);
+	  PRINT_BIT(attr,LBP_JL);
+	  PRINT_BIT(attr,LBP_JV);
+	  PRINT_BIT(attr,LBP_JT);
+	  PRINT_BIT(attr,LBP_SA);
+	  PRINT_BIT(attr,LBP_XX);
+#undef PRINT_BIT
+	  fprintf (stream, "\n");
+	}
+    }
+}
+
+static void
+debug_output_lbrk_tables (const char *filename)
+{
+  FILE *stream;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  debug_output_lbp (stream);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* The line breaking property from the LineBreak.txt file.  */
+int unicode_org_lbp[0x110000];
+
+/* Stores in unicode_org_lbp[] the line breaking property from the
+   LineBreak.txt file.  */
+static void
+fill_org_lbp (const char *linebreak_filename)
+{
+  unsigned int i, j;
+  FILE *stream;
+  char field0[FIELDLEN];
+  char field1[FIELDLEN];
+  char field2[FIELDLEN];
+  int lineno = 0;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_org_lbp[i] = LBP_XX;
+
+  stream = fopen (linebreak_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", linebreak_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      int n;
+      int c;
+      int value;
+
+      lineno++;
+      c = getc (stream);
+      if (c == EOF)
+	break;
+      if (c == '#')
+	{
+	  do c = getc (stream); while (c != EOF && c != '\n');
+	  continue;
+	}
+      ungetc (c, stream);
+      n = getfield (stream, field0, ';');
+      n += getfield (stream, field1, ' ');
+      n += getfield (stream, field2, '\n');
+      if (n == 0)
+	break;
+      if (n != 3)
+	{
+	  fprintf (stderr, "short line in '%s':%d\n", linebreak_filename,
+		   lineno);
+	  exit (1);
+	}
+#define TRY(bit) else if (strcmp (field1, #bit + 4) == 0) value = bit;
+      if (false) {}
+      TRY(LBP_BK)
+      TRY(LBP_CM)
+      TRY(LBP_WJ)
+      TRY(LBP_ZW)
+      TRY(LBP_GL)
+      TRY(LBP_SP)
+      TRY(LBP_B2)
+      TRY(LBP_BA)
+      TRY(LBP_BB)
+      TRY(LBP_HY)
+      TRY(LBP_CB)
+      TRY(LBP_CL)
+      TRY(LBP_EX)
+      TRY(LBP_IN)
+      TRY(LBP_NS)
+      TRY(LBP_OP)
+      TRY(LBP_QU)
+      TRY(LBP_IS)
+      TRY(LBP_NU)
+      TRY(LBP_PO)
+      TRY(LBP_PR)
+      TRY(LBP_SY)
+      TRY(LBP_AI)
+      TRY(LBP_AL)
+      TRY(LBP_H2)
+      TRY(LBP_H3)
+      TRY(LBP_ID)
+      TRY(LBP_JL)
+      TRY(LBP_JV)
+      TRY(LBP_JT)
+      TRY(LBP_SA)
+      TRY(LBP_XX)
+#undef TRY
+      else if (strcmp (field1, "LF") == 0) value = LBP_BK;
+      else if (strcmp (field1, "CR") == 0) value = LBP_BK;
+      else if (strcmp (field1, "NL") == 0) value = LBP_BK;
+      else if (strcmp (field1, "SG") == 0) value = LBP_XX;
+      else
+	{
+	  fprintf (stderr, "unknown property value \"%s\" in '%s':%d\n",
+		   field1, linebreak_filename, lineno);
+	  exit (1);
+	}
+      i = strtoul (field0, NULL, 16);
+      if (strstr (field0, "..") != NULL)
+	{
+	  /* Deal with a range.  */
+	  j = strtoul (strstr (field0, "..") + 2, NULL, 16);
+	  for (; i <= j; i++)
+	    unicode_org_lbp[i] = value;
+	}
+      else
+	{
+	  /* Single character line.  */
+	  unicode_org_lbp[i] = value;
+	}
+    }
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", linebreak_filename);
+      exit (1);
+    }
+}
+
+/* Output the line breaking properties in a human readable format.  */
+static void
+debug_output_org_lbp (FILE *stream)
+{
+  unsigned int i;
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = unicode_org_lbp[i];
+      if (attr != LBP_XX)
+	{
+	  fprintf (stream, "0x%04X", i);
+#define PRINT_BIT(attr,bit) \
+  if (attr == bit) fprintf (stream, " " #bit);
+	  PRINT_BIT(attr,LBP_BK);
+	  PRINT_BIT(attr,LBP_CM);
+	  PRINT_BIT(attr,LBP_WJ);
+	  PRINT_BIT(attr,LBP_ZW);
+	  PRINT_BIT(attr,LBP_GL);
+	  PRINT_BIT(attr,LBP_SP);
+	  PRINT_BIT(attr,LBP_B2);
+	  PRINT_BIT(attr,LBP_BA);
+	  PRINT_BIT(attr,LBP_BB);
+	  PRINT_BIT(attr,LBP_HY);
+	  PRINT_BIT(attr,LBP_CB);
+	  PRINT_BIT(attr,LBP_CL);
+	  PRINT_BIT(attr,LBP_EX);
+	  PRINT_BIT(attr,LBP_IN);
+	  PRINT_BIT(attr,LBP_NS);
+	  PRINT_BIT(attr,LBP_OP);
+	  PRINT_BIT(attr,LBP_QU);
+	  PRINT_BIT(attr,LBP_IS);
+	  PRINT_BIT(attr,LBP_NU);
+	  PRINT_BIT(attr,LBP_PO);
+	  PRINT_BIT(attr,LBP_PR);
+	  PRINT_BIT(attr,LBP_SY);
+	  PRINT_BIT(attr,LBP_AI);
+	  PRINT_BIT(attr,LBP_AL);
+	  PRINT_BIT(attr,LBP_H2);
+	  PRINT_BIT(attr,LBP_H3);
+	  PRINT_BIT(attr,LBP_ID);
+	  PRINT_BIT(attr,LBP_JL);
+	  PRINT_BIT(attr,LBP_JV);
+	  PRINT_BIT(attr,LBP_JT);
+	  PRINT_BIT(attr,LBP_SA);
+	  PRINT_BIT(attr,LBP_XX);
+#undef PRINT_BIT
+	  fprintf (stream, "\n");
+	}
+    }
+}
+
+static void
+debug_output_org_lbrk_tables (const char *filename)
+{
+  FILE *stream;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  debug_output_org_lbp (stream);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE lbp_table
+#define ELEMENT unsigned char
+#define DEFAULT LBP_XX
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+static void
+output_lbp (FILE *stream1, FILE *stream2)
+{
+  unsigned int i;
+  struct lbp_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  t.p = 7;
+  t.q = 9;
+  lbp_table_init (&t);
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = get_lbp (i);
+
+      /* Now attr should contain exactly one bit.  */
+      if (attr == 0 || ((attr & (attr - 1)) != 0))
+	abort ();
+
+      if (attr != 1 << LBP_XX)
+	{
+	  unsigned int log2_attr;
+	  for (log2_attr = 0; attr > 1; attr >>= 1, log2_attr++);
+
+	  lbp_table_add (&t, i, log2_attr);
+	}
+    }
+
+  lbp_table_finalize (&t);
+
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream1, "#define lbrkprop_header_%d %d\n", i,
+	     ((uint32_t *) t.result)[i]);
+  fprintf (stream1, "\n");
+  fprintf (stream1, "typedef struct\n");
+  fprintf (stream1, "  {\n");
+  fprintf (stream1, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream1, "    int level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream1, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream1, "  }\n");
+  fprintf (stream1, "lbrkprop_t;\n");
+  fprintf (stream1, "extern const lbrkprop_t unilbrkprop;\n");
+
+  fprintf (stream2, "const lbrkprop_t unilbrkprop =\n");
+  fprintf (stream2, "{\n");
+  fprintf (stream2, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+	fprintf (stream2, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      fprintf (stream2, " %5zd%s",
+	       offset == 0 ? -1 : (offset - level2_offset) / sizeof (uint32_t),
+	       (i+1 < t.level1_size ? "," : ""));
+    }
+  if (t.level1_size > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " },\n");
+  fprintf (stream2, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+	fprintf (stream2, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      fprintf (stream2, " %5zd%s",
+	       offset == 0 ? -1 : (offset - level3_offset) / sizeof (uint8_t),
+	       (i+1 < t.level2_size << t.q ? "," : ""));
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " },\n");
+  fprintf (stream2, "  {");
+  if (t.level3_size << t.p > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned char value = ((unsigned char *) (t.result + level3_offset))[i];
+      const char *value_string;
+      switch (value)
+	{
+#define CASE(x) case x: value_string = #x; break;
+	  CASE(LBP_BK);
+	  CASE(LBP_CM);
+	  CASE(LBP_WJ);
+	  CASE(LBP_ZW);
+	  CASE(LBP_GL);
+	  CASE(LBP_SP);
+	  CASE(LBP_B2);
+	  CASE(LBP_BA);
+	  CASE(LBP_BB);
+	  CASE(LBP_HY);
+	  CASE(LBP_CB);
+	  CASE(LBP_CL);
+	  CASE(LBP_EX);
+	  CASE(LBP_IN);
+	  CASE(LBP_NS);
+	  CASE(LBP_OP);
+	  CASE(LBP_QU);
+	  CASE(LBP_IS);
+	  CASE(LBP_NU);
+	  CASE(LBP_PO);
+	  CASE(LBP_PR);
+	  CASE(LBP_SY);
+	  CASE(LBP_AI);
+	  CASE(LBP_AL);
+	  CASE(LBP_H2);
+	  CASE(LBP_H3);
+	  CASE(LBP_ID);
+	  CASE(LBP_JL);
+	  CASE(LBP_JV);
+	  CASE(LBP_JT);
+	  CASE(LBP_SA);
+	  CASE(LBP_XX);
+#undef CASE
+	  default:
+	    abort ();
+	}
+      if (i > 0 && (i % 8) == 0)
+	fprintf (stream2, "\n   ");
+      fprintf (stream2, " %s%s", value_string,
+	       (i+1 < t.level3_size << t.p ? "," : ""));
+    }
+  if (t.level3_size << t.p > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " }\n");
+  fprintf (stream2, "};\n");
+}
+
+static void
+output_lbrk_tables (const char *filename1, const char *filename2, const char *version)
+{
+  const char *filenames[2];
+  FILE *streams[2];
+  size_t i;
+
+  filenames[0] = filename1;
+  filenames[1] = filename2;
+
+  for (i = 0; i < 2; i++)
+    {
+      streams[i] = fopen (filenames[i], "w");
+      if (streams[i] == NULL)
+	{
+	  fprintf (stderr, "cannot open '%s' for writing\n", filenames[i]);
+	  exit (1);
+	}
+    }
+
+  for (i = 0; i < 2; i++)
+    {
+      FILE *stream = streams[i];
+
+      fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+      fprintf (stream, "/* Line breaking properties of Unicode characters.  */\n");
+      fprintf (stream, "/* Generated automatically by gen-lbrk for Unicode %s.  */\n",
+	       version);
+      fprintf (stream, "\n");
+
+      /* Put a GPL header on it.  The gnulib module is under LGPL (although it
+	 still carries the GPL header), and it's gnulib-tool which replaces the
+	 GPL header with an LGPL header.  */
+      fprintf (stream, "/* Copyright (C) 2000-2002, 2004, 2008 Free Software Foundation, Inc.\n");
+      fprintf (stream, "\n");
+      fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
+      fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
+      fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
+      fprintf (stream, "   (at your option) any later version.\n");
+      fprintf (stream, "\n");
+      fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
+      fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+      fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+      fprintf (stream, "   GNU General Public License for more details.\n");
+      fprintf (stream, "\n");
+      fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
+      fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
+      fprintf (stream, "\n");
+    }
+
+  output_lbp (streams[0], streams[1]);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (ferror (streams[i]) || fclose (streams[i]))
+	{
+	  fprintf (stderr, "error writing to '%s'\n", filenames[i]);
+	  exit (1);
+	}
+    }
+}
+
+/* ========================================================================= */
+
 int
 main (int argc, char * argv[])
 {
@@ -5070,11 +6289,13 @@ main (int argc, char * argv[])
   const char *scripts_filename;
   const char *blocks_filename;
   const char *proplist30_filename;
+  const char *eastasianwidth_filename;
+  const char *linebreak_filename;
   const char *version;
 
-  if (argc != 8)
+  if (argc != 10)
     {
-      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt version\n",
+      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt EastAsianWidth.txt LineBreak.txt version\n",
 	       argv[0]);
       exit (1);
     }
@@ -5085,7 +6306,9 @@ main (int argc, char * argv[])
   scripts_filename = argv[4];
   blocks_filename = argv[5];
   proplist30_filename = argv[6];
-  version = argv[7];
+  eastasianwidth_filename = argv[7];
+  linebreak_filename = argv[8];
+  version = argv[9];
 
   fill_attributes (unicodedata_filename);
   clear_properties ();
@@ -5094,18 +6317,20 @@ main (int argc, char * argv[])
   fill_properties30 (proplist30_filename);
   fill_scripts (scripts_filename);
   fill_blocks (blocks_filename);
+  fill_width (eastasianwidth_filename);
+  fill_org_lbp (linebreak_filename);
 
   output_categories (version);
-  output_category ("categ_of.h", version);
-  output_combclass ("combining.h", version);
-  output_bidi_category ("bidi_of.h", version);
-  output_decimal_digit_test ("test-decdigit.h", version);
-  output_decimal_digit ("decdigit.h", version);
-  output_digit_test ("test-digit.h", version);
-  output_digit ("digit.h", version);
-  output_numeric_test ("test-numeric.h", version);
-  output_numeric ("numeric.h", version);
-  output_mirror ("mirror.h", version);
+  output_category ("unictype/categ_of.h", version);
+  output_combclass ("unictype/combining.h", version);
+  output_bidi_category ("unictype/bidi_of.h", version);
+  output_decimal_digit_test ("../tests/unictype/test-decdigit.h", version);
+  output_decimal_digit ("unictype/decdigit.h", version);
+  output_digit_test ("../tests/unictype/test-digit.h", version);
+  output_digit ("unictype/digit.h", version);
+  output_numeric_test ("../tests/unictype/test-numeric.h", version);
+  output_numeric ("unictype/numeric.h", version);
+  output_mirror ("unictype/mirror.h", version);
   output_properties (version);
   output_scripts (version);
   output_scripts_byname (version);
@@ -5113,6 +6338,10 @@ main (int argc, char * argv[])
   output_ident_properties (version);
   output_old_ctype (version);
 
+  debug_output_lbrk_tables ("unilbrk/lbrkprop.txt");
+  debug_output_org_lbrk_tables ("unilbrk/lbrkprop_org.txt");
+  output_lbrk_tables ("unilbrk/lbrkprop1.h", "unilbrk/lbrkprop2.h", version);
+
   return 0;
 }
 
@@ -5120,14 +6349,16 @@ main (int argc, char * argv[])
  * For Emacs M-x compile
  * Local Variables:
  * compile-command: "
-   gcc -O -Wall gen-ctype.c -o gen-ctype && \
-   ./gen-ctype \
+   gcc -O -Wall gen-uni-tables.c -Iunictype -o gen-uni-tables && \
+   ./gen-uni-tables \
         /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/UnicodeData.txt \
         /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/PropList.txt \
         /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/DerivedCoreProperties.txt \
         /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Scripts.txt \
         /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Blocks.txt \
         /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/3.0.1/PropList-3.0.1.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/EastAsianWidth.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/LineBreak.txt \
         5.0.0
    "
  * End:
diff --git a/lib/unilbrk/gen-lbrk.c b/lib/unilbrk/gen-lbrk.c
deleted file mode 100644
index f8f93715aa..0000000000
--- a/lib/unilbrk/gen-lbrk.c
+++ /dev/null
@@ -1,1497 +0,0 @@
-/* Generate a Unicode conforming Line Break Properties tables from a
-   UnicodeData file.
-   Copyright (C) 2000-2002, 2004, 2008 Free Software Foundation, Inc.
-   Written by Bruno Haible <bruno@clisp.org>, 2000-2002.
-
-   This program is free software: you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-/* Usage example:
-     $ gen-lbrk /usr/local/share/Unidata/UnicodeData.txt \
-		/usr/local/share/Unidata/EastAsianWidth.txt \
-		/usr/local/share/Unidata/LineBreak.txt \
-		5.0.0
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <string.h>
-#include <time.h>
-
-/* This structure represents one line in the UnicodeData.txt file.  */
-struct unicode_attribute
-{
-  const char *name;           /* Character name */
-  const char *category;       /* General category */
-  const char *combining;      /* Canonical combining classes */
-  const char *bidi;           /* Bidirectional category */
-  const char *decomposition;  /* Character decomposition mapping */
-  const char *decdigit;       /* Decimal digit value */
-  const char *digit;          /* Digit value */
-  const char *numeric;        /* Numeric value */
-  int mirrored;               /* mirrored */
-  const char *oldname;        /* Old Unicode 1.0 name */
-  const char *comment;        /* Comment */
-  unsigned int upper;         /* Uppercase mapping */
-  unsigned int lower;         /* Lowercase mapping */
-  unsigned int title;         /* Titlecase mapping */
-};
-
-/* Missing fields are represented with "" for strings, and NONE for
-   characters.  */
-#define NONE (~(unsigned int)0)
-
-/* The entire contents of the UnicodeData.txt file.  */
-struct unicode_attribute unicode_attributes [0x110000];
-
-/* Stores in unicode_attributes[i] the values from the given fields.  */
-static void
-fill_attribute (unsigned int i,
-		const char *field1, const char *field2,
-		const char *field3, const char *field4,
-		const char *field5, const char *field6,
-		const char *field7, const char *field8,
-		const char *field9, const char *field10,
-		const char *field11, const char *field12,
-		const char *field13, const char *field14)
-{
-  struct unicode_attribute * uni;
-
-  if (i >= 0x110000)
-    {
-      fprintf (stderr, "index too large\n");
-      exit (1);
-    }
-  uni = &unicode_attributes[i];
-  /* Copy the strings.  */
-  uni->name          = strdup (field1);
-  uni->category      = (field2[0] == '\0' ? "" : strdup (field2));
-  uni->combining     = (field3[0] == '\0' ? "" : strdup (field3));
-  uni->bidi          = (field4[0] == '\0' ? "" : strdup (field4));
-  uni->decomposition = (field5[0] == '\0' ? "" : strdup (field5));
-  uni->decdigit      = (field6[0] == '\0' ? "" : strdup (field6));
-  uni->digit         = (field7[0] == '\0' ? "" : strdup (field7));
-  uni->numeric       = (field8[0] == '\0' ? "" : strdup (field8));
-  uni->mirrored      = (field9[0] == 'Y');
-  uni->oldname       = (field10[0] == '\0' ? "" : strdup (field10));
-  uni->comment       = (field11[0] == '\0' ? "" : strdup (field11));
-  uni->upper = (field12[0] =='\0' ? NONE : strtoul (field12, NULL, 16));
-  uni->lower = (field13[0] =='\0' ? NONE : strtoul (field13, NULL, 16));
-  uni->title = (field14[0] =='\0' ? NONE : strtoul (field14, NULL, 16));
-}
-
-/* Maximum length of a field in the UnicodeData.txt file.  */
-#define FIELDLEN 120
-
-/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
-   Reads up to (but excluding) DELIM.
-   Returns 1 when a field was successfully read, otherwise 0.  */
-static int
-getfield (FILE *stream, char *buffer, int delim)
-{
-  int count = 0;
-  int c;
-
-  for (; (c = getc (stream)), (c != EOF && c != delim); )
-    {
-      /* The original unicode.org UnicodeData.txt file happens to have
-	 CR/LF line terminators.  Silently convert to LF.  */
-      if (c == '\r')
-	continue;
-
-      /* Put c into the buffer.  */
-      if (++count >= FIELDLEN - 1)
-	{
-	  fprintf (stderr, "field too long\n");
-	  exit (1);
-	}
-      *buffer++ = c;
-    }
-
-  if (c == EOF)
-    return 0;
-
-  *buffer = '\0';
-  return 1;
-}
-
-/* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
-   file.  */
-static void
-fill_attributes (const char *unicodedata_filename)
-{
-  unsigned int i, j;
-  FILE *stream;
-  char field0[FIELDLEN];
-  char field1[FIELDLEN];
-  char field2[FIELDLEN];
-  char field3[FIELDLEN];
-  char field4[FIELDLEN];
-  char field5[FIELDLEN];
-  char field6[FIELDLEN];
-  char field7[FIELDLEN];
-  char field8[FIELDLEN];
-  char field9[FIELDLEN];
-  char field10[FIELDLEN];
-  char field11[FIELDLEN];
-  char field12[FIELDLEN];
-  char field13[FIELDLEN];
-  char field14[FIELDLEN];
-  int lineno = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_attributes[i].name = NULL;
-
-  stream = fopen (unicodedata_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      int n;
-
-      lineno++;
-      n = getfield (stream, field0, ';');
-      n += getfield (stream, field1, ';');
-      n += getfield (stream, field2, ';');
-      n += getfield (stream, field3, ';');
-      n += getfield (stream, field4, ';');
-      n += getfield (stream, field5, ';');
-      n += getfield (stream, field6, ';');
-      n += getfield (stream, field7, ';');
-      n += getfield (stream, field8, ';');
-      n += getfield (stream, field9, ';');
-      n += getfield (stream, field10, ';');
-      n += getfield (stream, field11, ';');
-      n += getfield (stream, field12, ';');
-      n += getfield (stream, field13, ';');
-      n += getfield (stream, field14, '\n');
-      if (n == 0)
-	break;
-      if (n != 15)
-	{
-	  fprintf (stderr, "short line in'%s':%d\n",
-		   unicodedata_filename, lineno);
-	  exit (1);
-	}
-      i = strtoul (field0, NULL, 16);
-      if (field1[0] == '<'
-	  && strlen (field1) >= 9
-	  && !strcmp (field1 + strlen(field1) - 8, ", First>"))
-	{
-	  /* Deal with a range. */
-	  lineno++;
-	  n = getfield (stream, field0, ';');
-	  n += getfield (stream, field1, ';');
-	  n += getfield (stream, field2, ';');
-	  n += getfield (stream, field3, ';');
-	  n += getfield (stream, field4, ';');
-	  n += getfield (stream, field5, ';');
-	  n += getfield (stream, field6, ';');
-	  n += getfield (stream, field7, ';');
-	  n += getfield (stream, field8, ';');
-	  n += getfield (stream, field9, ';');
-	  n += getfield (stream, field10, ';');
-	  n += getfield (stream, field11, ';');
-	  n += getfield (stream, field12, ';');
-	  n += getfield (stream, field13, ';');
-	  n += getfield (stream, field14, '\n');
-	  if (n != 15)
-	    {
-	      fprintf (stderr, "missing end range in '%s':%d\n",
-		       unicodedata_filename, lineno);
-	      exit (1);
-	    }
-	  if (!(field1[0] == '<'
-		&& strlen (field1) >= 8
-		&& !strcmp (field1 + strlen (field1) - 7, ", Last>")))
-	    {
-	      fprintf (stderr, "missing end range in '%s':%d\n",
-		       unicodedata_filename, lineno);
-	      exit (1);
-	    }
-	  field1[strlen (field1) - 7] = '\0';
-	  j = strtoul (field0, NULL, 16);
-	  for (; i <= j; i++)
-	    fill_attribute (i, field1+1, field2, field3, field4, field5,
-			       field6, field7, field8, field9, field10,
-			       field11, field12, field13, field14);
-	}
-      else
-	{
-	  /* Single character line */
-	  fill_attribute (i, field1, field2, field3, field4, field5,
-			     field6, field7, field8, field9, field10,
-			     field11, field12, field13, field14);
-	}
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
-      exit (1);
-    }
-}
-
-/* The width property from the EastAsianWidth.txt file.
-   Each is NULL (unassigned) or "N", "A", "H", "W", "F", "Na".  */
-const char * unicode_width[0x110000];
-
-/* Stores in unicode_width[] the width property from the EastAsianWidth.txt
-   file.  */
-static void
-fill_width (const char *width_filename)
-{
-  unsigned int i, j;
-  FILE *stream;
-  char field0[FIELDLEN];
-  char field1[FIELDLEN];
-  char field2[FIELDLEN];
-  int lineno = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_width[i] = (unicode_attributes[i].name != NULL ? "N" : NULL);
-
-  stream = fopen (width_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", width_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      int n;
-      int c;
-
-      lineno++;
-      c = getc (stream);
-      if (c == EOF)
-	break;
-      if (c == '#')
-	{
-	  do c = getc (stream); while (c != EOF && c != '\n');
-	  continue;
-	}
-      ungetc (c, stream);
-      n = getfield (stream, field0, ';');
-      n += getfield (stream, field1, ' ');
-      n += getfield (stream, field2, '\n');
-      if (n == 0)
-	break;
-      if (n != 3)
-	{
-	  fprintf (stderr, "short line in '%s':%d\n", width_filename, lineno);
-	  exit (1);
-	}
-      i = strtoul (field0, NULL, 16);
-      if (strstr (field0, "..") != NULL)
-	{
-	  /* Deal with a range.  */
-	  j = strtoul (strstr (field0, "..") + 2, NULL, 16);
-	  for (; i <= j; i++)
-	    unicode_width[i] = strdup (field1);
-	}
-      else
-	{
-	  /* Single character line.  */
-	  unicode_width[i] = strdup (field1);
-	}
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", width_filename);
-      exit (1);
-    }
-}
-
-/* Line breaking classification.  */
-
-enum
-{
-  /* Values >= 24 are resolved at run time. */
-  LBP_BK = 24, /* mandatory break */
-/*LBP_CR,         carriage return - not used here because it's a DOSism */
-/*LBP_LF,         line feed - not used here because it's a DOSism */
-  LBP_CM = 25, /* attached characters and combining marks */
-/*LBP_NL,         next line - not used here because it's equivalent to LBP_BK */
-/*LBP_SG,         surrogates - not used here because they are not characters */
-  LBP_WJ =  0, /* word joiner */
-  LBP_ZW = 26, /* zero width space */
-  LBP_GL =  1, /* non-breaking (glue) */
-  LBP_SP = 27, /* space */
-  LBP_B2 =  2, /* break opportunity before and after */
-  LBP_BA =  3, /* break opportunity after */
-  LBP_BB =  4, /* break opportunity before */
-  LBP_HY =  5, /* hyphen */
-  LBP_CB = 28, /* contingent break opportunity */
-  LBP_CL =  6, /* closing punctuation */
-  LBP_EX =  7, /* exclamation/interrogation */
-  LBP_IN =  8, /* inseparable */
-  LBP_NS =  9, /* non starter */
-  LBP_OP = 10, /* opening punctuation */
-  LBP_QU = 11, /* ambiguous quotation */
-  LBP_IS = 12, /* infix separator (numeric) */
-  LBP_NU = 13, /* numeric */
-  LBP_PO = 14, /* postfix (numeric) */
-  LBP_PR = 15, /* prefix (numeric) */
-  LBP_SY = 16, /* symbols allowing breaks */
-  LBP_AI = 29, /* ambiguous (alphabetic or ideograph) */
-  LBP_AL = 17, /* ordinary alphabetic and symbol characters */
-  LBP_H2 = 18, /* Hangul LV syllable */
-  LBP_H3 = 19, /* Hangul LVT syllable */
-  LBP_ID = 20, /* ideographic */
-  LBP_JL = 21, /* Hangul L Jamo */
-  LBP_JV = 22, /* Hangul V Jamo */
-  LBP_JT = 23, /* Hangul T Jamo */
-  LBP_SA = 30, /* complex context (South East Asian) */
-  LBP_XX = 31  /* unknown */
-};
-
-/* Returns the line breaking classification for ch, as a bit mask.  */
-static int
-get_lbp (unsigned int ch)
-{
-  int attr = 0;
-
-  if (unicode_attributes[ch].name != NULL)
-    {
-      /* mandatory break */
-      if (ch == 0x000A || ch == 0x000D || ch == 0x0085 /* newline */
-	  || ch == 0x000C /* form feed */
-	  || ch == 0x000B /* line tabulation */
-	  || ch == 0x2028 /* LINE SEPARATOR */
-	  || ch == 0x2029 /* PARAGRAPH SEPARATOR */)
-	attr |= 1 << LBP_BK;
-
-      if (ch == 0x2060 /* WORD JOINER */
-	  || ch == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */)
-	attr |= 1 << LBP_WJ;
-
-      /* zero width space */
-      if (ch == 0x200B /* ZERO WIDTH SPACE */)
-	attr |= 1 << LBP_ZW;
-
-      /* non-breaking (glue) */
-      if (ch == 0x00A0 /* NO-BREAK SPACE */
-	  || ch == 0x202F /* NARROW NO-BREAK SPACE */
-	  || ch == 0x180E /* MONGOLIAN VOWEL SEPARATOR */
-	  || ch == 0x034F /* COMBINING GRAPHEME JOINER */
-	  || ch == 0x2007 /* FIGURE SPACE */
-	  || ch == 0x2011 /* NON-BREAKING HYPHEN */
-	  || ch == 0x0F08 /* TIBETAN MARK SBRUL SHAD */
-	  || ch == 0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR */
-	  || ch == 0x0F12 /* TIBETAN MARK RGYA GRAM SHAD */
-	  || (ch >= 0x035C && ch <= 0x0362) /* COMBINING DOUBLE ... */)
-	attr |= 1 << LBP_GL;
-
-      /* space */
-      if (ch == 0x0020 /* SPACE */)
-	attr |= 1 << LBP_SP;
-
-      /* break opportunity before and after */
-      if (ch == 0x2014 /* EM DASH */)
-	attr |= 1 << LBP_B2;
-
-      /* break opportunity after */
-      if (ch == 0x1680 /* OGHAM SPACE MARK */
-	  || ch == 0x2000 /* EN QUAD */
-	  || ch == 0x2001 /* EM QUAD */
-	  || ch == 0x2002 /* EN SPACE */
-	  || ch == 0x2003 /* EM SPACE */
-	  || ch == 0x2004 /* THREE-PER-EM SPACE */
-	  || ch == 0x2005 /* FOUR-PER-EM SPACE */
-	  || ch == 0x2006 /* SIX-PER-EM SPACE */
-	  || ch == 0x2008 /* PUNCTUATION SPACE */
-	  || ch == 0x2009 /* THIN SPACE */
-	  || ch == 0x200A /* HAIR SPACE */
-	  || ch == 0x205F /* MEDIUM MATHEMATICAL SPACE */
-	  || ch == 0x0009 /* tab */
-	  || ch == 0x00AD /* SOFT HYPHEN */
-	  || ch == 0x058A /* ARMENIAN HYPHEN */
-	  || ch == 0x2010 /* HYPHEN */
-	  || ch == 0x2012 /* FIGURE DASH */
-	  || ch == 0x2013 /* EN DASH */
-	  || ch == 0x05BE /* HEBREW PUNCTUATION MAQAF */
-	  || ch == 0x0F0B /* TIBETAN MARK INTERSYLLABIC TSHEG */
-	  || ch == 0x1361 /* ETHIOPIC WORDSPACE */
-	  || ch == 0x17D8 /* KHMER SIGN BEYYAL */
-	  || ch == 0x17DA /* KHMER SIGN KOOMUUT */
-	  || ch == 0x2027 /* HYPHENATION POINT */
-	  || ch == 0x007C /* VERTICAL LINE */
-	  || ch == 0x16EB /* RUNIC SINGLE PUNCTUATION */
-	  || ch == 0x16EC /* RUNIC MULTIPLE PUNCTUATION */
-	  || ch == 0x16ED /* RUNIC CROSS PUNCTUATION */
-	  || ch == 0x2056 /* THREE DOT PUNCTUATION */
-	  || ch == 0x2058 /* FOUR DOT PUNCTUATION */
-	  || ch == 0x2059 /* FIVE DOT PUNCTUATION */
-	  || ch == 0x205A /* TWO DOT PUNCTUATION */
-	  || ch == 0x205B /* FOUR DOT MARK */
-	  || ch == 0x205D /* TRICOLON */
-	  || ch == 0x205E /* VERTICAL FOUR DOTS */
-	  || ch == 0x2E19 /* PALM BRANCH */
-	  || ch == 0x2E2A /* TWO DOTS OVER ONE DOT PUNCTUATION */
-	  || ch == 0x2E2B /* ONE DOT OVER TWO DOTS PUNCTUATION */
-	  || ch == 0x2E2C /* SQUARED FOUR DOT PUNCTUATION */
-	  || ch == 0x2E2D /* FIVE DOT PUNCTUATION */
-	  || ch == 0x2E30 /* RING POINT */
-	  || ch == 0x10100 /* AEGEAN WORD SEPARATOR LINE */
-	  || ch == 0x10101 /* AEGEAN WORD SEPARATOR DOT */
-	  || ch == 0x10102 /* AEGEAN CHECK MARK */
-	  || ch == 0x1039F /* UGARITIC WORD DIVIDER */
-	  || ch == 0x103D0 /* OLD PERSIAN WORD DIVIDER */
-	  || ch == 0x1091F /* PHOENICIAN WORD SEPARATOR */
-	  || ch == 0x12470 /* CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER */
-	  || ch == 0x0964 /* DEVANAGARI DANDA */
-	  || ch == 0x0965 /* DEVANAGARI DOUBLE DANDA */
-	  || ch == 0x0E5A /* THAI CHARACTER ANGKHANKHU */
-	  || ch == 0x0E5B /* THAI CHARACTER KHOMUT */
-	  || ch == 0x104A /* MYANMAR SIGN LITTLE SECTION */
-	  || ch == 0x104B /* MYANMAR SIGN SECTION */
-	  || ch == 0x1735 /* PHILIPPINE SINGLE PUNCTUATION */
-	  || ch == 0x1736 /* PHILIPPINE DOUBLE PUNCTUATION */
-	  || ch == 0x17D4 /* KHMER SIGN KHAN */
-	  || ch == 0x17D5 /* KHMER SIGN BARIYOOSAN */
-	  || ch == 0x1B5E /* BALINESE CARIK SIKI */
-	  || ch == 0x1B5F /* BALINESE CARIK PAREREN */
-	  || ch == 0xA8CE /* SAURASHTRA DANDA */
-	  || ch == 0xA8CF /* SAURASHTRA DOUBLE DANDA */
-	  || ch == 0xAA5D /* CHAM PUNCTUATION DANDA */
-	  || ch == 0xAA5E /* CHAM PUNCTUATION DOUBLE DANDA */
-	  || ch == 0xAA5F /* CHAM PUNCTUATION TRIPLE DANDA */
-	  || ch == 0x10A56 /* KHAROSHTHI PUNCTUATION DANDA */
-	  || ch == 0x10A57 /* KHAROSHTHI PUNCTUATION DOUBLE DANDA */
-	  || ch == 0x0F34 /* TIBETAN MARK BSDUS RTAGS */
-	  || ch == 0x0F7F /* TIBETAN SIGN RNAM BCAD */
-	  || ch == 0x0F85 /* TIBETAN MARK PALUTA */
-	  || ch == 0x0FBE /* TIBETAN KU RU KHA */
-	  || ch == 0x0FBF /* TIBETAN KU RU KHA BZHI MIG CAN */
-	  || ch == 0x0FD2 /* TIBETAN MARK NYIS TSHEG */
-#if !REVISION_22
-	  || ch == 0x1802 /* MONGOLIAN COMMA */
-	  || ch == 0x1803 /* MONGOLIAN FULL STOP */
-#endif
-	  || ch == 0x1804 /* MONGOLIAN COLON */
-	  || ch == 0x1805 /* MONGOLIAN FOUR DOTS */
-#if !REVISION_22
-	  || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
-	  || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
-#endif
-	  || ch == 0x1B5A /* BALINESE PANTI */
-	  || ch == 0x1B5B /* BALINESE PAMADA */
-	  || ch == 0x1B5C /* BALINESE WINDU */
-	  || ch == 0x1B5D /* BALINESE CARIK PAMUNGKAH */
-	  || ch == 0x1B60 /* BALINESE PAMENENG */
-	  || ch == 0x1C3B /* LEPCHA PUNCTUATION TA-ROL */
-	  || ch == 0x1C3C /* LEPCHA PUNCTUATION NYET THYOOM TA-ROL */
-	  || ch == 0x1C3D /* LEPCHA PUNCTUATION CER-WA */
-	  || ch == 0x1C3E /* LEPCHA PUNCTUATION TSHOOK CER-WA */
-	  || ch == 0x1C3F /* LEPCHA PUNCTUATION TSHOOK */
-	  || ch == 0x1C7E /* OL CHIKI PUNCTUATION MUCAAD */
-	  || ch == 0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD */
-#if !REVISION_22
-	  || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
-#endif
-	  || ch == 0x2CFA /* COPTIC OLD NUBIAN DIRECT QUESTION MARK */
-	  || ch == 0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK */
-	  || ch == 0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER */
-#if !REVISION_22
-	  || ch == 0x2CFE /* COPTIC FULL STOP */
-#endif
-	  || ch == 0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER */
-	  || (ch >= 0x2E0E && ch <= 0x2E15) /* EDITORIAL CORONIS .. UPWARDS ANCORA */
-	  || ch == 0x2E17 /* DOUBLE OBLIQUE HYPHEN */
-	  || ch == 0xA60D /* VAI COMMA */
-	  || ch == 0xA60F /* VAI QUESTION MARK */
-	  || ch == 0xA92E /* KAYAH LI SIGN CWI */
-	  || ch == 0xA92F /* KAYAH LI SIGN SHYA */
-	  || ch == 0x10A50 /* KHAROSHTHI PUNCTUATION DOT */
-	  || ch == 0x10A51 /* KHAROSHTHI PUNCTUATION SMALL CIRCLE */
-	  || ch == 0x10A52 /* KHAROSHTHI PUNCTUATION CIRCLE */
-	  || ch == 0x10A53 /* KHAROSHTHI PUNCTUATION CRESCENT BAR */
-	  || ch == 0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM */
-	  || ch == 0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS */
-	  /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-#if !REVISION_22
-	  || ch == 0x1A1E /* BUGINESE PALLAWA */
-#endif
-	  || ch == 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON */
-	  || ch == 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON */
-	  || ch == 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON */)
-	attr |= 1 << LBP_BA;
-
-      /* break opportunity before */
-      if (ch == 0x00B4 /* ACUTE ACCENT */
-#if REVISION_22
-	  || ch == 0x1FFD /* GREEK OXIA */
-	  || ch == 0x02DF /* MODIFIER LETTER CROSS ACCENT */
-#endif
-	  || ch == 0x02C8 /* MODIFIER LETTER VERTICAL LINE */
-	  || ch == 0x02CC /* MODIFIER LETTER LOW VERTICAL LINE */
-	  || ch == 0x0F01 /* TIBETAN MARK GTER YIG MGO TRUNCATED A */
-	  || ch == 0x0F02 /* TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA */
-	  || ch == 0x0F03 /* TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA */
-	  || ch == 0x0F04 /* TIBETAN MARK INITIAL YIG MGO MDUN MA */
-	  || ch == 0x0F06 /* TIBETAN MARK CARET YIG MGO PHUR SHAD MA */
-	  || ch == 0x0F07 /* TIBETAN MARK YIG MGO TSHEG SHAD MA */
-	  || ch == 0x0F09 /* TIBETAN MARK BSKUR YIG MGO */
-	  || ch == 0x0F0A /* TIBETAN MARK BKA- SHOG YIG MGO */
-	  || ch == 0x0FD0 /* TIBETAN MARK BSKA- SHOG GI MGO RGYAN */
-	  || ch == 0x0FD1 /* TIBETAN MARK MNYAM YIG GI MGO RGYAN */
-	  || ch == 0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA */
-	  || ch == 0xA874 /* PHAGS-PA SINGLE HEAD MARK */
-	  || ch == 0xA875 /* PHAGS-PA DOUBLE HEAD MARK */
-	  || ch == 0x1806 /* MONGOLIAN TODO SOFT HYPHEN */)
-	attr |= 1 << LBP_BB;
-
-      /* hyphen */
-      if (ch == 0x002D /* HYPHEN-MINUS */)
-	attr |= 1 << LBP_HY;
-
-      /* contingent break opportunity */
-      if (ch == 0xFFFC /* OBJECT REPLACEMENT CHARACTER */)
-	attr |= 1 << LBP_CB;
-
-      /* closing punctuation */
-      if ((unicode_attributes[ch].category[0] == 'P'
-	   && unicode_attributes[ch].category[1] == 'e')
-	  || ch == 0x3001 /* IDEOGRAPHIC COMMA */
-	  || ch == 0x3002 /* IDEOGRAPHIC FULL STOP */
-	  || ch == 0xFE11 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA */
-	  || ch == 0xFE12 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP */
-	  || ch == 0xFE50 /* SMALL COMMA */
-	  || ch == 0xFE52 /* SMALL FULL STOP */
-	  || ch == 0xFF0C /* FULLWIDTH COMMA */
-	  || ch == 0xFF0E /* FULLWIDTH FULL STOP */
-	  || ch == 0xFF61 /* HALFWIDTH IDEOGRAPHIC FULL STOP */
-	  || ch == 0xFF64 /* HALFWIDTH IDEOGRAPHIC COMMA */)
-	attr |= 1 << LBP_CL;
-
-      /* exclamation/interrogation */
-      if (ch == 0x0021 /* EXCLAMATION MARK */
-	  || ch == 0x003F /* QUESTION MARK */
-	  || ch == 0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA */
-#if !REVISION_22
-	  || ch == 0x060C /* ARABIC COMMA */
-#endif
-	  || ch == 0x061B /* ARABIC SEMICOLON */
-	  || ch == 0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK */
-	  || ch == 0x061F /* ARABIC QUESTION MARK */
-#if !REVISION_22
-	  || ch == 0x066A /* ARABIC PERCENT SIGN */
-#endif
-	  || ch == 0x06D4 /* ARABIC FULL STOP */
-	  || ch == 0x07F9 /* NKO EXCLAMATION MARK */
-	  || ch == 0x0F0D /* TIBETAN MARK SHAD */
-	  || ch == 0x0F0E /* TIBETAN MARK NYIS SHAD */
-	  || ch == 0x0F0F /* TIBETAN MARK TSHEG SHAD */
-	  || ch == 0x0F10 /* TIBETAN MARK NYIS TSHEG SHAD */
-	  || ch == 0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD */
-	  || ch == 0x0F14 /* TIBETAN MARK GTER TSHEG */
-#if REVISION_22
-	  || ch == 0x1802 /* MONGOLIAN COMMA */
-	  || ch == 0x1803 /* MONGOLIAN FULL STOP */
-	  || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
-	  || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
-#endif
-	  || ch == 0x1944 /* LIMBU EXCLAMATION MARK */
-	  || ch == 0x1945 /* LIMBU QUESTION MARK */
-	  || ch == 0x2762 /* HEAVY EXCLAMATION MARK ORNAMENT */
-	  || ch == 0x2763 /* HEAVY HEART EXCLAMATION MARK ORNAMENT */
-#if REVISION_22
-	  || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
-	  || ch == 0x2CFE /* COPTIC FULL STOP */
-#endif
-	  || ch == 0x2E2E /* REVERSED QUESTION MARK */
-	  || ch == 0xA60C /* VAI SYLLABLE LENGTHENER */
-	  || ch == 0xA60E /* VAI FULL STOP */
-	  || ch == 0xA876 /* PHAGS-PA MARK SHAD */
-	  || ch == 0xA877 /* PHAGS-PA MARK DOUBLE SHAD */
-	  || ch == 0xFE15 /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
-	  || ch == 0xFE16 /* PRESENTATION FORM FOR VERTICAL QUESTION MARK */
-	  || ch == 0xFE56 /* SMALL QUESTION MARK */
-	  || ch == 0xFE57 /* SMALL EXCLAMATION MARK */
-	  || ch == 0xFF01 /* FULLWIDTH EXCLAMATION MARK */
-	  || ch == 0xFF1F /* FULLWIDTH QUESTION MARK */)
-	attr |= 1 << LBP_EX;
-
-      /* inseparable */
-      if (ch == 0x2024 /* ONE DOT LEADER */
-	  || ch == 0x2025 /* TWO DOT LEADER */
-	  || ch == 0x2026 /* HORIZONTAL ELLIPSIS */
-	  || ch == 0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */)
-	attr |= 1 << LBP_IN;
-
-      /* non starter */
-      if (ch == 0x17D6 /* KHMER SIGN CAMNUC PII KUUH */
-	  || ch == 0x203C /* DOUBLE EXCLAMATION MARK */
-	  || ch == 0x203D /* INTERROBANG */
-	  || ch == 0x2047 /* DOUBLE QUESTION MARK */
-	  || ch == 0x2048 /* QUESTION EXCLAMATION MARK */
-	  || ch == 0x2049 /* EXCLAMATION QUESTION MARK */
-	  || ch == 0x3005 /* IDEOGRAPHIC ITERATION MARK */
-	  || ch == 0x301C /* WAVE DASH */
-	  || ch == 0x303C /* MASU MARK */
-	  || ch == 0x303B /* VERTICAL IDEOGRAPHIC ITERATION MARK */
-	  || ch == 0x309B /* KATAKANA-HIRAGANA VOICED SOUND MARK */
-	  || ch == 0x309C /* KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
-	  || ch == 0x309D /* HIRAGANA ITERATION MARK */
-	  || ch == 0x309E /* HIRAGANA VOICED ITERATION MARK */
-	  || ch == 0x30A0 /* KATAKANA-HIRAGANA DOUBLE HYPHEN */
-	  || ch == 0x30FB /* KATAKANA MIDDLE DOT */
-	  || ch == 0x30FC /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
-	  || ch == 0x30FD /* KATAKANA ITERATION MARK */
-	  || ch == 0x30FE /* KATAKANA VOICED ITERATION MARK */
-	  || ch == 0xA015 /* YI SYLLABLE WU */
-	  || ch == 0xFE54 /* SMALL SEMICOLON */
-	  || ch == 0xFE55 /* SMALL COLON */
-	  || ch == 0xFF1A /* FULLWIDTH COLON */
-	  || ch == 0xFF1B /* FULLWIDTH SEMICOLON */
-	  || ch == 0xFF65 /* HALFWIDTH KATAKANA MIDDLE DOT */
-	  || ch == 0xFF70 /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
-	  || ch == 0xFF9E /* HALFWIDTH KATAKANA VOICED SOUND MARK */
-	  || ch == 0xFF9F /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
-	  || strstr (unicode_attributes[ch].name, "HIRAGANA LETTER SMALL ") != NULL
-	  || strstr (unicode_attributes[ch].name, "KATAKANA LETTER SMALL ") != NULL)
-	attr |= 1 << LBP_NS;
-
-      /* opening punctuation */
-      if ((unicode_attributes[ch].category[0] == 'P'
-	   && unicode_attributes[ch].category[1] == 's')
-#if REVISION_22
-	  || ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
-	  || ch == 0x00BF /* INVERTED QUESTION MARK */
-#endif
-	  || ch == 0x2E18 /* INVERTED INTERROBANG */)
-	attr |= 1 << LBP_OP;
-
-      /* ambiguous quotation */
-      if ((unicode_attributes[ch].category[0] == 'P'
-	   && (unicode_attributes[ch].category[1] == 'f'
-	       || unicode_attributes[ch].category[1] == 'i'))
-	  || ch == 0x0022 /* QUOTATION MARK */
-	  || ch == 0x0027 /* APOSTROPHE */
-	  || ch == 0x275B /* HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT */
-	  || ch == 0x275C /* HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT */
-	  || ch == 0x275D /* HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT */
-	  || ch == 0x275E /* HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT */
-	  || ch == 0x2E00 /* RIGHT ANGLE SUBSTITUTION MARKER */
-	  || ch == 0x2E01 /* RIGHT ANGLE DOTTED SUBSTITUTION MARKER */
-	  || ch == 0x2E06 /* RAISED INTERPOLATION MARKER */
-	  || ch == 0x2E07 /* RAISED DOTTED INTERPOLATION MARKER */
-	  || ch == 0x2E08 /* DOTTED TRANSPOSITION MARKER */
-	  || ch == 0x2E0B /* RAISED SQUARE */)
-	attr |= 1 << LBP_QU;
-
-      /* infix separator (numeric) */
-      if (ch == 0x002C /* COMMA */
-	  || ch == 0x002E /* FULL STOP */
-	  || ch == 0x003A /* COLON */
-	  || ch == 0x003B /* SEMICOLON */
-	  || ch == 0x037E /* GREEK QUESTION MARK */
-	  || ch == 0x0589 /* ARMENIAN FULL STOP */
-#if REVISION_22
-	  || ch == 0x060C /* ARABIC COMMA */
-#endif
-	  || ch == 0x060D /* ARABIC DATE SEPARATOR */
-	  || ch == 0x07F8 /* NKO COMMA */
-	  || ch == 0x2044 /* FRACTION SLASH */
-	  || ch == 0xFE10 /* PRESENTATION FORM FOR VERTICAL COMMA */
-	  || ch == 0xFE13 /* PRESENTATION FORM FOR VERTICAL COLON */
-	  || ch == 0xFE14 /* PRESENTATION FORM FOR VERTICAL SEMICOLON */)
-	attr |= 1 << LBP_IS;
-
-      /* numeric */
-      if ((unicode_attributes[ch].category[0] == 'N'
-	   && unicode_attributes[ch].category[1] == 'd'
-	   && strstr (unicode_attributes[ch].name, "FULLWIDTH") == NULL)
-	  || ch == 0x066B /* ARABIC DECIMAL SEPARATOR */
-	  || ch == 0x066C /* ARABIC THOUSANDS SEPARATOR */)
-	attr |= 1 << LBP_NU;
-
-      /* postfix (numeric) */
-      if (ch == 0x0025 /* PERCENT SIGN */
-	  || ch == 0x00A2 /* CENT SIGN */
-	  || ch == 0x00B0 /* DEGREE SIGN */
-	  || ch == 0x060B /* AFGHANI SIGN */
-#if REVISION_22
-	  || ch == 0x066A /* ARABIC PERCENT SIGN */
-#endif
-	  || ch == 0x2030 /* PER MILLE SIGN */
-	  || ch == 0x2031 /* PER TEN THOUSAND SIGN */
-	  || ch == 0x2032 /* PRIME */
-	  || ch == 0x2033 /* DOUBLE PRIME */
-	  || ch == 0x2034 /* TRIPLE PRIME */
-	  || ch == 0x2035 /* REVERSED PRIME */
-	  || ch == 0x2036 /* REVERSED DOUBLE PRIME */
-	  || ch == 0x2037 /* REVERSED TRIPLE PRIME */
-	  || ch == 0x20A7 /* PESETA SIGN */
-	  || ch == 0x2103 /* DEGREE CELSIUS */
-	  || ch == 0x2109 /* DEGREE FAHRENHEIT */
-	  || ch == 0xFDFC /* RIAL SIGN */
-	  || ch == 0xFE6A /* SMALL PERCENT SIGN */
-	  || ch == 0xFF05 /* FULLWIDTH PERCENT SIGN */
-	  || ch == 0xFFE0 /* FULLWIDTH DIGIT ZERO */)
-	attr |= 1 << LBP_PO;
-
-      /* prefix (numeric) */
-      if ((unicode_attributes[ch].category[0] == 'S'
-	   && unicode_attributes[ch].category[1] == 'c')
-	  || ch == 0x002B /* PLUS SIGN */
-	  || ch == 0x005C /* REVERSE SOLIDUS */
-	  || ch == 0x00B1 /* PLUS-MINUS SIGN */
-	  || ch == 0x2116 /* NUMERO SIGN */
-	  || ch == 0x2212 /* MINUS SIGN */
-	  || ch == 0x2213 /* MINUS-OR-PLUS SIGN */)
-	if (!(attr & (1 << LBP_PO)))
-	  attr |= 1 << LBP_PR;
-
-      /* symbols allowing breaks */
-      if (ch == 0x002F /* SOLIDUS */)
-	attr |= 1 << LBP_SY;
-
-      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) == 0)
-	attr |= 1 << LBP_H2;
-
-      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) != 0)
-	attr |= 1 << LBP_H3;
-
-      if ((ch >= 0x1100 && ch <= 0x1159) || ch == 0x115F)
-	attr |= 1 << LBP_JL;
-
-      if (ch >= 0x1160 && ch <= 0x11A2)
-	attr |= 1 << LBP_JV;
-
-      if (ch >= 0x11A8 && ch <= 0x11F9)
-	attr |= 1 << LBP_JT;
-
-      /* complex context (South East Asian) */
-      if (((unicode_attributes[ch].category[0] == 'C'
-	    && unicode_attributes[ch].category[1] == 'f')
-	   || (unicode_attributes[ch].category[0] == 'L'
-	       && (unicode_attributes[ch].category[1] == 'm'
-		   || unicode_attributes[ch].category[1] == 'o'))
-	   || (unicode_attributes[ch].category[0] == 'M'
-	       && (unicode_attributes[ch].category[1] == 'c'
-		   || unicode_attributes[ch].category[1] == 'n'))
-	   /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-	   || ch == 0x19DE /* NEW TAI LUE SIGN LAE */
-	   || ch == 0x19DF /* NEW TAI LUE SIGN LAEV */)
-	  && ((ch >= 0x0E00 && ch <= 0x0EFF)
-	      || (ch >= 0x1000 && ch <= 0x109F)
-	      || (ch >= 0x1780 && ch <= 0x17FF)
-	      || (ch >= 0x1950 && ch <= 0x19DF)))
-	attr |= 1 << LBP_SA;
-
-      /* attached characters and combining marks */
-      if ((unicode_attributes[ch].category[0] == 'M'
-	   && (unicode_attributes[ch].category[1] == 'c'
-	       || unicode_attributes[ch].category[1] == 'e'
-	       || unicode_attributes[ch].category[1] == 'n'))
-	  || (unicode_attributes[ch].category[0] == 'C'
-	      && (unicode_attributes[ch].category[1] == 'c'
-		  || unicode_attributes[ch].category[1] == 'f')))
-	if (!(attr & ((1 << LBP_BK) | (1 << LBP_BA) | (1 << LBP_GL) | (1 << LBP_SA) | (1 << LBP_WJ) | (1 << LBP_ZW))))
-	  attr |= 1 << LBP_CM;
-
-      /* ideographic */
-      if ((ch >= 0x2E80 && ch <= 0x2FFF) /* CJK RADICAL, KANGXI RADICAL, IDEOGRAPHIC DESCRIPTION */
-	  || ch == 0x3000 /* IDEOGRAPHIC SPACE */
-	  || (ch >= 0x3040 && ch <= 0x309F) /* HIRAGANA */
-	  || (ch >= 0x30A0 && ch <= 0x30FF) /* KATAKANA */
-	  || (ch >= 0x3400 && ch <= 0x4DB5) /* CJK Ideograph Extension A */
-	  || (ch >= 0x4E00 && ch <= 0x9FBB) /* CJK Ideograph */
-	  || (ch >= 0xF900 && ch <= 0xFAD9) /* CJK COMPATIBILITY IDEOGRAPH */
-	  || (ch >= 0xA000 && ch <= 0xA48F) /* YI SYLLABLE */
-	  || (ch >= 0xA490 && ch <= 0xA4CF) /* YI RADICAL */
-	  || ch == 0xFE62 /* SMALL PLUS SIGN */
-	  || ch == 0xFE63 /* SMALL HYPHEN-MINUS */
-	  || ch == 0xFE64 /* SMALL LESS-THAN SIGN */
-	  || ch == 0xFE65 /* SMALL GREATER-THAN SIGN */
-	  || ch == 0xFE66 /* SMALL EQUALS SIGN */
-	  || (ch >= 0xFF10 && ch <= 0xFF19) /* FULLWIDTH DIGIT */
-	  || (ch >= 0x20000 && ch <= 0x2A6D6) /* CJK Ideograph Extension B */
-	  || (ch >= 0x2F800 && ch <= 0x2FA1D) /* CJK COMPATIBILITY IDEOGRAPH */
-	  || strstr (unicode_attributes[ch].name, "FULLWIDTH LATIN ") != NULL
-	  || (ch >= 0x3000 && ch <= 0x33FF
-	      && !(attr & ((1 << LBP_CM) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_CL))))
-	  /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-	  || ch == 0xFE30 /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER */
-	  || ch == 0xFE31 /* PRESENTATION FORM FOR VERTICAL EM DASH */
-	  || ch == 0xFE32 /* PRESENTATION FORM FOR VERTICAL EN DASH */
-	  || ch == 0xFE33 /* PRESENTATION FORM FOR VERTICAL LOW LINE */
-	  || ch == 0xFE34 /* PRESENTATION FORM FOR VERTICAL WAVY LOW LINE */
-	  || ch == 0xFE45 /* SESAME DOT */
-	  || ch == 0xFE46 /* WHITE SESAME DOT */
-	  || ch == 0xFE49 /* DASHED OVERLINE */
-	  || ch == 0xFE4A /* CENTRELINE OVERLINE */
-	  || ch == 0xFE4B /* WAVY OVERLINE */
-	  || ch == 0xFE4C /* DOUBLE WAVY OVERLINE */
-	  || ch == 0xFE4D /* DASHED LOW LINE */
-	  || ch == 0xFE4E /* CENTRELINE LOW LINE */
-	  || ch == 0xFE4F /* WAVY LOW LINE */
-	  || ch == 0xFE51 /* SMALL IDEOGRAPHIC COMMA */
-	  || ch == 0xFE58 /* SMALL EM DASH */
-	  || ch == 0xFE5F /* SMALL NUMBER SIGN */
-	  || ch == 0xFE60 /* SMALL AMPERSAND */
-	  || ch == 0xFE61 /* SMALL ASTERISK */
-	  || ch == 0xFE68 /* SMALL REVERSE SOLIDUS */
-	  || ch == 0xFE6B /* SMALL COMMERCIAL AT */
-	  || ch == 0xFF02 /* FULLWIDTH QUOTATION MARK */
-	  || ch == 0xFF03 /* FULLWIDTH NUMBER SIGN */
-	  || ch == 0xFF06 /* FULLWIDTH AMPERSAND */
-	  || ch == 0xFF07 /* FULLWIDTH APOSTROPHE */
-	  || ch == 0xFF0A /* FULLWIDTH ASTERISK */
-	  || ch == 0xFF0B /* FULLWIDTH PLUS SIGN */
-	  || ch == 0xFF0D /* FULLWIDTH HYPHEN-MINUS */
-	  || ch == 0xFF0F /* FULLWIDTH SOLIDUS */
-	  || ch == 0xFF1C /* FULLWIDTH LESS-THAN SIGN */
-	  || ch == 0xFF1D /* FULLWIDTH EQUALS SIGN */
-	  || ch == 0xFF1E /* FULLWIDTH GREATER-THAN SIGN */
-	  || ch == 0xFF20 /* FULLWIDTH COMMERCIAL AT */
-	  || ch == 0xFF3C /* FULLWIDTH REVERSE SOLIDUS */
-	  || ch == 0xFF3E /* FULLWIDTH CIRCUMFLEX ACCENT */
-	  || ch == 0xFF3F /* FULLWIDTH LOW LINE */
-	  || ch == 0xFF40 /* FULLWIDTH GRAVE ACCENT */
-	  || ch == 0xFF5C /* FULLWIDTH VERTICAL LINE */
-	  || ch == 0xFF5E /* FULLWIDTH TILDE */
-	  || ch == 0xFFE2 /* FULLWIDTH NOT SIGN */
-	  || ch == 0xFFE3 /* FULLWIDTH MACRON */
-	  || ch == 0xFFE4 /* FULLWIDTH BROKEN BAR */)
-	if (!(attr & ((1 << LBP_NS) | (1 << LBP_CM))))
-	  {
-	    /* ambiguous (ideograph) ? */
-	    if ((unicode_width[ch] != NULL
-		 && unicode_width[ch][0] == 'A'
-		 && ch >= 0x2000)
-		|| ch == 0x24EA /* CIRCLED DIGIT ZERO */
-		|| (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */)
-	      attr |= 1 << LBP_AI;
-	    else
-	      attr |= 1 << LBP_ID;
-	  }
-
-      /* ordinary alphabetic and symbol characters */
-      if ((unicode_attributes[ch].category[0] == 'L'
-	   && (unicode_attributes[ch].category[1] == 'u'
-	       || unicode_attributes[ch].category[1] == 'l'
-	       || unicode_attributes[ch].category[1] == 't'
-	       || unicode_attributes[ch].category[1] == 'm'
-	       || unicode_attributes[ch].category[1] == 'o'))
-	  || (unicode_attributes[ch].category[0] == 'S'
-	      && (unicode_attributes[ch].category[1] == 'm'
-		  || unicode_attributes[ch].category[1] == 'k'
-		  || unicode_attributes[ch].category[1] == 'o'))
-	  || (unicode_attributes[ch].category[0] == 'N'
-	      && (unicode_attributes[ch].category[1] == 'l'
-		  || unicode_attributes[ch].category[1] == 'o'))
-	  || (unicode_attributes[ch].category[0] == 'P'
-	      && (unicode_attributes[ch].category[1] == 'c'
-		  || unicode_attributes[ch].category[1] == 'd'
-		  || unicode_attributes[ch].category[1] == 'o'))
-	  || ch == 0x0600 /* ARABIC NUMBER SIGN */
-	  || ch == 0x0601 /* ARABIC SIGN SANAH */
-	  || ch == 0x0602 /* ARABIC FOOTNOTE MARKER */
-	  || ch == 0x0603 /* ARABIC SIGN SAFHA */
-	  || ch == 0x06DD /* ARABIC END OF AYAH */
-	  || ch == 0x070F /* SYRIAC ABBREVIATION MARK */
-	  || ch == 0x2061 /* FUNCTION APPLICATION */
-	  || ch == 0x2062 /* INVISIBLE TIMES */
-	  || ch == 0x2063 /* INVISIBLE SEPARATOR */
-	  || ch == 0x2064 /* INVISIBLE PLUS */)
-	if (!(attr & ((1 << LBP_GL) | (1 << LBP_B2) | (1 << LBP_BA) | (1 << LBP_BB) | (1 << LBP_HY) | (1 << LBP_CB) | (1 << LBP_CL) | (1 << LBP_EX) | (1 << LBP_IN) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_QU) | (1 << LBP_IS) | (1 << LBP_NU) | (1 << LBP_PO) | (1 << LBP_PR) | (1 << LBP_SY) | (1 << LBP_H2) | (1 << LBP_H3) | (1 << LBP_JL) | (1 << LBP_JV) | (1 << LBP_JT) | (1 << LBP_SA) | (1 << LBP_ID))))
-	  {
-	    /* ambiguous (alphabetic) ? */
-	    if ((unicode_width[ch] != NULL
-		 && unicode_width[ch][0] == 'A'
-		 && ch >= 0x2000
-		 /* Extra exceptions for compatibility with Unicode LineBreak.txt.  */
-		 && ch != 0x2022 /* BULLET */
-		 && ch != 0x203E /* OVERLINE */
-		 && ch != 0x2126 /* OHM SIGN */
-		 && ch != 0x2153 /* VULGAR FRACTION ONE THIRD */
-		 && ch != 0x215C /* VULGAR FRACTION THREE EIGHTHS */
-		 && ch != 0x215D /* VULGAR FRACTION FIVE EIGHTHS */
-		 && ch != 0x21B8 /* NORTH WEST ARROW TO LONG BAR */
-		 && ch != 0x21B9 /* LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR */
-		 && ch != 0x21E7 /* UPWARDS WHITE ARROW */
-		 && ch != 0x24FF /* NEGATIVE CIRCLED DIGIT ZERO */
-		 && ch != 0x273D /* HEAVY TEARDROP-SPOKED ASTERISK */)
-#if !REVISION_22
-		|| ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
-		|| ch == 0x00A7 /* SECTION SIGN */
-		|| ch == 0x00A8 /* DIAERESIS */
-		|| ch == 0x00AA /* FEMININE ORDINAL INDICATOR */
-		|| ch == 0x00B2 /* SUPERSCRIPT TWO */
-		|| ch == 0x00B3 /* SUPERSCRIPT THREE */
-		|| ch == 0x00B6 /* PILCROW SIGN */
-		|| ch == 0x00B7 /* MIDDLE DOT */
-		|| ch == 0x00B8 /* CEDILLA */
-		|| ch == 0x00B9 /* SUPERSCRIPT ONE */
-		|| ch == 0x00BA /* MASCULINE ORDINAL INDICATOR */
-		|| ch == 0x00BC /* VULGAR FRACTION ONE QUARTER */
-		|| ch == 0x00BD /* VULGAR FRACTION ONE HALF */
-		|| ch == 0x00BE /* VULGAR FRACTION THREE QUARTERS */
-		|| ch == 0x00BF /* INVERTED QUESTION MARK */
-		|| ch == 0x00D7 /* MULTIPLICATION SIGN */
-		|| ch == 0x00F7 /* DIVISION SIGN */
-		|| ch == 0x02C7 /* CARON */
-		|| ch == 0x02C9 /* MODIFIER LETTER MACRON */
-		|| ch == 0x02CA /* MODIFIER LETTER ACUTE ACCENT */
-		|| ch == 0x02CB /* MODIFIER LETTER GRAVE ACCENT */
-		|| ch == 0x02CD /* MODIFIER LETTER LOW MACRON */
-		|| ch == 0x02D0 /* MODIFIER LETTER TRIANGULAR COLON */
-		|| ch == 0x02D8 /* BREVE */
-		|| ch == 0x02D9 /* DOT ABOVE */
-		|| ch == 0x02DA /* RING ABOVE */
-		|| ch == 0x02DB /* OGONEK */
-		|| ch == 0x02DD /* DOUBLE ACUTE ACCENT */
-#endif
-		|| ch == 0x24EA /* CIRCLED DIGIT ZERO */
-		|| (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */
-		/* Extra characters for compatibility with Unicode LineBreak.txt.  */
-		|| ch == 0x2155 /* VULGAR FRACTION ONE FIFTH */
-		|| ch == 0x2574 /* BOX DRAWINGS LIGHT LEFT */
-		|| ch == 0x2616 /* WHITE SHOGI PIECE */
-		|| ch == 0x2617 /* BLACK SHOGI PIECE */)
-	      attr |= 1 << LBP_AI;
-	    else
-	      attr |= 1 << LBP_AL;
-	    attr &= ~(1 << LBP_CM);
-	  }
-    }
-
-  if (attr == 0)
-    /* unknown */
-    attr |= 1 << LBP_XX;
-
-  return attr;
-}
-
-/* Output the line breaking properties in a human readable format.  */
-static void
-debug_output_lbp (FILE *stream)
-{
-  unsigned int i;
-
-  for (i = 0; i < 0x110000; i++)
-    {
-      int attr = get_lbp (i);
-      if (attr != 1 << LBP_XX)
-	{
-	  fprintf (stream, "0x%04X", i);
-#define PRINT_BIT(attr,bit) \
-  if (attr & (1 << bit)) fprintf (stream, " " #bit);
-	  PRINT_BIT(attr,LBP_BK);
-	  PRINT_BIT(attr,LBP_CM);
-	  PRINT_BIT(attr,LBP_WJ);
-	  PRINT_BIT(attr,LBP_ZW);
-	  PRINT_BIT(attr,LBP_GL);
-	  PRINT_BIT(attr,LBP_SP);
-	  PRINT_BIT(attr,LBP_B2);
-	  PRINT_BIT(attr,LBP_BA);
-	  PRINT_BIT(attr,LBP_BB);
-	  PRINT_BIT(attr,LBP_HY);
-	  PRINT_BIT(attr,LBP_CB);
-	  PRINT_BIT(attr,LBP_CL);
-	  PRINT_BIT(attr,LBP_EX);
-	  PRINT_BIT(attr,LBP_IN);
-	  PRINT_BIT(attr,LBP_NS);
-	  PRINT_BIT(attr,LBP_OP);
-	  PRINT_BIT(attr,LBP_QU);
-	  PRINT_BIT(attr,LBP_IS);
-	  PRINT_BIT(attr,LBP_NU);
-	  PRINT_BIT(attr,LBP_PO);
-	  PRINT_BIT(attr,LBP_PR);
-	  PRINT_BIT(attr,LBP_SY);
-	  PRINT_BIT(attr,LBP_AI);
-	  PRINT_BIT(attr,LBP_AL);
-	  PRINT_BIT(attr,LBP_H2);
-	  PRINT_BIT(attr,LBP_H3);
-	  PRINT_BIT(attr,LBP_ID);
-	  PRINT_BIT(attr,LBP_JL);
-	  PRINT_BIT(attr,LBP_JV);
-	  PRINT_BIT(attr,LBP_JT);
-	  PRINT_BIT(attr,LBP_SA);
-	  PRINT_BIT(attr,LBP_XX);
-#undef PRINT_BIT
-	  fprintf (stream, "\n");
-	}
-    }
-}
-
-static void
-debug_output_tables (const char *filename)
-{
-  FILE *stream;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  debug_output_lbp (stream);
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* The line breaking property from the LineBreak.txt file.  */
-int unicode_org_lbp[0x110000];
-
-/* Stores in unicode_org_lbp[] the line breaking property from the
-   LineBreak.txt file.  */
-static void
-fill_org_lbp (const char *linebreak_filename)
-{
-  unsigned int i, j;
-  FILE *stream;
-  char field0[FIELDLEN];
-  char field1[FIELDLEN];
-  char field2[FIELDLEN];
-  int lineno = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_org_lbp[i] = LBP_XX;
-
-  stream = fopen (linebreak_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", linebreak_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      int n;
-      int c;
-      int value;
-
-      lineno++;
-      c = getc (stream);
-      if (c == EOF)
-	break;
-      if (c == '#')
-	{
-	  do c = getc (stream); while (c != EOF && c != '\n');
-	  continue;
-	}
-      ungetc (c, stream);
-      n = getfield (stream, field0, ';');
-      n += getfield (stream, field1, ' ');
-      n += getfield (stream, field2, '\n');
-      if (n == 0)
-	break;
-      if (n != 3)
-	{
-	  fprintf (stderr, "short line in '%s':%d\n", linebreak_filename,
-		   lineno);
-	  exit (1);
-	}
-#define TRY(bit) else if (strcmp (field1, #bit + 4) == 0) value = bit;
-      if (false) {}
-      TRY(LBP_BK)
-      TRY(LBP_CM)
-      TRY(LBP_WJ)
-      TRY(LBP_ZW)
-      TRY(LBP_GL)
-      TRY(LBP_SP)
-      TRY(LBP_B2)
-      TRY(LBP_BA)
-      TRY(LBP_BB)
-      TRY(LBP_HY)
-      TRY(LBP_CB)
-      TRY(LBP_CL)
-      TRY(LBP_EX)
-      TRY(LBP_IN)
-      TRY(LBP_NS)
-      TRY(LBP_OP)
-      TRY(LBP_QU)
-      TRY(LBP_IS)
-      TRY(LBP_NU)
-      TRY(LBP_PO)
-      TRY(LBP_PR)
-      TRY(LBP_SY)
-      TRY(LBP_AI)
-      TRY(LBP_AL)
-      TRY(LBP_H2)
-      TRY(LBP_H3)
-      TRY(LBP_ID)
-      TRY(LBP_JL)
-      TRY(LBP_JV)
-      TRY(LBP_JT)
-      TRY(LBP_SA)
-      TRY(LBP_XX)
-#undef TRY
-      else if (strcmp (field1, "LF") == 0) value = LBP_BK;
-      else if (strcmp (field1, "CR") == 0) value = LBP_BK;
-      else if (strcmp (field1, "NL") == 0) value = LBP_BK;
-      else if (strcmp (field1, "SG") == 0) value = LBP_XX;
-      else
-	{
-	  fprintf (stderr, "unknown property value \"%s\" in '%s':%d\n",
-		   field1, linebreak_filename, lineno);
-	  exit (1);
-	}
-      i = strtoul (field0, NULL, 16);
-      if (strstr (field0, "..") != NULL)
-	{
-	  /* Deal with a range.  */
-	  j = strtoul (strstr (field0, "..") + 2, NULL, 16);
-	  for (; i <= j; i++)
-	    unicode_org_lbp[i] = value;
-	}
-      else
-	{
-	  /* Single character line.  */
-	  unicode_org_lbp[i] = value;
-	}
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", linebreak_filename);
-      exit (1);
-    }
-}
-
-/* Output the line breaking properties in a human readable format.  */
-static void
-debug_output_org_lbp (FILE *stream)
-{
-  unsigned int i;
-
-  for (i = 0; i < 0x110000; i++)
-    {
-      int attr = unicode_org_lbp[i];
-      if (attr != LBP_XX)
-	{
-	  fprintf (stream, "0x%04X", i);
-#define PRINT_BIT(attr,bit) \
-  if (attr == bit) fprintf (stream, " " #bit);
-	  PRINT_BIT(attr,LBP_BK);
-	  PRINT_BIT(attr,LBP_CM);
-	  PRINT_BIT(attr,LBP_WJ);
-	  PRINT_BIT(attr,LBP_ZW);
-	  PRINT_BIT(attr,LBP_GL);
-	  PRINT_BIT(attr,LBP_SP);
-	  PRINT_BIT(attr,LBP_B2);
-	  PRINT_BIT(attr,LBP_BA);
-	  PRINT_BIT(attr,LBP_BB);
-	  PRINT_BIT(attr,LBP_HY);
-	  PRINT_BIT(attr,LBP_CB);
-	  PRINT_BIT(attr,LBP_CL);
-	  PRINT_BIT(attr,LBP_EX);
-	  PRINT_BIT(attr,LBP_IN);
-	  PRINT_BIT(attr,LBP_NS);
-	  PRINT_BIT(attr,LBP_OP);
-	  PRINT_BIT(attr,LBP_QU);
-	  PRINT_BIT(attr,LBP_IS);
-	  PRINT_BIT(attr,LBP_NU);
-	  PRINT_BIT(attr,LBP_PO);
-	  PRINT_BIT(attr,LBP_PR);
-	  PRINT_BIT(attr,LBP_SY);
-	  PRINT_BIT(attr,LBP_AI);
-	  PRINT_BIT(attr,LBP_AL);
-	  PRINT_BIT(attr,LBP_H2);
-	  PRINT_BIT(attr,LBP_H3);
-	  PRINT_BIT(attr,LBP_ID);
-	  PRINT_BIT(attr,LBP_JL);
-	  PRINT_BIT(attr,LBP_JV);
-	  PRINT_BIT(attr,LBP_JT);
-	  PRINT_BIT(attr,LBP_SA);
-	  PRINT_BIT(attr,LBP_XX);
-#undef PRINT_BIT
-	  fprintf (stream, "\n");
-	}
-    }
-}
-
-static void
-debug_output_org_tables (const char *filename)
-{
-  FILE *stream;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  debug_output_org_lbp (stream);
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE lbp_table
-#define ELEMENT unsigned char
-#define DEFAULT LBP_XX
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-static void
-output_lbp (FILE *stream1, FILE *stream2)
-{
-  unsigned int i;
-  struct lbp_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  t.p = 7;
-  t.q = 9;
-  lbp_table_init (&t);
-
-  for (i = 0; i < 0x110000; i++)
-    {
-      int attr = get_lbp (i);
-
-      /* Now attr should contain exactly one bit.  */
-      if (attr == 0 || ((attr & (attr - 1)) != 0))
-	abort ();
-
-      if (attr != 1 << LBP_XX)
-	{
-	  unsigned int log2_attr;
-	  for (log2_attr = 0; attr > 1; attr >>= 1, log2_attr++);
-
-	  lbp_table_add (&t, i, log2_attr);
-	}
-    }
-
-  lbp_table_finalize (&t);
-
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream1, "#define lbrkprop_header_%d %d\n", i,
-	     ((uint32_t *) t.result)[i]);
-  fprintf (stream1, "\n");
-  fprintf (stream1, "typedef struct\n");
-  fprintf (stream1, "  {\n");
-  fprintf (stream1, "    int level1[%d];\n", t.level1_size);
-  fprintf (stream1, "    int level2[%d << %d];\n", t.level2_size, t.q);
-  fprintf (stream1, "    unsigned char level3[%d << %d];\n", t.level3_size, t.p);
-  fprintf (stream1, "  }\n");
-  fprintf (stream1, "lbrkprop_t;\n");
-  fprintf (stream1, "extern const lbrkprop_t unilbrkprop;\n");
-
-  fprintf (stream2, "const lbrkprop_t unilbrkprop =\n");
-  fprintf (stream2, "{\n");
-  fprintf (stream2, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream2, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-	fprintf (stream2, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      fprintf (stream2, " %5d%s",
-	       offset == 0 ? -1 : (offset - level2_offset) / sizeof (uint32_t),
-	       (i+1 < t.level1_size ? "," : ""));
-    }
-  if (t.level1_size > 8)
-    fprintf (stream2, "\n ");
-  fprintf (stream2, " },\n");
-  fprintf (stream2, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream2, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-	fprintf (stream2, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      fprintf (stream2, " %5d%s",
-	       offset == 0 ? -1 : (offset - level3_offset) / sizeof (uint8_t),
-	       (i+1 < t.level2_size << t.q ? "," : ""));
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream2, "\n ");
-  fprintf (stream2, " },\n");
-  fprintf (stream2, "  {");
-  if (t.level3_size << t.p > 8)
-    fprintf (stream2, "\n   ");
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      unsigned char value = ((unsigned char *) (t.result + level3_offset))[i];
-      const char *value_string;
-      switch (value)
-	{
-#define CASE(x) case x: value_string = #x; break;
-	  CASE(LBP_BK);
-	  CASE(LBP_CM);
-	  CASE(LBP_WJ);
-	  CASE(LBP_ZW);
-	  CASE(LBP_GL);
-	  CASE(LBP_SP);
-	  CASE(LBP_B2);
-	  CASE(LBP_BA);
-	  CASE(LBP_BB);
-	  CASE(LBP_HY);
-	  CASE(LBP_CB);
-	  CASE(LBP_CL);
-	  CASE(LBP_EX);
-	  CASE(LBP_IN);
-	  CASE(LBP_NS);
-	  CASE(LBP_OP);
-	  CASE(LBP_QU);
-	  CASE(LBP_IS);
-	  CASE(LBP_NU);
-	  CASE(LBP_PO);
-	  CASE(LBP_PR);
-	  CASE(LBP_SY);
-	  CASE(LBP_AI);
-	  CASE(LBP_AL);
-	  CASE(LBP_H2);
-	  CASE(LBP_H3);
-	  CASE(LBP_ID);
-	  CASE(LBP_JL);
-	  CASE(LBP_JV);
-	  CASE(LBP_JT);
-	  CASE(LBP_SA);
-	  CASE(LBP_XX);
-#undef CASE
-	  default:
-	    abort ();
-	}
-      if (i > 0 && (i % 8) == 0)
-	fprintf (stream2, "\n   ");
-      fprintf (stream2, " %s%s", value_string,
-	       (i+1 < t.level3_size << t.p ? "," : ""));
-    }
-  if (t.level3_size << t.p > 8)
-    fprintf (stream2, "\n ");
-  fprintf (stream2, " }\n");
-  fprintf (stream2, "};\n");
-}
-
-static void
-output_tables (const char *filename1, const char *filename2, const char *version)
-{
-  const char *filenames[2];
-  FILE *streams[2];
-  size_t i;
-
-  filenames[0] = filename1;
-  filenames[1] = filename2;
-
-  for (i = 0; i < 2; i++)
-    {
-      streams[i] = fopen (filenames[i], "w");
-      if (streams[i] == NULL)
-	{
-	  fprintf (stderr, "cannot open '%s' for writing\n", filenames[i]);
-	  exit (1);
-	}
-    }
-
-  for (i = 0; i < 2; i++)
-    {
-      FILE *stream = streams[i];
-
-      fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-      fprintf (stream, "/* Line breaking properties of Unicode characters.  */\n");
-      fprintf (stream, "/* Generated automatically by gen-lbrk for Unicode %s.  */\n",
-	       version);
-      fprintf (stream, "\n");
-
-      /* Put a GPL header on it.  The gnulib module is under LGPL (although it
-	 still carries the GPL header), and it's gnulib-tool which replaces the
-	 GPL header with an LGPL header.  */
-      fprintf (stream, "/* Copyright (C) 2000-2002, 2004, 2008 Free Software Foundation, Inc.\n");
-      fprintf (stream, "\n");
-      fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
-      fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
-      fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
-      fprintf (stream, "   (at your option) any later version.\n");
-      fprintf (stream, "\n");
-      fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
-      fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
-      fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
-      fprintf (stream, "   GNU General Public License for more details.\n");
-      fprintf (stream, "\n");
-      fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
-      fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
-      fprintf (stream, "\n");
-    }
-
-  output_lbp (streams[0], streams[1]);
-
-  for (i = 0; i < 2; i++)
-    {
-      if (ferror (streams[i]) || fclose (streams[i]))
-	{
-	  fprintf (stderr, "error writing to '%s'\n", filenames[i]);
-	  exit (1);
-	}
-    }
-}
-
-int
-main (int argc, char * argv[])
-{
-  if (argc != 5)
-    {
-      fprintf (stderr, "Usage: %s UnicodeData.txt EastAsianWidth.txt LineBreak.txt version\n",
-	       argv[0]);
-      exit (1);
-    }
-
-  fill_attributes (argv[1]);
-  fill_width (argv[2]);
-  fill_org_lbp (argv[3]);
-
-  debug_output_tables ("lbrkprop.txt");
-  debug_output_org_tables ("lbrkprop_org.txt");
-
-  output_tables ("lbrkprop1.h", "lbrkprop2.h", argv[4]);
-
-  return 0;
-}
-
-/*
- * For Emacs M-x compile
- * Local Variables:
- * compile-command: "
-   gcc -O -Wall -I../unictype gen-lbrk.c -o gen-lbrk && \
-   ./gen-lbrk \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/UnicodeData.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/EastAsianWidth.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/LineBreak.txt \
-        5.0.0
-   "
- * End:
- */
diff --git a/modules/unictype/gen-ctype b/modules/gen-uni-tables
similarity index 70%
rename from modules/unictype/gen-ctype
rename to modules/gen-uni-tables
index b13c2042d5..11f4c94f35 100644
--- a/modules/unictype/gen-ctype
+++ b/modules/gen-uni-tables
@@ -1,8 +1,8 @@
 Description:
-Generates the tables in lib/unictype/*.
+Generates the tables in lib/unictype/* and lib/unilbrk/*.
 
 Files:
-lib/unictype/gen-ctype.c
+lib/gen-uni-tables.c
 lib/unictype/3level.h
 lib/unictype/3levelbit.h
 
diff --git a/modules/unilbrk/gen-lbrk b/modules/unilbrk/gen-lbrk
deleted file mode 100644
index caa4086dee..0000000000
--- a/modules/unilbrk/gen-lbrk
+++ /dev/null
@@ -1,23 +0,0 @@
-Description:
-Generates the tables in lib/unilbrk/*.
-
-Files:
-lib/unilbrk/gen-lbrk.c
-lib/unictype/3level.h
-
-Depends-on:
-memcmp
-strdup
-
-configure.ac:
-
-Makefile.am:
-
-Include:
-
-License:
-GPLed build tool
-
-Maintainer:
-Bruno Haible
-
-- 
2.30.2