Merge gen-ctype and gen-lbrk into gen-uni-tables.

author Bruno Haible <bruno@clisp.org>

Sun, 8 Feb 2009 02:02:06 +0000 (03:02 +0100)

committer Bruno Haible <bruno@clisp.org>

Sun, 8 Feb 2009 02:02:06 +0000 (03:02 +0100)
author Bruno Haible <bruno@clisp.org>
Sun, 8 Feb 2009 02:02:06 +0000 (03:02 +0100)
committer Bruno Haible <bruno@clisp.org>
Sun, 8 Feb 2009 02:02:06 +0000 (03:02 +0100)
diff --git a/ChangeLog b/ChangeLog

index 6e79309c549e3397b0e0857167756cc840835775..0232e3aeefd1d6d33cc01728e1c6e00057e3a506 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2009-02-07  Bruno Haible  <bruno@clisp.org>
+
+       Merge gen-ctype and gen-lbrk into a single program.
+       * lib/gen-uni-tables.c: New file, incorporating
+       lib/unictype/gen-ctype.c and lib/unilbrk/gen-lbrk.c.
+       Add directory prefixes to the names of the generated files.
+       * lib/unictype/gen-ctype.c: Remove file.
+       * lib/unilbrk/gen-lbrk.c: Remove file.
+       * modules/gen-uni-tables: New file.
+       * modules/unictype/gen-ctype: Remove file.
+       * modules/unilbrk/gen-lbrk: Remove file.
+
  2009-02-07  Bruno Haible  <bruno@clisp.org>
  
         * lib/unistr.h (u8_strcoll, u16_strcoll, u32_strcoll): New declations.
diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c

new file mode 100644 (file)

index 0000000..b1149fd
--- /dev/null
+++ b/lib/gen-uni-tables.c
@@ -0,0 +1,6365 @@
+/* Generate Unicode conforming character classification tables and
+   Line Break Properties tables from a UnicodeData file.
+   Copyright (C) 2000-2002, 2004, 2007-2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2000-2002.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Usage example:
+     $ gen-uni-tables /usr/local/share/Unidata/UnicodeData.txt \
+                      /usr/local/share/Unidata/PropList.txt \
+                      /usr/local/share/Unidata/DerivedCoreProperties.txt \
+                      /usr/local/share/Unidata/Scripts.txt \
+                      /usr/local/share/Unidata/Blocks.txt \
+                      /usr/local/share/Unidata/PropList-3.0.1.txt \
+                      /usr/local/share/Unidata/EastAsianWidth.txt \
+                      /usr/local/share/Unidata/LineBreak.txt \
+                      5.0.0
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+/* ========================================================================= */
+
+/* Reading UnicodeData.txt.  */
+/* See UCD.html.  */
+
+/* This structure represents one line in the UnicodeData.txt file.  */
+struct unicode_attribute
+{
+  const char *name;           /* Character name */
+  const char *category;       /* General category */
+  const char *combining;      /* Canonical combining class */
+  const char *bidi;           /* Bidirectional category */
+  const char *decomposition;  /* Character decomposition mapping */
+  const char *decdigit;       /* Decimal digit value */
+  const char *digit;          /* Digit value */
+  const char *numeric;        /* Numeric value */
+  bool mirrored;              /* mirrored */
+  const char *oldname;        /* Old Unicode 1.0 name */
+  const char *comment;        /* Comment */
+  unsigned int upper;         /* Uppercase mapping */
+  unsigned int lower;         /* Lowercase mapping */
+  unsigned int title;         /* Titlecase mapping */
+};
+
+/* Missing fields are represented with "" for strings, and NONE for
+   characters.  */
+#define NONE (~(unsigned int)0)
+
+/* The entire contents of the UnicodeData.txt file.  */
+struct unicode_attribute unicode_attributes [0x110000];
+
+/* Stores in unicode_attributes[i] the values from the given fields.  */
+static void
+fill_attribute (unsigned int i,
+               const char *field1, const char *field2,
+               const char *field3, const char *field4,
+               const char *field5, const char *field6,
+               const char *field7, const char *field8,
+               const char *field9, const char *field10,
+               const char *field11, const char *field12,
+               const char *field13, const char *field14)
+{
+  struct unicode_attribute * uni;
+
+  if (i >= 0x110000)
+    {
+      fprintf (stderr, "index too large\n");
+      exit (1);
+    }
+  if (strcmp (field2, "Cs") == 0)
+    /* Surrogates are UTF-16 artefacts, not real characters. Ignore them.  */
+    return;
+  uni = &unicode_attributes[i];
+  /* Copy the strings.  */
+  uni->name          = strdup (field1);
+  uni->category      = (field2[0] == '\0' ? "" : strdup (field2));
+  uni->combining     = (field3[0] == '\0' ? "" : strdup (field3));
+  uni->bidi          = (field4[0] == '\0' ? "" : strdup (field4));
+  uni->decomposition = (field5[0] == '\0' ? "" : strdup (field5));
+  uni->decdigit      = (field6[0] == '\0' ? "" : strdup (field6));
+  uni->digit         = (field7[0] == '\0' ? "" : strdup (field7));
+  uni->numeric       = (field8[0] == '\0' ? "" : strdup (field8));
+  uni->mirrored      = (field9[0] == 'Y');
+  uni->oldname       = (field10[0] == '\0' ? "" : strdup (field10));
+  uni->comment       = (field11[0] == '\0' ? "" : strdup (field11));
+  uni->upper = (field12[0] =='\0' ? NONE : strtoul (field12, NULL, 16));
+  uni->lower = (field13[0] =='\0' ? NONE : strtoul (field13, NULL, 16));
+  uni->title = (field14[0] =='\0' ? NONE : strtoul (field14, NULL, 16));
+}
+
+/* Maximum length of a field in the UnicodeData.txt file.  */
+#define FIELDLEN 120
+
+/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
+   Reads up to (but excluding) DELIM.
+   Returns 1 when a field was successfully read, otherwise 0.  */
+static int
+getfield (FILE *stream, char *buffer, int delim)
+{
+  int count = 0;
+  int c;
+
+  for (; (c = getc (stream)), (c != EOF && c != delim); )
+    {
+      /* The original unicode.org UnicodeData.txt file happens to have
+        CR/LF line terminators.  Silently convert to LF.  */
+      if (c == '\r')
+       continue;
+
+      /* Put c into the buffer.  */
+      if (++count >= FIELDLEN - 1)
+       {
+         fprintf (stderr, "field longer than expected, increase FIELDLEN\n");
+         exit (1);
+       }
+      *buffer++ = c;
+    }
+
+  if (c == EOF)
+    return 0;
+
+  *buffer = '\0';
+  return 1;
+}
+
+/* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
+   file.  */
+static void
+fill_attributes (const char *unicodedata_filename)
+{
+  unsigned int i, j;
+  FILE *stream;
+  char field0[FIELDLEN];
+  char field1[FIELDLEN];
+  char field2[FIELDLEN];
+  char field3[FIELDLEN];
+  char field4[FIELDLEN];
+  char field5[FIELDLEN];
+  char field6[FIELDLEN];
+  char field7[FIELDLEN];
+  char field8[FIELDLEN];
+  char field9[FIELDLEN];
+  char field10[FIELDLEN];
+  char field11[FIELDLEN];
+  char field12[FIELDLEN];
+  char field13[FIELDLEN];
+  char field14[FIELDLEN];
+  int lineno = 0;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_attributes[i].name = NULL;
+
+  stream = fopen (unicodedata_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      int n;
+
+      lineno++;
+      n = getfield (stream, field0, ';');
+      n += getfield (stream, field1, ';');
+      n += getfield (stream, field2, ';');
+      n += getfield (stream, field3, ';');
+      n += getfield (stream, field4, ';');
+      n += getfield (stream, field5, ';');
+      n += getfield (stream, field6, ';');
+      n += getfield (stream, field7, ';');
+      n += getfield (stream, field8, ';');
+      n += getfield (stream, field9, ';');
+      n += getfield (stream, field10, ';');
+      n += getfield (stream, field11, ';');
+      n += getfield (stream, field12, ';');
+      n += getfield (stream, field13, ';');
+      n += getfield (stream, field14, '\n');
+      if (n == 0)
+       break;
+      if (n != 15)
+       {
+         fprintf (stderr, "short line in '%s':%d\n",
+                  unicodedata_filename, lineno);
+         exit (1);
+       }
+      i = strtoul (field0, NULL, 16);
+      if (field1[0] == '<'
+         && strlen (field1) >= 9
+         && strcmp (field1 + strlen(field1) - 8, ", First>") == 0)
+       {
+         /* Deal with a range. */
+         lineno++;
+         n = getfield (stream, field0, ';');
+         n += getfield (stream, field1, ';');
+         n += getfield (stream, field2, ';');
+         n += getfield (stream, field3, ';');
+         n += getfield (stream, field4, ';');
+         n += getfield (stream, field5, ';');
+         n += getfield (stream, field6, ';');
+         n += getfield (stream, field7, ';');
+         n += getfield (stream, field8, ';');
+         n += getfield (stream, field9, ';');
+         n += getfield (stream, field10, ';');
+         n += getfield (stream, field11, ';');
+         n += getfield (stream, field12, ';');
+         n += getfield (stream, field13, ';');
+         n += getfield (stream, field14, '\n');
+         if (n != 15)
+           {
+             fprintf (stderr, "missing end range in '%s':%d\n",
+                      unicodedata_filename, lineno);
+             exit (1);
+           }
+         if (!(field1[0] == '<'
+               && strlen (field1) >= 8
+               && strcmp (field1 + strlen (field1) - 7, ", Last>") == 0))
+           {
+             fprintf (stderr, "missing end range in '%s':%d\n",
+                      unicodedata_filename, lineno);
+             exit (1);
+           }
+         field1[strlen (field1) - 7] = '\0';
+         j = strtoul (field0, NULL, 16);
+         for (; i <= j; i++)
+           fill_attribute (i, field1+1, field2, field3, field4, field5,
+                              field6, field7, field8, field9, field10,
+                              field11, field12, field13, field14);
+       }
+      else
+       {
+         /* Single character line */
+         fill_attribute (i, field1, field2, field3, field4, field5,
+                            field6, field7, field8, field9, field10,
+                            field11, field12, field13, field14);
+       }
+    }
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* General category.  */
+/* See Unicode 3.0 book, section 4.5,
+       UCD.html.  */
+
+static bool
+is_category_L (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'L');
+}
+
+static bool
+is_category_Lu (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'L'
+         && unicode_attributes[ch].category[1] == 'u');
+}
+
+static bool
+is_category_Ll (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'L'
+         && unicode_attributes[ch].category[1] == 'l');
+}
+
+static bool
+is_category_Lt (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'L'
+         && unicode_attributes[ch].category[1] == 't');
+}
+
+static bool
+is_category_Lm (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'L'
+         && unicode_attributes[ch].category[1] == 'm');
+}
+
+static bool
+is_category_Lo (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'L'
+         && unicode_attributes[ch].category[1] == 'o');
+}
+
+static bool
+is_category_M (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'M');
+}
+
+static bool
+is_category_Mn (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'M'
+         && unicode_attributes[ch].category[1] == 'n');
+}
+
+static bool
+is_category_Mc (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'M'
+         && unicode_attributes[ch].category[1] == 'c');
+}
+
+static bool
+is_category_Me (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'M'
+         && unicode_attributes[ch].category[1] == 'e');
+}
+
+static bool
+is_category_N (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'N');
+}
+
+static bool
+is_category_Nd (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'N'
+         && unicode_attributes[ch].category[1] == 'd');
+}
+
+static bool
+is_category_Nl (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'N'
+         && unicode_attributes[ch].category[1] == 'l');
+}
+
+static bool
+is_category_No (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'N'
+         && unicode_attributes[ch].category[1] == 'o');
+}
+
+static bool
+is_category_P (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P');
+}
+
+static bool
+is_category_Pc (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P'
+         && unicode_attributes[ch].category[1] == 'c');
+}
+
+static bool
+is_category_Pd (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P'
+         && unicode_attributes[ch].category[1] == 'd');
+}
+
+static bool
+is_category_Ps (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P'
+         && unicode_attributes[ch].category[1] == 's');
+}
+
+static bool
+is_category_Pe (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P'
+         && unicode_attributes[ch].category[1] == 'e');
+}
+
+static bool
+is_category_Pi (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P'
+         && unicode_attributes[ch].category[1] == 'i');
+}
+
+static bool
+is_category_Pf (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P'
+         && unicode_attributes[ch].category[1] == 'f');
+}
+
+static bool
+is_category_Po (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P'
+         && unicode_attributes[ch].category[1] == 'o');
+}
+
+static bool
+is_category_S (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'S');
+}
+
+static bool
+is_category_Sm (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'S'
+         && unicode_attributes[ch].category[1] == 'm');
+}
+
+static bool
+is_category_Sc (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'S'
+         && unicode_attributes[ch].category[1] == 'c');
+}
+
+static bool
+is_category_Sk (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'S'
+         && unicode_attributes[ch].category[1] == 'k');
+}
+
+static bool
+is_category_So (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'S'
+         && unicode_attributes[ch].category[1] == 'o');
+}
+
+static bool
+is_category_Z (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'Z');
+}
+
+static bool
+is_category_Zs (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'Z'
+         && unicode_attributes[ch].category[1] == 's');
+}
+
+static bool
+is_category_Zl (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'Z'
+         && unicode_attributes[ch].category[1] == 'l');
+}
+
+static bool
+is_category_Zp (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'Z'
+         && unicode_attributes[ch].category[1] == 'p');
+}
+
+static bool
+is_category_C (unsigned int ch)
+{
+  return (unicode_attributes[ch].name == NULL
+         || unicode_attributes[ch].category[0] == 'C');
+}
+
+static bool
+is_category_Cc (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'C'
+         && unicode_attributes[ch].category[1] == 'c');
+}
+
+static bool
+is_category_Cf (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'C'
+         && unicode_attributes[ch].category[1] == 'f');
+}
+
+static bool
+is_category_Cs (unsigned int ch)
+{
+  return (ch >= 0xd800 && ch < 0xe000);
+}
+
+static bool
+is_category_Co (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'C'
+         && unicode_attributes[ch].category[1] == 'o');
+}
+
+static bool
+is_category_Cn (unsigned int ch)
+{
+  return (unicode_attributes[ch].name == NULL
+         && !(ch >= 0xd800 && ch < 0xe000));
+}
+
+/* Output a boolean property in a human readable format.  */
+static void
+debug_output_predicate (const char *filename, bool (*predicate) (unsigned int))
+{
+  FILE *stream;
+  unsigned int ch;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+#if 0 /* This yields huge text output.  */
+  for (ch = 0; ch < 0x110000; ch++)
+    if (predicate (ch))
+      {
+       fprintf (stream, "0x%04X\n", ch);
+      }
+#else
+  for (ch = 0; ch < 0x110000; ch++)
+    if (predicate (ch))
+      {
+       unsigned int first = ch;
+       unsigned int last;
+
+       while (ch + 1 < 0x110000 && predicate (ch + 1))
+         ch++;
+       last = ch;
+       if (first < last)
+         fprintf (stream, "0x%04X..0x%04X\n", first, last);
+       else
+         fprintf (stream, "0x%04X\n", ch);
+      }
+#endif
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Output the unit test for a boolean property.  */
+static void
+output_predicate_test (const char *filename, bool (*predicate) (unsigned int), const char *expression)
+{
+  FILE *stream;
+  bool need_comma;
+  unsigned int ch;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Test the Unicode character type functions.\n");
+  fprintf (stream, "   Copyright (C) 2007 Free Software Foundation, Inc.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
+  fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
+  fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
+  fprintf (stream, "   (at your option) any later version.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
+  fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+  fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+  fprintf (stream, "   GNU General Public License for more details.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
+  fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "#include \"test-predicate-part1.h\"\n");
+  fprintf (stream, "\n");
+
+  need_comma = false;
+  for (ch = 0; ch < 0x110000; ch++)
+    if (predicate (ch))
+      {
+       unsigned int first = ch;
+       unsigned int last;
+
+       while (ch + 1 < 0x110000 && predicate (ch + 1))
+         ch++;
+       last = ch;
+       if (need_comma)
+         fprintf (stream, ",\n");
+       fprintf (stream, "    { 0x%04X, 0x%04X }", first, last);
+       need_comma = true;
+      }
+  if (need_comma)
+    fprintf (stream, "\n");
+
+  fprintf (stream, "\n");
+  fprintf (stream, "#define PREDICATE(c) %s\n", expression);
+  fprintf (stream, "#include \"test-predicate-part2.h\"\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE predicate_table
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3levelbit.h"
+
+/* Output a boolean property in a three-level bitmap.  */
+static void
+output_predicate (const char *filename, bool (*predicate) (unsigned int), const char *name, const char *comment, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct predicate_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* %s of Unicode characters.  */\n", comment);
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 4; /* or: 5 */
+  t.q = 7; /* or: 6 */
+  predicate_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    if (predicate (ch))
+      predicate_table_add (&t, ch);
+
+  predicate_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    if (i != 1)
+      fprintf (stream, "#define header_%d %d\n", i,
+              ((uint32_t *) t.result)[i]);
+
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int header[1];\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    /*unsigned*/ int level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "%s =\n", name);
+  fprintf (stream, "{\n");
+  fprintf (stream, "  { %d },\n", ((uint32_t *) t.result)[1]);
+  fprintf (stream, "  {");
+  if (t.level1_size > 1)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 1) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd * sizeof (int) / sizeof (short) + %5zd",
+                1 + t.level1_size, (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+        fprintf (stream, ",");
+    }
+  if (t.level1_size > 1)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 1)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 1) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd + %5zd * sizeof (short) / sizeof (int) + %5zd",
+                1 + t.level1_size, t.level2_size << t.q, (offset - level3_offset) / sizeof (uint32_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 1)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level3_size << t.p > 4)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      if (i > 0 && (i % 4) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%08X",
+              ((uint32_t *) (t.result + level3_offset))[i]);
+      if (i+1 < t.level3_size << t.p)
+       fprintf (stream, ",");
+    }
+  if (t.level3_size << t.p > 4)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Output all categories.  */
+static void
+output_categories (const char *version)
+{
+#define CATEGORY(C) \
+  debug_output_predicate ("unictype/categ_" #C ".txt", is_category_ ## C); \
+  output_predicate_test ("../tests/unictype/test-categ_" #C ".c", is_category_ ## C, "uc_is_general_category (c, UC_CATEGORY_" #C ")"); \
+  output_predicate ("unictype/categ_" #C ".h", is_category_ ## C, "u_categ_" #C, "Categories", version);
+  CATEGORY (L)
+  CATEGORY (Lu)
+  CATEGORY (Ll)
+  CATEGORY (Lt)
+  CATEGORY (Lm)
+  CATEGORY (Lo)
+  CATEGORY (M)
+  CATEGORY (Mn)
+  CATEGORY (Mc)
+  CATEGORY (Me)
+  CATEGORY (N)
+  CATEGORY (Nd)
+  CATEGORY (Nl)
+  CATEGORY (No)
+  CATEGORY (P)
+  CATEGORY (Pc)
+  CATEGORY (Pd)
+  CATEGORY (Ps)
+  CATEGORY (Pe)
+  CATEGORY (Pi)
+  CATEGORY (Pf)
+  CATEGORY (Po)
+  CATEGORY (S)
+  CATEGORY (Sm)
+  CATEGORY (Sc)
+  CATEGORY (Sk)
+  CATEGORY (So)
+  CATEGORY (Z)
+  CATEGORY (Zs)
+  CATEGORY (Zl)
+  CATEGORY (Zp)
+  CATEGORY (C)
+  CATEGORY (Cc)
+  CATEGORY (Cf)
+  CATEGORY (Cs)
+  CATEGORY (Co)
+  CATEGORY (Cn)
+#undef CATEGORY
+}
+
+enum
+{
+  UC_CATEGORY_MASK_L  = 0x0000001f,
+  UC_CATEGORY_MASK_Lu = 0x00000001,
+  UC_CATEGORY_MASK_Ll = 0x00000002,
+  UC_CATEGORY_MASK_Lt = 0x00000004,
+  UC_CATEGORY_MASK_Lm = 0x00000008,
+  UC_CATEGORY_MASK_Lo = 0x00000010,
+  UC_CATEGORY_MASK_M  = 0x000000e0,
+  UC_CATEGORY_MASK_Mn = 0x00000020,
+  UC_CATEGORY_MASK_Mc = 0x00000040,
+  UC_CATEGORY_MASK_Me = 0x00000080,
+  UC_CATEGORY_MASK_N  = 0x00000700,
+  UC_CATEGORY_MASK_Nd = 0x00000100,
+  UC_CATEGORY_MASK_Nl = 0x00000200,
+  UC_CATEGORY_MASK_No = 0x00000400,
+  UC_CATEGORY_MASK_P  = 0x0003f800,
+  UC_CATEGORY_MASK_Pc = 0x00000800,
+  UC_CATEGORY_MASK_Pd = 0x00001000,
+  UC_CATEGORY_MASK_Ps = 0x00002000,
+  UC_CATEGORY_MASK_Pe = 0x00004000,
+  UC_CATEGORY_MASK_Pi = 0x00008000,
+  UC_CATEGORY_MASK_Pf = 0x00010000,
+  UC_CATEGORY_MASK_Po = 0x00020000,
+  UC_CATEGORY_MASK_S  = 0x003c0000,
+  UC_CATEGORY_MASK_Sm = 0x00040000,
+  UC_CATEGORY_MASK_Sc = 0x00080000,
+  UC_CATEGORY_MASK_Sk = 0x00100000,
+  UC_CATEGORY_MASK_So = 0x00200000,
+  UC_CATEGORY_MASK_Z  = 0x01c00000,
+  UC_CATEGORY_MASK_Zs = 0x00400000,
+  UC_CATEGORY_MASK_Zl = 0x00800000,
+  UC_CATEGORY_MASK_Zp = 0x01000000,
+  UC_CATEGORY_MASK_C  = 0x3e000000,
+  UC_CATEGORY_MASK_Cc = 0x02000000,
+  UC_CATEGORY_MASK_Cf = 0x04000000,
+  UC_CATEGORY_MASK_Cs = 0x08000000,
+  UC_CATEGORY_MASK_Co = 0x10000000,
+  UC_CATEGORY_MASK_Cn = 0x20000000
+};
+
+static int
+general_category_byname (const char *category_name)
+{
+  if (category_name[0] != '\0'
+      && (category_name[1] == '\0' || category_name[2] == '\0'))
+    switch (category_name[0])
+      {
+      case 'L':
+       switch (category_name[1])
+         {
+         case '\0': return UC_CATEGORY_MASK_L;
+         case 'u': return UC_CATEGORY_MASK_Lu;
+         case 'l': return UC_CATEGORY_MASK_Ll;
+         case 't': return UC_CATEGORY_MASK_Lt;
+         case 'm': return UC_CATEGORY_MASK_Lm;
+         case 'o': return UC_CATEGORY_MASK_Lo;
+         }
+       break;
+      case 'M':
+       switch (category_name[1])
+         {
+         case '\0': return UC_CATEGORY_MASK_M;
+         case 'n': return UC_CATEGORY_MASK_Mn;
+         case 'c': return UC_CATEGORY_MASK_Mc;
+         case 'e': return UC_CATEGORY_MASK_Me;
+         }
+       break;
+      case 'N':
+       switch (category_name[1])
+         {
+         case '\0': return UC_CATEGORY_MASK_N;
+         case 'd': return UC_CATEGORY_MASK_Nd;
+         case 'l': return UC_CATEGORY_MASK_Nl;
+         case 'o': return UC_CATEGORY_MASK_No;
+         }
+       break;
+      case 'P':
+       switch (category_name[1])
+         {
+         case '\0': return UC_CATEGORY_MASK_P;
+         case 'c': return UC_CATEGORY_MASK_Pc;
+         case 'd': return UC_CATEGORY_MASK_Pd;
+         case 's': return UC_CATEGORY_MASK_Ps;
+         case 'e': return UC_CATEGORY_MASK_Pe;
+         case 'i': return UC_CATEGORY_MASK_Pi;
+         case 'f': return UC_CATEGORY_MASK_Pf;
+         case 'o': return UC_CATEGORY_MASK_Po;
+         }
+       break;
+      case 'S':
+       switch (category_name[1])
+         {
+         case '\0': return UC_CATEGORY_MASK_S;
+         case 'm': return UC_CATEGORY_MASK_Sm;
+         case 'c': return UC_CATEGORY_MASK_Sc;
+         case 'k': return UC_CATEGORY_MASK_Sk;
+         case 'o': return UC_CATEGORY_MASK_So;
+         }
+       break;
+      case 'Z':
+       switch (category_name[1])
+         {
+         case '\0': return UC_CATEGORY_MASK_Z;
+         case 's': return UC_CATEGORY_MASK_Zs;
+         case 'l': return UC_CATEGORY_MASK_Zl;
+         case 'p': return UC_CATEGORY_MASK_Zp;
+         }
+       break;
+      case 'C':
+       switch (category_name[1])
+         {
+         case '\0': return UC_CATEGORY_MASK_C;
+         case 'c': return UC_CATEGORY_MASK_Cc;
+         case 'f': return UC_CATEGORY_MASK_Cf;
+         case 's': return UC_CATEGORY_MASK_Cs;
+         case 'o': return UC_CATEGORY_MASK_Co;
+         case 'n': return UC_CATEGORY_MASK_Cn;
+         }
+       break;
+      }
+  /* Invalid category name.  */
+  abort ();
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE category_table
+#define ELEMENT uint8_t
+#define DEFAULT 29 /* = log2(UC_CATEGORY_MASK_Cn) */
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output the per-character category table.  */
+static void
+output_category (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct category_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+  uint16_t *level3_packed;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Categories of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 7;
+  t.q = 9;
+  category_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int value;
+      unsigned int log2_value;
+
+      if (is_category_Cs (ch))
+       value = UC_CATEGORY_MASK_Cs;
+      else if (unicode_attributes[ch].name != NULL)
+       value = general_category_byname (unicode_attributes[ch].category);
+      else
+       continue;
+
+      /* Now value should contain exactly one bit.  */
+      if (value == 0 || ((value & (value - 1)) != 0))
+       abort ();
+
+      for (log2_value = 0; value > 1; value >>= 1, log2_value++);
+
+      category_table_add (&t, ch, log2_value);
+    }
+
+  category_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define category_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned short level3[%zu * %d + 1];\n", t.level3_size,
+          (1 << t.p) * 5 / 16);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_category =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  /* Pack the level3 array.  Each entry needs 5 bits only.  Use 16-bit units,
+     not 32-bit units, in order to make the lookup function easier.  */
+  level3_packed =
+    (uint16_t *)
+    calloc ((t.level3_size << t.p) * 5 / 16 + 1, sizeof (uint16_t));
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned int j = (i * 5) / 16;
+      unsigned int k = (i * 5) % 16;
+      uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
+      value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
+      level3_packed[j] = value & 0xffff;
+      level3_packed[j+1] = value >> 16;
+    }
+  fprintf (stream, "  {");
+  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < (t.level3_size << t.p) * 5 / 16 + 1; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%04x", level3_packed[i]);
+      if (i+1 < (t.level3_size << t.p) * 5 / 16 + 1)
+       fprintf (stream, ",");
+    }
+  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  free (level3_packed);
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Canonical combining class.  */
+/* See Unicode 3.0 book, section 4.2,
+       UCD.html.  */
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE combclass_table
+#define ELEMENT uint8_t
+#define DEFAULT 0
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output the per-character combining class table.  */
+static void
+output_combclass (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct combclass_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Combining class of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 7;
+  t.q = 9;
+  combclass_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    if (unicode_attributes[ch].name != NULL)
+      {
+       int value = atoi (unicode_attributes[ch].combining);
+        if (!(value >= 0 && value <= 255))
+         abort ();
+       combclass_table_add (&t, ch, value);
+      }
+
+  combclass_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define combclass_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_combclass =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " %3d", ((uint8_t *) (t.result + level3_offset))[i]);
+      if (i+1 < t.level3_size << t.p)
+       fprintf (stream, ",");
+    }
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Bidirectional category.  */
+/* See Unicode 3.0 book, section 4.3,
+       UCD.html.  */
+
+enum
+{
+  UC_BIDI_L,   /* Left-to-Right */
+  UC_BIDI_LRE, /* Left-to-Right Embedding */
+  UC_BIDI_LRO, /* Left-to-Right Override */
+  UC_BIDI_R,   /* Right-to-Left */
+  UC_BIDI_AL,  /* Right-to-Left Arabic */
+  UC_BIDI_RLE, /* Right-to-Left Embedding */
+  UC_BIDI_RLO, /* Right-to-Left Override */
+  UC_BIDI_PDF, /* Pop Directional Format */
+  UC_BIDI_EN,  /* European Number */
+  UC_BIDI_ES,  /* European Number Separator */
+  UC_BIDI_ET,  /* European Number Terminator */
+  UC_BIDI_AN,  /* Arabic Number */
+  UC_BIDI_CS,  /* Common Number Separator */
+  UC_BIDI_NSM, /* Non-Spacing Mark */
+  UC_BIDI_BN,  /* Boundary Neutral */
+  UC_BIDI_B,   /* Paragraph Separator */
+  UC_BIDI_S,   /* Segment Separator */
+  UC_BIDI_WS,  /* Whitespace */
+  UC_BIDI_ON   /* Other Neutral */
+};
+
+static int
+bidi_category_byname (const char *category_name)
+{
+  switch (category_name[0])
+    {
+    case 'A':
+      switch (category_name[1])
+       {
+       case 'L':
+         if (category_name[2] == '\0')
+           return UC_BIDI_AL;
+         break;
+       case 'N':
+         if (category_name[2] == '\0')
+           return UC_BIDI_AN;
+         break;
+       }
+      break;
+    case 'B':
+      switch (category_name[1])
+       {
+       case '\0':
+         return UC_BIDI_B;
+       case 'N':
+         if (category_name[2] == '\0')
+           return UC_BIDI_BN;
+         break;
+       }
+      break;
+    case 'C':
+      switch (category_name[1])
+       {
+       case 'S':
+         if (category_name[2] == '\0')
+           return UC_BIDI_CS;
+         break;
+       }
+      break;
+    case 'E':
+      switch (category_name[1])
+       {
+       case 'N':
+         if (category_name[2] == '\0')
+           return UC_BIDI_EN;
+         break;
+       case 'S':
+         if (category_name[2] == '\0')
+           return UC_BIDI_ES;
+         break;
+       case 'T':
+         if (category_name[2] == '\0')
+           return UC_BIDI_ET;
+         break;
+       }
+      break;
+    case 'L':
+      switch (category_name[1])
+       {
+       case '\0':
+         return UC_BIDI_L;
+       case 'R':
+         switch (category_name[2])
+           {
+           case 'E':
+             if (category_name[3] == '\0')
+               return UC_BIDI_LRE;
+             break;
+           case 'O':
+             if (category_name[3] == '\0')
+               return UC_BIDI_LRO;
+             break;
+           }
+         break;
+       }
+      break;
+    case 'N':
+      switch (category_name[1])
+       {
+       case 'S':
+         switch (category_name[2])
+           {
+           case 'M':
+             if (category_name[3] == '\0')
+               return UC_BIDI_NSM;
+             break;
+           }
+         break;
+       }
+      break;
+    case 'O':
+      switch (category_name[1])
+       {
+       case 'N':
+         if (category_name[2] == '\0')
+           return UC_BIDI_ON;
+         break;
+       }
+      break;
+    case 'P':
+      switch (category_name[1])
+       {
+       case 'D':
+         switch (category_name[2])
+           {
+           case 'F':
+             if (category_name[3] == '\0')
+               return UC_BIDI_PDF;
+             break;
+           }
+         break;
+       }
+      break;
+    case 'R':
+      switch (category_name[1])
+       {
+       case '\0':
+         return UC_BIDI_R;
+       case 'L':
+         switch (category_name[2])
+           {
+           case 'E':
+             if (category_name[3] == '\0')
+               return UC_BIDI_RLE;
+             break;
+           case 'O':
+             if (category_name[3] == '\0')
+               return UC_BIDI_RLO;
+             break;
+           }
+         break;
+       }
+      break;
+    case 'S':
+      if (category_name[1] == '\0')
+       return UC_BIDI_S;
+      break;
+    case 'W':
+      switch (category_name[1])
+       {
+       case 'S':
+         if (category_name[2] == '\0')
+           return UC_BIDI_WS;
+         break;
+       }
+      break;
+    }
+  /* Invalid bidi category name.  */
+  abort ();
+}
+
+static int
+get_bidi_category (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL)
+    return bidi_category_byname (unicode_attributes[ch].bidi);
+  else
+    {
+      /* The bidi category of unassigned characters depends on the range.
+        See UTR #9 and DerivedBidiClass.txt.  */
+      if ((ch >= 0x0590 && ch <= 0x05FF)
+         || (ch >= 0x07FB && ch <= 0x08FF)
+         || (ch >= 0xFB37 && ch <= 0xFB45)
+         || (ch >= 0x10800 && ch <= 0x10FFF))
+       return UC_BIDI_R;
+      else if ((ch >= 0x0600 && ch <= 0x07BF)
+              || (ch >= 0x2064 && ch <= 0x2069)
+              || (ch >= 0xFBB2 && ch <= 0xFDCF)
+              || (ch >= 0xFDFE && ch <= 0xFEFE))
+       return UC_BIDI_AL;
+      else if ((ch >= 0xFDD0 && ch <= 0xFDEF)
+              || (ch >= 0xFFF0 && ch <= 0xFFFF)
+              || (ch & 0xFFFF) == 0xFFFE
+              || (ch & 0xFFFF) == 0xFFFF
+              || (ch >= 0xE0000 && ch <= 0xE0FFF))
+       return UC_BIDI_BN;
+      else
+       return UC_BIDI_L;
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE bidi_category_table
+#define ELEMENT uint8_t
+#define DEFAULT UC_BIDI_L
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output the per-character bidi category table.  */
+static void
+output_bidi_category (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct bidi_category_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+  uint16_t *level3_packed;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Bidi categories of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 7;
+  t.q = 9;
+  bidi_category_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int value = get_bidi_category (ch);
+
+      bidi_category_table_add (&t, ch, value);
+    }
+
+  bidi_category_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define bidi_category_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned short level3[%zu * %d + 1];\n", t.level3_size,
+          (1 << t.p) * 5 / 16);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_bidi_category =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  /* Pack the level3 array.  Each entry needs 5 bits only.  Use 16-bit units,
+     not 32-bit units, in order to make the lookup function easier.  */
+  level3_packed =
+    (uint16_t *)
+    calloc ((t.level3_size << t.p) * 5 / 16 + 1, sizeof (uint16_t));
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned int j = (i * 5) / 16;
+      unsigned int k = (i * 5) % 16;
+      uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
+      value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
+      level3_packed[j] = value & 0xffff;
+      level3_packed[j+1] = value >> 16;
+    }
+  fprintf (stream, "  {");
+  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < (t.level3_size << t.p) * 5 / 16 + 1; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%04x", level3_packed[i]);
+      if (i+1 < (t.level3_size << t.p) * 5 / 16 + 1)
+       fprintf (stream, ",");
+    }
+  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  free (level3_packed);
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Decimal digit value.  */
+/* See Unicode 3.0 book, section 4.6.  */
+
+static int
+get_decdigit_value (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].decdigit[0] != '\0')
+    return atoi (unicode_attributes[ch].decdigit);
+  return -1;
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE decdigit_table
+#define ELEMENT uint8_t
+#define DEFAULT 0
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output the unit test for the per-character decimal digit value table.  */
+static void
+output_decimal_digit_test (const char *filename, const char *version)
+{
+  FILE *stream;
+  bool need_comma;
+  unsigned int ch;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Decimal digit values of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  need_comma = false;
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int value = get_decdigit_value (ch);
+
+      if (!(value >= -1 && value < 10))
+       abort ();
+
+      if (value >= 0)
+       {
+         if (need_comma)
+           fprintf (stream, ",\n");
+         fprintf (stream, "    { 0x%04X, %d }", ch, value);
+         need_comma = true;
+       }
+    }
+  if (need_comma)
+    fprintf (stream, "\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Output the per-character decimal digit value table.  */
+static void
+output_decimal_digit (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct decdigit_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Decimal digit values of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 7;
+  t.q = 9;
+  decdigit_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int value = 1 + get_decdigit_value (ch);
+
+      if (!(value >= 0 && value <= 10))
+       abort ();
+
+      decdigit_table_add (&t, ch, value);
+    }
+
+  decdigit_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define decdigit_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size,
+          t.p - 1);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_decdigit =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  /* Pack the level3 array.  Each entry needs 4 bits only.  */
+  fprintf (stream, "  {");
+  if (t.level3_size << (t.p - 1) > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << (t.p - 1); i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%02x",
+              ((uint8_t *) (t.result + level3_offset))[2*i]
+              + (((uint8_t *) (t.result + level3_offset))[2*i+1] << 4));
+      if (i+1 < t.level3_size << (t.p - 1))
+       fprintf (stream, ",");
+    }
+  if (t.level3_size << (t.p - 1) > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Digit value.  */
+/* See Unicode 3.0 book, section 4.6.  */
+
+static int
+get_digit_value (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].digit[0] != '\0')
+    return atoi (unicode_attributes[ch].digit);
+  return -1;
+}
+
+/* Output the unit test for the per-character digit value table.  */
+static void
+output_digit_test (const char *filename, const char *version)
+{
+  FILE *stream;
+  bool need_comma;
+  unsigned int ch;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Digit values of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  need_comma = false;
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int value = get_digit_value (ch);
+
+      if (!(value >= -1 && value < 10))
+       abort ();
+
+      if (value >= 0)
+       {
+         if (need_comma)
+           fprintf (stream, ",\n");
+         fprintf (stream, "    { 0x%04X, %d }", ch, value);
+         need_comma = true;
+       }
+    }
+  if (need_comma)
+    fprintf (stream, "\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Output the per-character digit value table.  */
+static void
+output_digit (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct decdigit_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Digit values of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 7;
+  t.q = 9;
+  decdigit_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int value = 1 + get_digit_value (ch);
+
+      if (!(value >= 0 && value <= 10))
+       abort ();
+
+      decdigit_table_add (&t, ch, value);
+    }
+
+  decdigit_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define digit_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size,
+          t.p - 1);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_digit =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  /* Pack the level3 array.  Each entry needs 4 bits only.  */
+  fprintf (stream, "  {");
+  if (t.level3_size << (t.p - 1) > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << (t.p - 1); i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%02x",
+              ((uint8_t *) (t.result + level3_offset))[2*i]
+              + (((uint8_t *) (t.result + level3_offset))[2*i+1] << 4));
+      if (i+1 < t.level3_size << (t.p - 1))
+       fprintf (stream, ",");
+    }
+  if (t.level3_size << (t.p - 1) > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Numeric value.  */
+/* See Unicode 3.0 book, section 4.6.  */
+
+typedef struct { int numerator; int denominator; } uc_fraction_t;
+
+static uc_fraction_t
+get_numeric_value (unsigned int ch)
+{
+  uc_fraction_t value;
+
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].numeric[0] != '\0')
+    {
+      const char *str = unicode_attributes[ch].numeric;
+      /* str is of the form "integer" or "integer/posinteger".  */
+      value.numerator = atoi (str);
+      if (strchr (str, '/') != NULL)
+       value.denominator = atoi (strchr (str, '/') + 1);
+      else
+       value.denominator = 1;
+    }
+  else
+    {
+      value.numerator = 0;
+      value.denominator = 0;
+    }
+  return value;
+}
+
+/* Output the unit test for the per-character numeric value table.  */
+static void
+output_numeric_test (const char *filename, const char *version)
+{
+  FILE *stream;
+  bool need_comma;
+  unsigned int ch;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Numeric values of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  need_comma = false;
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      uc_fraction_t value = get_numeric_value (ch);
+
+      if (value.numerator != 0 || value.denominator != 0)
+       {
+         if (need_comma)
+           fprintf (stream, ",\n");
+         fprintf (stream, "    { 0x%04X, %d, %d }",
+                  ch, value.numerator, value.denominator);
+         need_comma = true;
+       }
+    }
+  if (need_comma)
+    fprintf (stream, "\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE numeric_table
+#define ELEMENT uint8_t
+#define DEFAULT 0
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output the per-character numeric value table.  */
+static void
+output_numeric (const char *filename, const char *version)
+{
+  FILE *stream;
+  uc_fraction_t fractions[128];
+  unsigned int nfractions;
+  unsigned int ch, i, j;
+  struct numeric_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+  uint16_t *level3_packed;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Numeric values of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  /* Create table of occurring fractions.  */
+  nfractions = 0;
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      uc_fraction_t value = get_numeric_value (ch);
+
+      for (i = 0; i < nfractions; i++)
+       if (value.numerator == fractions[i].numerator
+           && value.denominator == fractions[i].denominator)
+         break;
+      if (i == nfractions)
+       {
+         if (nfractions == 128)
+           abort ();
+         for (i = 0; i < nfractions; i++)
+           if (value.denominator < fractions[i].denominator
+               || (value.denominator == fractions[i].denominator
+                   && value.numerator < fractions[i].numerator))
+             break;
+         for (j = nfractions; j > i; j--)
+           fractions[j] = fractions[j - 1];
+         fractions[i] = value;
+         nfractions++;
+       }
+    }
+
+  fprintf (stream, "static const uc_fraction_t u_numeric_values[%d] =\n",
+          nfractions);
+  fprintf (stream, "{\n");
+  for (i = 0; i < nfractions; i++)
+    {
+      fprintf (stream, "  { %d, %d }", fractions[i].numerator,
+              fractions[i].denominator);
+      if (i+1 < nfractions)
+       fprintf (stream, ",");
+      fprintf (stream, "\n");
+    }
+  fprintf (stream, "};\n");
+
+  t.p = 7;
+  t.q = 9;
+  numeric_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      uc_fraction_t value = get_numeric_value (ch);
+
+      for (i = 0; i < nfractions; i++)
+       if (value.numerator == fractions[i].numerator
+           && value.denominator == fractions[i].denominator)
+         break;
+      if (i == nfractions)
+       abort ();
+
+      numeric_table_add (&t, ch, i);
+    }
+
+  numeric_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define numeric_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned short level3[%zu * %d + 1];\n", t.level3_size,
+          (1 << t.p) * 7 / 16);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_numeric =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  /* Pack the level3 array.  Each entry needs 7 bits only.  Use 16-bit units,
+     not 32-bit units, in order to make the lookup function easier.  */
+  level3_packed =
+    (uint16_t *)
+    calloc ((t.level3_size << t.p) * 7 / 16 + 1, sizeof (uint16_t));
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned int j = (i * 7) / 16;
+      unsigned int k = (i * 7) % 16;
+      uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
+      value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
+      level3_packed[j] = value & 0xffff;
+      level3_packed[j+1] = value >> 16;
+    }
+  fprintf (stream, "  {");
+  if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < (t.level3_size << t.p) * 7 / 16 + 1; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%04x", level3_packed[i]);
+      if (i+1 < (t.level3_size << t.p) * 7 / 16 + 1)
+       fprintf (stream, ",");
+    }
+  if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  free (level3_packed);
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Mirrored.  */
+/* See Unicode 3.0 book, section 4.7,
+       UAX #9.  */
+
+/* List of mirrored character pairs.  This is a subset of the characters
+   having the BidiMirrored property.  */
+static unsigned int mirror_pairs[][2] =
+{
+  { 0x0028, 0x0029 },
+  { 0x003C, 0x003E },
+  { 0x005B, 0x005D },
+  { 0x007B, 0x007D },
+  { 0x00AB, 0x00BB },
+  { 0x2039, 0x203A },
+  { 0x2045, 0x2046 },
+  { 0x207D, 0x207E },
+  { 0x208D, 0x208E },
+  { 0x2208, 0x220B },
+  { 0x220A, 0x220D },
+  { 0x223C, 0x223D },
+  { 0x2243, 0x22CD },
+  { 0x2252, 0x2253 },
+  { 0x2254, 0x2255 },
+  { 0x2264, 0x2265 },
+  { 0x2266, 0x2267 },
+  { 0x226A, 0x226B },
+  { 0x2276, 0x2277 },
+  { 0x2278, 0x2279 },
+  { 0x227A, 0x227B },
+  { 0x227C, 0x227D },
+  { 0x2282, 0x2283 },
+  { 0x2286, 0x2287 },
+  { 0x228F, 0x2290 },
+  { 0x2291, 0x2292 },
+  { 0x22A2, 0x22A3 },
+  { 0x22B0, 0x22B1 },
+  { 0x22B2, 0x22B3 },
+  { 0x22B4, 0x22B5 },
+  { 0x22B6, 0x22B7 },
+  { 0x22C9, 0x22CA },
+  { 0x22CB, 0x22CC },
+  { 0x22D0, 0x22D1 },
+  { 0x22D6, 0x22D7 },
+  { 0x22D8, 0x22D9 },
+  { 0x22DA, 0x22DB },
+  { 0x22DC, 0x22DD },
+  { 0x22DE, 0x22DF },
+  { 0x22F0, 0x22F1 },
+  { 0x2308, 0x2309 },
+  { 0x230A, 0x230B },
+  { 0x2329, 0x232A },
+  { 0x3008, 0x3009 },
+  { 0x300A, 0x300B },
+  { 0x300C, 0x300D },
+  { 0x300E, 0x300F },
+  { 0x3010, 0x3011 },
+  { 0x3014, 0x3015 },
+  { 0x3016, 0x3017 },
+  { 0x3018, 0x3019 },
+  { 0x301A, 0x301B }
+};
+
+static int
+get_mirror_value (unsigned int ch)
+{
+  bool mirrored;
+  unsigned int mirror_char;
+  unsigned int i;
+
+  mirrored = (unicode_attributes[ch].name != NULL
+             && unicode_attributes[ch].mirrored);
+  mirror_char = 0xfffd;
+  for (i = 0; i < sizeof (mirror_pairs) / sizeof (mirror_pairs[0]); i++)
+    if (ch == mirror_pairs[i][0])
+      {
+       mirror_char = mirror_pairs[i][1];
+       break;
+      }
+    else if (ch == mirror_pairs[i][1])
+      {
+       mirror_char = mirror_pairs[i][0];
+       break;
+      }
+  if (mirrored)
+    return (int) mirror_char - (int) ch;
+  else
+    {
+      if (mirror_char != 0xfffd)
+       abort ();
+      return 0;
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE mirror_table
+#define ELEMENT int32_t
+#define DEFAULT 0
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output the per-character mirror table.  */
+static void
+output_mirror (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct mirror_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Mirrored Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 7;
+  t.q = 9;
+  mirror_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int value = get_mirror_value (ch);
+
+      mirror_table_add (&t, ch, value);
+    }
+
+  mirror_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define mirror_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    int level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_mirror =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (int32_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " %5d", ((int32_t *) (t.result + level3_offset))[i]);
+      if (i+1 < t.level3_size << t.p)
+       fprintf (stream, ",");
+    }
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Properties.  */
+
+/* Reading PropList.txt and DerivedCoreProperties.txt.  */
+enum
+{
+  /* PropList.txt */
+  PROP_WHITE_SPACE,
+  PROP_BIDI_CONTROL,
+  PROP_JOIN_CONTROL,
+  PROP_DASH,
+  PROP_HYPHEN,
+  PROP_QUOTATION_MARK,
+  PROP_TERMINAL_PUNCTUATION,
+  PROP_OTHER_MATH,
+  PROP_HEX_DIGIT,
+  PROP_ASCII_HEX_DIGIT,
+  PROP_OTHER_ALPHABETIC,
+  PROP_IDEOGRAPHIC,
+  PROP_DIACRITIC,
+  PROP_EXTENDER,
+  PROP_OTHER_LOWERCASE,
+  PROP_OTHER_UPPERCASE,
+  PROP_NONCHARACTER_CODE_POINT,
+  PROP_OTHER_GRAPHEME_EXTEND,
+  PROP_IDS_BINARY_OPERATOR,
+  PROP_IDS_TRINARY_OPERATOR,
+  PROP_RADICAL,
+  PROP_UNIFIED_IDEOGRAPH,
+  PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT,
+  PROP_DEPRECATED,
+  PROP_SOFT_DOTTED,
+  PROP_LOGICAL_ORDER_EXCEPTION,
+  PROP_OTHER_ID_START,
+  PROP_OTHER_ID_CONTINUE,
+  PROP_STERM,
+  PROP_VARIATION_SELECTOR,
+  PROP_PATTERN_WHITE_SPACE,
+  PROP_PATTERN_SYNTAX,
+  /* DerivedCoreProperties.txt */
+  PROP_MATH,
+  PROP_ALPHABETIC,
+  PROP_LOWERCASE,
+  PROP_UPPERCASE,
+  PROP_ID_START,
+  PROP_ID_CONTINUE,
+  PROP_XID_START,
+  PROP_XID_CONTINUE,
+  PROP_DEFAULT_IGNORABLE_CODE_POINT,
+  PROP_GRAPHEME_EXTEND,
+  PROP_GRAPHEME_BASE,
+  PROP_GRAPHEME_LINK
+};
+unsigned long long unicode_properties[0x110000];
+
+static void
+clear_properties (void)
+{
+  unsigned int i;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_properties[i] = 0;
+}
+
+/* Stores in unicode_properties[] the properties from the
+   PropList.txt or DerivedCoreProperties.txt file.  */
+static void
+fill_properties (const char *proplist_filename)
+{
+  unsigned int i;
+  FILE *stream;
+
+  stream = fopen (proplist_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", proplist_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      char buf[200+1];
+      unsigned int i1, i2;
+      char padding[200+1];
+      char propname[200+1];
+      unsigned int propvalue;
+
+      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
+       break;
+
+      if (buf[0] == '\0' || buf[0] == '#')
+       continue;
+
+      if (sscanf (buf, "%X..%X%[ ;]%[^ ]", &i1, &i2, padding, propname) != 4)
+       {
+         if (sscanf (buf, "%X%[ ;]%[^ ]", &i1, padding, propname) != 3)
+           {
+             fprintf (stderr, "parse error in '%s'\n", proplist_filename);
+             exit (1);
+           }
+         i2 = i1;
+       }
+#define PROP(name,value) \
+      if (strcmp (propname, name) == 0) propvalue = value; else
+      /* PropList.txt */
+      PROP ("White_Space", PROP_WHITE_SPACE)
+      PROP ("Bidi_Control", PROP_BIDI_CONTROL)
+      PROP ("Join_Control", PROP_JOIN_CONTROL)
+      PROP ("Dash", PROP_DASH)
+      PROP ("Hyphen", PROP_HYPHEN)
+      PROP ("Quotation_Mark", PROP_QUOTATION_MARK)
+      PROP ("Terminal_Punctuation", PROP_TERMINAL_PUNCTUATION)
+      PROP ("Other_Math", PROP_OTHER_MATH)
+      PROP ("Hex_Digit", PROP_HEX_DIGIT)
+      PROP ("ASCII_Hex_Digit", PROP_ASCII_HEX_DIGIT)
+      PROP ("Other_Alphabetic", PROP_OTHER_ALPHABETIC)
+      PROP ("Ideographic", PROP_IDEOGRAPHIC)
+      PROP ("Diacritic", PROP_DIACRITIC)
+      PROP ("Extender", PROP_EXTENDER)
+      PROP ("Other_Lowercase", PROP_OTHER_LOWERCASE)
+      PROP ("Other_Uppercase", PROP_OTHER_UPPERCASE)
+      PROP ("Noncharacter_Code_Point", PROP_NONCHARACTER_CODE_POINT)
+      PROP ("Other_Grapheme_Extend", PROP_OTHER_GRAPHEME_EXTEND)
+      PROP ("IDS_Binary_Operator", PROP_IDS_BINARY_OPERATOR)
+      PROP ("IDS_Trinary_Operator", PROP_IDS_TRINARY_OPERATOR)
+      PROP ("Radical", PROP_RADICAL)
+      PROP ("Unified_Ideograph", PROP_UNIFIED_IDEOGRAPH)
+      PROP ("Other_Default_Ignorable_Code_Point", PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)
+      PROP ("Deprecated", PROP_DEPRECATED)
+      PROP ("Soft_Dotted", PROP_SOFT_DOTTED)
+      PROP ("Logical_Order_Exception", PROP_LOGICAL_ORDER_EXCEPTION)
+      PROP ("Other_ID_Start", PROP_OTHER_ID_START)
+      PROP ("Other_ID_Continue", PROP_OTHER_ID_CONTINUE)
+      PROP ("STerm", PROP_STERM)
+      PROP ("Variation_Selector", PROP_VARIATION_SELECTOR)
+      PROP ("Pattern_White_Space", PROP_PATTERN_WHITE_SPACE)
+      PROP ("Pattern_Syntax", PROP_PATTERN_SYNTAX)
+      /* DerivedCoreProperties.txt */
+      PROP ("Math", PROP_MATH)
+      PROP ("Alphabetic", PROP_ALPHABETIC)
+      PROP ("Lowercase", PROP_LOWERCASE)
+      PROP ("Uppercase", PROP_UPPERCASE)
+      PROP ("ID_Start", PROP_ID_START)
+      PROP ("ID_Continue", PROP_ID_CONTINUE)
+      PROP ("XID_Start", PROP_XID_START)
+      PROP ("XID_Continue", PROP_XID_CONTINUE)
+      PROP ("Default_Ignorable_Code_Point", PROP_DEFAULT_IGNORABLE_CODE_POINT)
+      PROP ("Grapheme_Extend", PROP_GRAPHEME_EXTEND)
+      PROP ("Grapheme_Base", PROP_GRAPHEME_BASE)
+      PROP ("Grapheme_Link", PROP_GRAPHEME_LINK)
+#undef PROP
+       {
+         fprintf (stderr, "unknown property named '%s' in '%s'\n", propname,
+                  proplist_filename);
+         exit (1);
+       }
+      if (!(i1 <= i2 && i2 < 0x110000))
+       abort ();
+
+      for (i = i1; i <= i2; i++)
+       unicode_properties[i] |= 1ULL << propvalue;
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", proplist_filename);
+      exit (1);
+    }
+}
+
+/* Stores in array the given property from the Unicode 3.0 PropList.txt
+   file.  */
+static void
+fill_property30 (char array[0x110000], const char *proplist_filename, const char *property_name)
+{
+  unsigned int i;
+  FILE *stream;
+  char buf[100+1];
+
+  for (i = 0; i < 0x110000; i++)
+    array[i] = 0;
+
+  stream = fopen (proplist_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", proplist_filename);
+      exit (1);
+    }
+
+  /* Search for the "Property dump for: ..." line.  */
+  do
+    {
+      if (fscanf (stream, "%100[^\n]\n", buf) < 1)
+       {
+         fprintf (stderr, "no property found in '%s'\n", proplist_filename);
+         exit (1);
+       }
+    }
+  while (strstr (buf, property_name) == NULL);
+
+  for (;;)
+    {
+      unsigned int i1, i2;
+
+      if (fscanf (stream, "%100[^\n]\n", buf) < 1)
+       break;
+      if (buf[0] == '*')
+       break;
+      if (strlen (buf) >= 10 && buf[4] == '.' && buf[5] == '.')
+       {
+         if (sscanf (buf, "%4X..%4X", &i1, &i2) < 2)
+           {
+             fprintf (stderr, "parse error in property in '%s'\n",
+                      proplist_filename);
+             exit (1);
+           }
+       }
+      else if (strlen (buf) >= 4)
+       {
+         if (sscanf (buf, "%4X", &i1) < 1)
+           {
+             fprintf (stderr, "parse error in property in '%s'\n",
+                      proplist_filename);
+             exit (1);
+           }
+         i2 = i1;
+       }
+      else
+       {
+         fprintf (stderr, "parse error in property in '%s'\n",
+                  proplist_filename);
+         exit (1);
+       }
+      if (!(i1 <= i2 && i2 < 0x110000))
+       abort ();
+      for (i = i1; i <= i2; i++)
+       array[i] = 1;
+    }
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", proplist_filename);
+      exit (1);
+    }
+}
+
+/* Properties from Unicode 3.0 PropList.txt file.  */
+
+/* The paired punctuation property from the PropList.txt file.  */
+char unicode_pairedpunctuation[0x110000];
+
+/* The left of pair property from the PropList.txt file.  */
+char unicode_leftofpair[0x110000];
+
+static void
+fill_properties30 (const char *proplist30_filename)
+{
+  fill_property30 (unicode_pairedpunctuation, proplist30_filename, "(Paired Punctuation)");
+  fill_property30 (unicode_leftofpair, proplist30_filename, "(Left of Pair)");
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_white_space (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_WHITE_SPACE)) != 0);
+}
+
+/* See Unicode 3.0 book, section 4.10,
+       PropList.txt, UCD.html,
+       DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_alphabetic (unsigned int ch)
+{
+  bool result1 =
+    is_category_L (ch)
+    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_ALPHABETIC)) != 0)
+    /* For some reason, the following are listed as having property
+       Alphabetic but not as having property Other_Alphabetic.  */
+    || (ch >= 0x16EE && ch <= 0x16F0) /* RUNIC SYMBOLS */
+    || (ch >= 0x2160 && ch <= 0x2182) /* ROMAN NUMERALS */
+    || (ch >= 0x24D0 && ch <= 0x24E9) /* CIRCLED LATIN SMALL LETTER */
+    || (ch == 0x3007) /* IDEOGRAPHIC NUMBER ZERO */
+    || (ch >= 0x3021 && ch <= 0x3029) /* HANGZHOU NUMERAL */
+    || (ch >= 0x3038 && ch <= 0x303A) /* HANGZHOU NUMERAL */
+    || (ch >= 0x10140 && ch <= 0x10174) /* GREEK ACROPHONICS */
+    || (ch == 0x10341) /* GOTHIC LETTER NINETY */
+    || (ch == 0x1034A) /* GOTHIC LETTER NINE HUNDRED */
+    || (ch >= 0x103D1 && ch <= 0x103D5) /* OLD PERSIAN NUMBERS */
+    || (ch >= 0x12400 && ch <= 0x12462); /* CUNEIFORM NUMERIC SIGNS */
+  bool result2 =
+    ((unicode_properties[ch] & (1ULL << PROP_ALPHABETIC)) != 0);
+
+  if (result1 != result2)
+    abort ();
+  return result1;
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_alphabetic (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_ALPHABETIC)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_not_a_character (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_NONCHARACTER_CODE_POINT)) != 0);
+}
+
+/* See PropList.txt, UCD.html,
+       DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_default_ignorable_code_point (unsigned int ch)
+{
+  bool result1 =
+    (is_category_Cf (ch)
+     && !(ch >= 0xFFF9 && ch <= 0xFFFB)) /* Annotations */
+    || ((is_category_Cc (ch) || is_category_Cs (ch))
+       && !is_property_white_space (ch))
+    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0)
+    || ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0)
+    || is_property_not_a_character (ch);
+  bool result2 =
+    ((unicode_properties[ch] & (1ULL << PROP_DEFAULT_IGNORABLE_CODE_POINT)) != 0);
+
+  if (result1 != result2)
+    abort ();
+  return result1;
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_default_ignorable_code_point (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_deprecated (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_DEPRECATED)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_logical_order_exception (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_LOGICAL_ORDER_EXCEPTION)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_variation_selector (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_private_use (unsigned int ch)
+{
+  /* Determined through "grep 'Private Use,' UnicodeData-3.1.0.txt".  */
+  return (ch >= 0xE000 && ch <= 0xF8FF)
+        || (ch >= 0xF0000 && ch <= 0xFFFFD)
+        || (ch >= 0x100000 && ch <= 0x10FFFD);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_unassigned_code_value (unsigned int ch)
+{
+  return (is_category_Cn (ch) && !is_property_not_a_character (ch));
+}
+
+/* See PropList.txt, UCD.html,
+       DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_uppercase (unsigned int ch)
+{
+  bool result1 =
+    is_category_Lu (ch)
+    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_UPPERCASE)) != 0);
+  bool result2 =
+    ((unicode_properties[ch] & (1ULL << PROP_UPPERCASE)) != 0);
+
+  if (result1 != result2)
+    abort ();
+  return result1;
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_uppercase (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_UPPERCASE)) != 0);
+}
+
+/* See PropList.txt, UCD.html,
+       DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_lowercase (unsigned int ch)
+{
+  bool result1 =
+    is_category_Ll (ch)
+    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_LOWERCASE)) != 0);
+  bool result2 =
+    ((unicode_properties[ch] & (1ULL << PROP_LOWERCASE)) != 0);
+
+  if (result1 != result2)
+    abort ();
+  return result1;
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_lowercase (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_LOWERCASE)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_titlecase (unsigned int ch)
+{
+  return is_category_Lt (ch);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_soft_dotted (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_SOFT_DOTTED)) != 0);
+}
+
+/* See DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_id_start (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_ID_START)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_id_start (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_ID_START)) != 0);
+}
+
+/* See DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_id_continue (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_ID_CONTINUE)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_id_continue (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_ID_CONTINUE)) != 0);
+}
+
+/* See DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_xid_start (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_XID_START)) != 0);
+}
+
+/* See DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_xid_continue (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_XID_CONTINUE)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_pattern_white_space (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_PATTERN_WHITE_SPACE)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_pattern_syntax (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_PATTERN_SYNTAX)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_join_control (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_JOIN_CONTROL)) != 0);
+}
+
+/* See DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_grapheme_base (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_GRAPHEME_BASE)) != 0);
+}
+
+/* See DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_grapheme_extend (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_GRAPHEME_EXTEND)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_grapheme_extend (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_GRAPHEME_EXTEND)) != 0);
+}
+
+/* See DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_grapheme_link (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_GRAPHEME_LINK)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_bidi_control (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_BIDI_CONTROL)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_left_to_right (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_L);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_hebrew_right_to_left (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_R);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_arabic_right_to_left (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_AL);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_european_digit (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_EN);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_eur_num_separator (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_ES);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_eur_num_terminator (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_ET);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_arabic_digit (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_AN);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_common_separator (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_CS);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_block_separator (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_B);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_segment_separator (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_S);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_whitespace (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_WS);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_non_spacing_mark (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_NSM);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_boundary_neutral (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_BN);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_pdf (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_PDF);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_embedding_or_override (unsigned int ch)
+{
+  int category = get_bidi_category (ch);
+  return (category == UC_BIDI_LRE || category == UC_BIDI_LRO
+         || category == UC_BIDI_RLE || category == UC_BIDI_RLO);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_bidi_other_neutral (unsigned int ch)
+{
+  return (get_bidi_category (ch) == UC_BIDI_ON);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_hex_digit (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_HEX_DIGIT)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_ascii_hex_digit (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_ASCII_HEX_DIGIT)) != 0);
+}
+
+/* See Unicode 3.0 book, section 4.10,
+       PropList.txt, UCD.html.  */
+static bool
+is_property_ideographic (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_IDEOGRAPHIC)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_unified_ideograph (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_UNIFIED_IDEOGRAPH)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_radical (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_RADICAL)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_ids_binary_operator (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_IDS_BINARY_OPERATOR)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_ids_trinary_operator (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_IDS_TRINARY_OPERATOR)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_zero_width (unsigned int ch)
+{
+  return is_category_Cf (ch)
+        || (unicode_attributes[ch].name != NULL
+            && strstr (unicode_attributes[ch].name, "ZERO WIDTH") != NULL);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_space (unsigned int ch)
+{
+  return is_category_Zs (ch);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_non_break (unsigned int ch)
+{
+  /* This is exactly the set of characters having line breaking
+     property GL.  */
+  return (ch == 0x00A0 /* NO-BREAK SPACE */
+         || ch == 0x034F /* COMBINING GRAPHEME JOINER */
+         || ch == 0x035C /* COMBINING DOUBLE BREVE BELOW */
+         || ch == 0x035D /* COMBINING DOUBLE BREVE */
+         || ch == 0x035E /* COMBINING DOUBLE MACRON */
+         || ch == 0x035F /* COMBINING DOUBLE MACRON BELOW */
+         || ch == 0x0360 /* COMBINING DOUBLE TILDE */
+         || ch == 0x0361 /* COMBINING DOUBLE INVERTED BREVE */
+         || ch == 0x0362 /* COMBINING DOUBLE RIGHTWARDS ARROW BELOW */
+         || ch == 0x0F08 /* TIBETAN MARK SBRUL SHAD */
+         || ch == 0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR */
+         || ch == 0x0F12 /* TIBETAN MARK RGYA GRAM SHAD */
+         || ch == 0x180E /* MONGOLIAN VOWEL SEPARATOR */
+         || ch == 0x2007 /* FIGURE SPACE */
+         || ch == 0x2011 /* NON-BREAKING HYPHEN */
+         || ch == 0x202F /* NARROW NO-BREAK SPACE */);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_iso_control (unsigned int ch)
+{
+  bool result1 =
+    (unicode_attributes[ch].name != NULL
+     && strcmp (unicode_attributes[ch].name, "<control>") == 0);
+  bool result2 =
+    is_category_Cc (ch);
+
+  if (result1 != result2)
+    abort ();
+  return result1;
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_format_control (unsigned int ch)
+{
+  return (is_category_Cf (ch)
+         && get_bidi_category (ch) == UC_BIDI_BN
+         && !is_property_join_control (ch)
+         && ch != 0xFEFF);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_dash (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_DASH)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_hyphen (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_HYPHEN)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_punctuation (unsigned int ch)
+{
+  return is_category_P (ch);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_line_separator (unsigned int ch)
+{
+  return is_category_Zl (ch);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_paragraph_separator (unsigned int ch)
+{
+  return is_category_Zp (ch);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_quotation_mark (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_QUOTATION_MARK)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_sentence_terminal (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_STERM)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_terminal_punctuation (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_TERMINAL_PUNCTUATION)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_currency_symbol (unsigned int ch)
+{
+  return is_category_Sc (ch);
+}
+
+/* See Unicode 3.0 book, section 4.9,
+       PropList.txt, UCD.html,
+       DerivedCoreProperties.txt, UCD.html.  */
+static bool
+is_property_math (unsigned int ch)
+{
+  bool result1 =
+    is_category_Sm (ch)
+    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_MATH)) != 0);
+  bool result2 =
+    ((unicode_properties[ch] & (1ULL << PROP_MATH)) != 0);
+
+  if (result1 != result2)
+    abort ();
+  return result1;
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_other_math (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_MATH)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_paired_punctuation (unsigned int ch)
+{
+  return unicode_pairedpunctuation[ch];
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_left_of_pair (unsigned int ch)
+{
+  return unicode_leftofpair[ch];
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_combining (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && (strcmp (unicode_attributes[ch].combining, "0") != 0
+             || is_category_Mc (ch)
+             || is_category_Me (ch)
+             || is_category_Mn (ch)));
+}
+
+#if 0 /* same as is_property_bidi_non_spacing_mark */
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_non_spacing (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && get_bidi_category (ch) == UC_BIDI_NSM);
+}
+#endif
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_composite (unsigned int ch)
+{
+  /* This definition differs from the one in PropList-3.0.1.txt, but is more
+     logical in some sense.  */
+  if (ch >= 0xAC00 && ch <= 0xD7A4) /* Hangul Syllables */
+    return true;
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].decomposition != NULL)
+    {
+      /* Test whether the decomposition contains more than one character,
+        and the first is not a space.  */
+      const char *decomp = unicode_attributes[ch].decomposition;
+      if (decomp[0] == '<')
+       {
+         decomp = strchr (decomp, '>') + 1;
+         if (decomp[0] == ' ')
+           decomp++;
+       }
+      return strchr (decomp, ' ') != NULL && strncmp (decomp, "0020 ", 5) != 0;
+    }
+  return false;
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_decimal_digit (unsigned int ch)
+{
+  return is_category_Nd (ch);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_numeric (unsigned int ch)
+{
+  return ((get_numeric_value (ch)).denominator > 0)
+        || (ch == 0x09F8) /* BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR */
+        || (ch == 0x2183); /* ROMAN NUMERAL REVERSED ONE HUNDRED */
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_diacritic (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_DIACRITIC)) != 0);
+}
+
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_extender (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_EXTENDER)) != 0);
+}
+
+/* See PropList-3.0.1.txt.  */
+static bool
+is_property_ignorable_control (unsigned int ch)
+{
+  return ((is_category_Cc (ch) && get_bidi_category (ch) == UC_BIDI_BN)
+         || is_category_Cf (ch))
+        && ch != 0x0000;
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* Output all properties.  */
+static void
+output_properties (const char *version)
+{
+#define PROPERTY(P) \
+  debug_output_predicate ("unictype/pr_" #P ".txt", is_property_ ## P); \
+  output_predicate_test ("../tests/unictype/test-pr_" #P ".c", is_property_ ## P, "uc_is_property_" #P " (c)"); \
+  output_predicate ("unictype/pr_" #P ".h", is_property_ ## P, "u_property_" #P, "Properties", version);
+  PROPERTY(white_space)
+  PROPERTY(alphabetic)
+  PROPERTY(other_alphabetic)
+  PROPERTY(not_a_character)
+  PROPERTY(default_ignorable_code_point)
+  PROPERTY(other_default_ignorable_code_point)
+  PROPERTY(deprecated)
+  PROPERTY(logical_order_exception)
+  PROPERTY(variation_selector)
+  PROPERTY(private_use)
+  PROPERTY(unassigned_code_value)
+  PROPERTY(uppercase)
+  PROPERTY(other_uppercase)
+  PROPERTY(lowercase)
+  PROPERTY(other_lowercase)
+  PROPERTY(titlecase)
+  PROPERTY(soft_dotted)
+  PROPERTY(id_start)
+  PROPERTY(other_id_start)
+  PROPERTY(id_continue)
+  PROPERTY(other_id_continue)
+  PROPERTY(xid_start)
+  PROPERTY(xid_continue)
+  PROPERTY(pattern_white_space)
+  PROPERTY(pattern_syntax)
+  PROPERTY(join_control)
+  PROPERTY(grapheme_base)
+  PROPERTY(grapheme_extend)
+  PROPERTY(other_grapheme_extend)
+  PROPERTY(grapheme_link)
+  PROPERTY(bidi_control)
+  PROPERTY(bidi_left_to_right)
+  PROPERTY(bidi_hebrew_right_to_left)
+  PROPERTY(bidi_arabic_right_to_left)
+  PROPERTY(bidi_european_digit)
+  PROPERTY(bidi_eur_num_separator)
+  PROPERTY(bidi_eur_num_terminator)
+  PROPERTY(bidi_arabic_digit)
+  PROPERTY(bidi_common_separator)
+  PROPERTY(bidi_block_separator)
+  PROPERTY(bidi_segment_separator)
+  PROPERTY(bidi_whitespace)
+  PROPERTY(bidi_non_spacing_mark)
+  PROPERTY(bidi_boundary_neutral)
+  PROPERTY(bidi_pdf)
+  PROPERTY(bidi_embedding_or_override)
+  PROPERTY(bidi_other_neutral)
+  PROPERTY(hex_digit)
+  PROPERTY(ascii_hex_digit)
+  PROPERTY(ideographic)
+  PROPERTY(unified_ideograph)
+  PROPERTY(radical)
+  PROPERTY(ids_binary_operator)
+  PROPERTY(ids_trinary_operator)
+  PROPERTY(zero_width)
+  PROPERTY(space)
+  PROPERTY(non_break)
+  PROPERTY(iso_control)
+  PROPERTY(format_control)
+  PROPERTY(dash)
+  PROPERTY(hyphen)
+  PROPERTY(punctuation)
+  PROPERTY(line_separator)
+  PROPERTY(paragraph_separator)
+  PROPERTY(quotation_mark)
+  PROPERTY(sentence_terminal)
+  PROPERTY(terminal_punctuation)
+  PROPERTY(currency_symbol)
+  PROPERTY(math)
+  PROPERTY(other_math)
+  PROPERTY(paired_punctuation)
+  PROPERTY(left_of_pair)
+  PROPERTY(combining)
+  PROPERTY(composite)
+  PROPERTY(decimal_digit)
+  PROPERTY(numeric)
+  PROPERTY(diacritic)
+  PROPERTY(extender)
+  PROPERTY(ignorable_control)
+#undef PROPERTY
+}
+
+/* ========================================================================= */
+
+/* Scripts.  */
+
+static const char *scripts[256];
+static unsigned int numscripts;
+
+static uint8_t unicode_scripts[0x110000];
+
+static void
+fill_scripts (const char *scripts_filename)
+{
+  FILE *stream;
+  unsigned int i;
+
+  stream = fopen (scripts_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", scripts_filename);
+      exit (1);
+    }
+
+  numscripts = 0;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_scripts[i] = (uint8_t)~(uint8_t)0;
+
+  for (;;)
+    {
+      char buf[200+1];
+      unsigned int i1, i2;
+      char padding[200+1];
+      char scriptname[200+1];
+      int script;
+
+      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
+       break;
+
+      if (buf[0] == '\0' || buf[0] == '#')
+       continue;
+
+      if (sscanf (buf, "%X..%X%[ ;]%[^ ]", &i1, &i2, padding, scriptname) != 4)
+       {
+         if (sscanf (buf, "%X%[ ;]%[^ ]", &i1, padding, scriptname) != 3)
+           {
+             fprintf (stderr, "parse error in '%s'\n", scripts_filename);
+             exit (1);
+           }
+         i2 = i1;
+       }
+      if (i2 < i1)
+       abort ();
+      if (i2 >= 0x110000)
+       abort ();
+
+      for (script = numscripts - 1; script >= 0; script--)
+       if (strcmp (scripts[script], scriptname) == 0)
+         break;
+      if (script < 0)
+       {
+         scripts[numscripts] = strdup (scriptname);
+         script = numscripts;
+         numscripts++;
+         if (numscripts == 256)
+           abort ();
+       }
+
+      for (i = i1; i <= i2; i++)
+       {
+         if (unicode_scripts[i] != (uint8_t)~(uint8_t)0)
+           fprintf (stderr, "0x%04X belongs to multiple scripts\n", i);
+         unicode_scripts[i] = script;
+       }
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", scripts_filename);
+      exit (1);
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE script_table
+#define ELEMENT uint8_t
+#define DEFAULT (uint8_t)~(uint8_t)0
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+static void
+output_scripts (const char *version)
+{
+  const char *filename = "unictype/scripts.h";
+  FILE *stream;
+  unsigned int ch, s, i;
+  struct script_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  typedef struct
+  {
+    const char *lowercase_name;
+  }
+  scriptinfo_t;
+  scriptinfo_t scriptinfo[256];
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Unicode scripts.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  for (s = 0; s < numscripts; s++)
+    {
+      char *lcp = strdup (scripts[s]);
+      char *cp;
+
+      for (cp = lcp; *cp != '\0'; cp++)
+       if (*cp >= 'A' && *cp <= 'Z')
+         *cp += 'a' - 'A';
+
+      scriptinfo[s].lowercase_name = lcp;
+    }
+
+  for (s = 0; s < numscripts; s++)
+    {
+      fprintf (stream, "static const uc_interval_t script_%s_intervals[] =\n",
+              scriptinfo[s].lowercase_name);
+      fprintf (stream, "{\n");
+      i = 0;
+      for (ch = 0; ch < 0x110000; ch++)
+       if (unicode_scripts[ch] == s)
+         {
+           unsigned int start;
+           unsigned int end;
+
+           start = ch;
+           while (ch + 1 < 0x110000 && unicode_scripts[ch + 1] == s)
+             ch++;
+           end = ch;
+
+           if (i > 0)
+             fprintf (stream, ",\n");
+           if (start == end)
+             fprintf (stream, "  { 0x%04X, 1, 1 }", start);
+           else
+             fprintf (stream, "  { 0x%04X, 1, 0 }, { 0x%04X, 0, 1 }",
+                      start, end);
+           i++;
+         }
+      fprintf (stream, "\n");
+      fprintf (stream, "};\n");
+    }
+
+  fprintf (stream, "static const uc_script_t scripts[%d] =\n", numscripts);
+  fprintf (stream, "{\n");
+  for (s = 0; s < numscripts; s++)
+    {
+      fprintf (stream, "  {\n");
+      fprintf (stream, "    sizeof (script_%s_intervals) / sizeof (uc_interval_t),\n",
+              scriptinfo[s].lowercase_name);
+      fprintf (stream, "    script_%s_intervals,\n",
+              scriptinfo[s].lowercase_name);
+      fprintf (stream, "    \"%s\"\n", scripts[s]);
+      fprintf (stream, "  }");
+      if (s+1 < numscripts)
+       fprintf (stream, ",");
+      fprintf (stream, "\n");
+    }
+  fprintf (stream, "};\n");
+
+  t.p = 7;
+  t.q = 9;
+  script_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      unsigned int s = unicode_scripts[ch];
+      if (s != (uint8_t)~(uint8_t)0)
+       script_table_add (&t, ch, s);
+    }
+
+  script_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define script_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_script =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " %3d", ((uint8_t *) (t.result + level3_offset))[i]);
+      if (i+1 < t.level3_size << t.p)
+       fprintf (stream, ",");
+    }
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+static void
+output_scripts_byname (const char *version)
+{
+  const char *filename = "unictype/scripts_byname.gperf";
+  FILE *stream;
+  unsigned int s;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Unicode scripts.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+  fprintf (stream, "struct named_script { const char *name; unsigned int index; };\n");
+  fprintf (stream, "%%struct-type\n");
+  fprintf (stream, "%%language=ANSI-C\n");
+  fprintf (stream, "%%define hash-function-name scripts_hash\n");
+  fprintf (stream, "%%define lookup-function-name uc_script_lookup\n");
+  fprintf (stream, "%%readonly-tables\n");
+  fprintf (stream, "%%global-table\n");
+  fprintf (stream, "%%define word-array-name script_names\n");
+  fprintf (stream, "%%%%\n");
+  for (s = 0; s < numscripts; s++)
+    fprintf (stream, "%s, %u\n", scripts[s], s);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Blocks.  */
+
+typedef struct { unsigned int start; unsigned int end; const char *name; }
+  block_t;
+static block_t blocks[256];
+static unsigned int numblocks;
+
+static void
+fill_blocks (const char *blocks_filename)
+{
+  FILE *stream;
+
+  stream = fopen (blocks_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", blocks_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      char buf[200+1];
+      unsigned int i1, i2;
+      char padding[200+1];
+      char blockname[200+1];
+
+      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
+       break;
+
+      if (buf[0] == '\0' || buf[0] == '#')
+       continue;
+
+      if (sscanf (buf, "%X..%X%[ ;]%[^\r]", &i1, &i2, padding, blockname) != 4)
+       {
+         fprintf (stderr, "parse error in '%s'\n", blocks_filename);
+         exit (1);
+       }
+      blocks[numblocks].start = i1;
+      blocks[numblocks].end = i2;
+      blocks[numblocks].name = strdup (blockname);
+      /* It must be sorted.  */
+      if (numblocks > 0 && !(blocks[numblocks-1].end < blocks[numblocks].start))
+       abort ();
+      numblocks++;
+      if (numblocks == 256)
+       abort ();
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", blocks_filename);
+      exit (1);
+    }
+}
+
+/* Return the smallest block index among the blocks for characters >= ch.  */
+static unsigned int
+block_first_index (unsigned int ch)
+{
+  /* Binary search.  */
+  unsigned int lo = 0;
+  unsigned int hi = numblocks;
+  /* Invariants:
+     All blocks[i], i < lo, have blocks[i].end < ch,
+     all blocks[i], i >= hi, have blocks[i].end >= ch.  */
+  while (lo < hi)
+    {
+      unsigned int mid = (lo + hi) / 2; /* >= lo, < hi */
+      if (blocks[mid].end < ch)
+       lo = mid + 1;
+      else
+       hi = mid;
+    }
+  return hi;
+}
+
+/* Return the largest block index among the blocks for characters <= ch,
+   plus 1.  */
+static unsigned int
+block_last_index (unsigned int ch)
+{
+  /* Binary search.  */
+  unsigned int lo = 0;
+  unsigned int hi = numblocks;
+  /* Invariants:
+     All blocks[i], i < lo, have blocks[i].start <= ch,
+     all blocks[i], i >= hi, have blocks[i].start > ch.  */
+  while (lo < hi)
+    {
+      unsigned int mid = (lo + hi) / 2; /* >= lo, < hi */
+      if (blocks[mid].start <= ch)
+       lo = mid + 1;
+      else
+       hi = mid;
+    }
+  return hi;
+}
+
+static void
+output_blocks (const char *version)
+{
+  const char *filename = "unictype/blocks.h";
+  const unsigned int shift = 8; /* bits to shift away for array access */
+  const unsigned int threshold = 0x30000; /* cut-off table here to save space */
+  FILE *stream;
+  unsigned int i;
+  unsigned int i1;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Unicode blocks.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  fprintf (stream, "static const uc_block_t blocks[] =\n");
+  fprintf (stream, "{\n");
+  for (i = 0; i < numblocks; i++)
+    {
+      fprintf (stream, "  { 0x%04X, 0x%04X, \"%s\" }", blocks[i].start,
+              blocks[i].end, blocks[i].name);
+      if (i+1 < numblocks)
+       fprintf (stream, ",");
+      fprintf (stream, "\n");
+    }
+  fprintf (stream, "};\n");
+  fprintf (stream, "#define blocks_level1_shift %d\n", shift);
+  fprintf (stream, "#define blocks_level1_threshold 0x%04X\n", threshold);
+  fprintf (stream, "static const uint8_t blocks_level1[%d * 2] =\n",
+          threshold >> shift);
+  fprintf (stream, "{\n");
+  for (i1 = 0; i1 < (threshold >> shift); i1++)
+    {
+      unsigned int first_index = block_first_index (i1 << shift);
+      unsigned int last_index = block_last_index (((i1 + 1) << shift) - 1);
+      fprintf (stream, "  %3d, %3d", first_index, last_index);
+      if (i1+1 < (threshold >> shift))
+       fprintf (stream, ",");
+      fprintf (stream, "\n");
+    }
+  fprintf (stream, "};\n");
+  fprintf (stream, "#define blocks_upper_first_index %d\n",
+          block_first_index (threshold));
+  fprintf (stream, "#define blocks_upper_last_index %d\n",
+          block_last_index (0x10FFFF));
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* C and Java syntax.  */
+
+enum
+{
+  UC_IDENTIFIER_START,    /* valid as first or subsequent character */
+  UC_IDENTIFIER_VALID,    /* valid as subsequent character only */
+  UC_IDENTIFIER_INVALID,  /* not valid */
+  UC_IDENTIFIER_IGNORABLE /* ignorable (Java only) */
+};
+
+/* ISO C 99 section 6.4.(3).  */
+static bool
+is_c_whitespace (unsigned int ch)
+{
+  return (ch == ' ' /* space */
+         || ch == '\t' /* horizontal tab */
+         || ch == '\n' || ch == '\r' /* new-line */
+         || ch == '\v' /* vertical tab */
+         || ch == '\f'); /* form-feed */
+}
+
+/* ISO C 99 section 6.4.2.1 and appendix D.  */
+static int
+c_ident_category (unsigned int ch)
+{
+  /* Section 6.4.2.1.  */
+  if (ch >= '0' && ch <= '9')
+    return UC_IDENTIFIER_VALID;
+  if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_')
+    return UC_IDENTIFIER_START;
+  /* Appendix D.  */
+  if (0
+      /* Latin */
+      || (ch == 0x00AA)
+      || (ch == 0x00BA)
+      || (ch >= 0x00C0 && ch <= 0x00D6)
+      || (ch >= 0x00D8 && ch <= 0x00F6)
+      || (ch >= 0x00F8 && ch <= 0x01F5)
+      || (ch >= 0x01FA && ch <= 0x0217)
+      || (ch >= 0x0250 && ch <= 0x02A8)
+      || (ch >= 0x1E00 && ch <= 0x1E9B)
+      || (ch >= 0x1EA0 && ch <= 0x1EF9)
+      || (ch == 0x207F)
+      /* Greek */
+      || (ch == 0x0386)
+      || (ch >= 0x0388 && ch <= 0x038A)
+      || (ch == 0x038C)
+      || (ch >= 0x038E && ch <= 0x03A1)
+      || (ch >= 0x03A3 && ch <= 0x03CE)
+      || (ch >= 0x03D0 && ch <= 0x03D6)
+      || (ch == 0x03DA)
+      || (ch == 0x03DC)
+      || (ch == 0x03DE)
+      || (ch == 0x03E0)
+      || (ch >= 0x03E2 && ch <= 0x03F3)
+      || (ch >= 0x1F00 && ch <= 0x1F15)
+      || (ch >= 0x1F18 && ch <= 0x1F1D)
+      || (ch >= 0x1F20 && ch <= 0x1F45)
+      || (ch >= 0x1F48 && ch <= 0x1F4D)
+      || (ch >= 0x1F50 && ch <= 0x1F57)
+      || (ch == 0x1F59)
+      || (ch == 0x1F5B)
+      || (ch == 0x1F5D)
+      || (ch >= 0x1F5F && ch <= 0x1F7D)
+      || (ch >= 0x1F80 && ch <= 0x1FB4)
+      || (ch >= 0x1FB6 && ch <= 0x1FBC)
+      || (ch >= 0x1FC2 && ch <= 0x1FC4)
+      || (ch >= 0x1FC6 && ch <= 0x1FCC)
+      || (ch >= 0x1FD0 && ch <= 0x1FD3)
+      || (ch >= 0x1FD6 && ch <= 0x1FDB)
+      || (ch >= 0x1FE0 && ch <= 0x1FEC)
+      || (ch >= 0x1FF2 && ch <= 0x1FF4)
+      || (ch >= 0x1FF6 && ch <= 0x1FFC)
+      /* Cyrillic */
+      || (ch >= 0x0401 && ch <= 0x040C)
+      || (ch >= 0x040E && ch <= 0x044F)
+      || (ch >= 0x0451 && ch <= 0x045C)
+      || (ch >= 0x045E && ch <= 0x0481)
+      || (ch >= 0x0490 && ch <= 0x04C4)
+      || (ch >= 0x04C7 && ch <= 0x04C8)
+      || (ch >= 0x04CB && ch <= 0x04CC)
+      || (ch >= 0x04D0 && ch <= 0x04EB)
+      || (ch >= 0x04EE && ch <= 0x04F5)
+      || (ch >= 0x04F8 && ch <= 0x04F9)
+      /* Armenian */
+      || (ch >= 0x0531 && ch <= 0x0556)
+      || (ch >= 0x0561 && ch <= 0x0587)
+      /* Hebrew */
+      || (ch >= 0x05B0 && ch <= 0x05B9)
+      || (ch >= 0x05BB && ch <= 0x05BD)
+      || (ch == 0x05BF)
+      || (ch >= 0x05C1 && ch <= 0x05C2)
+      || (ch >= 0x05D0 && ch <= 0x05EA)
+      || (ch >= 0x05F0 && ch <= 0x05F2)
+      /* Arabic */
+      || (ch >= 0x0621 && ch <= 0x063A)
+      || (ch >= 0x0640 && ch <= 0x0652)
+      || (ch >= 0x0670 && ch <= 0x06B7)
+      || (ch >= 0x06BA && ch <= 0x06BE)
+      || (ch >= 0x06C0 && ch <= 0x06CE)
+      || (ch >= 0x06D0 && ch <= 0x06DC)
+      || (ch >= 0x06E5 && ch <= 0x06E8)
+      || (ch >= 0x06EA && ch <= 0x06ED)
+      /* Devanagari */
+      || (ch >= 0x0901 && ch <= 0x0903)
+      || (ch >= 0x0905 && ch <= 0x0939)
+      || (ch >= 0x093E && ch <= 0x094D)
+      || (ch >= 0x0950 && ch <= 0x0952)
+      || (ch >= 0x0958 && ch <= 0x0963)
+      /* Bengali */
+      || (ch >= 0x0981 && ch <= 0x0983)
+      || (ch >= 0x0985 && ch <= 0x098C)
+      || (ch >= 0x098F && ch <= 0x0990)
+      || (ch >= 0x0993 && ch <= 0x09A8)
+      || (ch >= 0x09AA && ch <= 0x09B0)
+      || (ch == 0x09B2)
+      || (ch >= 0x09B6 && ch <= 0x09B9)
+      || (ch >= 0x09BE && ch <= 0x09C4)
+      || (ch >= 0x09C7 && ch <= 0x09C8)
+      || (ch >= 0x09CB && ch <= 0x09CD)
+      || (ch >= 0x09DC && ch <= 0x09DD)
+      || (ch >= 0x09DF && ch <= 0x09E3)
+      || (ch >= 0x09F0 && ch <= 0x09F1)
+      /* Gurmukhi */
+      || (ch == 0x0A02)
+      || (ch >= 0x0A05 && ch <= 0x0A0A)
+      || (ch >= 0x0A0F && ch <= 0x0A10)
+      || (ch >= 0x0A13 && ch <= 0x0A28)
+      || (ch >= 0x0A2A && ch <= 0x0A30)
+      || (ch >= 0x0A32 && ch <= 0x0A33)
+      || (ch >= 0x0A35 && ch <= 0x0A36)
+      || (ch >= 0x0A38 && ch <= 0x0A39)
+      || (ch >= 0x0A3E && ch <= 0x0A42)
+      || (ch >= 0x0A47 && ch <= 0x0A48)
+      || (ch >= 0x0A4B && ch <= 0x0A4D)
+      || (ch >= 0x0A59 && ch <= 0x0A5C)
+      || (ch == 0x0A5E)
+      || (ch == 0x0A74)
+      /* Gujarati */
+      || (ch >= 0x0A81 && ch <= 0x0A83)
+      || (ch >= 0x0A85 && ch <= 0x0A8B)
+      || (ch == 0x0A8D)
+      || (ch >= 0x0A8F && ch <= 0x0A91)
+      || (ch >= 0x0A93 && ch <= 0x0AA8)
+      || (ch >= 0x0AAA && ch <= 0x0AB0)
+      || (ch >= 0x0AB2 && ch <= 0x0AB3)
+      || (ch >= 0x0AB5 && ch <= 0x0AB9)
+      || (ch >= 0x0ABD && ch <= 0x0AC5)
+      || (ch >= 0x0AC7 && ch <= 0x0AC9)
+      || (ch >= 0x0ACB && ch <= 0x0ACD)
+      || (ch == 0x0AD0)
+      || (ch == 0x0AE0)
+      /* Oriya */
+      || (ch >= 0x0B01 && ch <= 0x0B03)
+      || (ch >= 0x0B05 && ch <= 0x0B0C)
+      || (ch >= 0x0B0F && ch <= 0x0B10)
+      || (ch >= 0x0B13 && ch <= 0x0B28)
+      || (ch >= 0x0B2A && ch <= 0x0B30)
+      || (ch >= 0x0B32 && ch <= 0x0B33)
+      || (ch >= 0x0B36 && ch <= 0x0B39)
+      || (ch >= 0x0B3E && ch <= 0x0B43)
+      || (ch >= 0x0B47 && ch <= 0x0B48)
+      || (ch >= 0x0B4B && ch <= 0x0B4D)
+      || (ch >= 0x0B5C && ch <= 0x0B5D)
+      || (ch >= 0x0B5F && ch <= 0x0B61)
+      /* Tamil */
+      || (ch >= 0x0B82 && ch <= 0x0B83)
+      || (ch >= 0x0B85 && ch <= 0x0B8A)
+      || (ch >= 0x0B8E && ch <= 0x0B90)
+      || (ch >= 0x0B92 && ch <= 0x0B95)
+      || (ch >= 0x0B99 && ch <= 0x0B9A)
+      || (ch == 0x0B9C)
+      || (ch >= 0x0B9E && ch <= 0x0B9F)
+      || (ch >= 0x0BA3 && ch <= 0x0BA4)
+      || (ch >= 0x0BA8 && ch <= 0x0BAA)
+      || (ch >= 0x0BAE && ch <= 0x0BB5)
+      || (ch >= 0x0BB7 && ch <= 0x0BB9)
+      || (ch >= 0x0BBE && ch <= 0x0BC2)
+      || (ch >= 0x0BC6 && ch <= 0x0BC8)
+      || (ch >= 0x0BCA && ch <= 0x0BCD)
+      /* Telugu */
+      || (ch >= 0x0C01 && ch <= 0x0C03)
+      || (ch >= 0x0C05 && ch <= 0x0C0C)
+      || (ch >= 0x0C0E && ch <= 0x0C10)
+      || (ch >= 0x0C12 && ch <= 0x0C28)
+      || (ch >= 0x0C2A && ch <= 0x0C33)
+      || (ch >= 0x0C35 && ch <= 0x0C39)
+      || (ch >= 0x0C3E && ch <= 0x0C44)
+      || (ch >= 0x0C46 && ch <= 0x0C48)
+      || (ch >= 0x0C4A && ch <= 0x0C4D)
+      || (ch >= 0x0C60 && ch <= 0x0C61)
+      /* Kannada */
+      || (ch >= 0x0C82 && ch <= 0x0C83)
+      || (ch >= 0x0C85 && ch <= 0x0C8C)
+      || (ch >= 0x0C8E && ch <= 0x0C90)
+      || (ch >= 0x0C92 && ch <= 0x0CA8)
+      || (ch >= 0x0CAA && ch <= 0x0CB3)
+      || (ch >= 0x0CB5 && ch <= 0x0CB9)
+      || (ch >= 0x0CBE && ch <= 0x0CC4)
+      || (ch >= 0x0CC6 && ch <= 0x0CC8)
+      || (ch >= 0x0CCA && ch <= 0x0CCD)
+      || (ch == 0x0CDE)
+      || (ch >= 0x0CE0 && ch <= 0x0CE1)
+      /* Malayalam */
+      || (ch >= 0x0D02 && ch <= 0x0D03)
+      || (ch >= 0x0D05 && ch <= 0x0D0C)
+      || (ch >= 0x0D0E && ch <= 0x0D10)
+      || (ch >= 0x0D12 && ch <= 0x0D28)
+      || (ch >= 0x0D2A && ch <= 0x0D39)
+      || (ch >= 0x0D3E && ch <= 0x0D43)
+      || (ch >= 0x0D46 && ch <= 0x0D48)
+      || (ch >= 0x0D4A && ch <= 0x0D4D)
+      || (ch >= 0x0D60 && ch <= 0x0D61)
+      /* Thai */
+      || (ch >= 0x0E01 && ch <= 0x0E3A)
+      || (ch >= 0x0E40 && ch <= 0x0E5B)
+      /* Lao */
+      || (ch >= 0x0E81 && ch <= 0x0E82)
+      || (ch == 0x0E84)
+      || (ch >= 0x0E87 && ch <= 0x0E88)
+      || (ch == 0x0E8A)
+      || (ch == 0x0E8D)
+      || (ch >= 0x0E94 && ch <= 0x0E97)
+      || (ch >= 0x0E99 && ch <= 0x0E9F)
+      || (ch >= 0x0EA1 && ch <= 0x0EA3)
+      || (ch == 0x0EA5)
+      || (ch == 0x0EA7)
+      || (ch >= 0x0EAA && ch <= 0x0EAB)
+      || (ch >= 0x0EAD && ch <= 0x0EAE)
+      || (ch >= 0x0EB0 && ch <= 0x0EB9)
+      || (ch >= 0x0EBB && ch <= 0x0EBD)
+      || (ch >= 0x0EC0 && ch <= 0x0EC4)
+      || (ch == 0x0EC6)
+      || (ch >= 0x0EC8 && ch <= 0x0ECD)
+      || (ch >= 0x0EDC && ch <= 0x0EDD)
+      /* Tibetan */
+      || (ch == 0x0F00)
+      || (ch >= 0x0F18 && ch <= 0x0F19)
+      || (ch == 0x0F35)
+      || (ch == 0x0F37)
+      || (ch == 0x0F39)
+      || (ch >= 0x0F3E && ch <= 0x0F47)
+      || (ch >= 0x0F49 && ch <= 0x0F69)
+      || (ch >= 0x0F71 && ch <= 0x0F84)
+      || (ch >= 0x0F86 && ch <= 0x0F8B)
+      || (ch >= 0x0F90 && ch <= 0x0F95)
+      || (ch == 0x0F97)
+      || (ch >= 0x0F99 && ch <= 0x0FAD)
+      || (ch >= 0x0FB1 && ch <= 0x0FB7)
+      || (ch == 0x0FB9)
+      /* Georgian */
+      || (ch >= 0x10A0 && ch <= 0x10C5)
+      || (ch >= 0x10D0 && ch <= 0x10F6)
+      /* Hiragana */
+      || (ch >= 0x3041 && ch <= 0x3093)
+      || (ch >= 0x309B && ch <= 0x309C)
+      /* Katakana */
+      || (ch >= 0x30A1 && ch <= 0x30F6)
+      || (ch >= 0x30FB && ch <= 0x30FC)
+      /* Bopomofo */
+      || (ch >= 0x3105 && ch <= 0x312C)
+      /* CJK Unified Ideographs */
+      || (ch >= 0x4E00 && ch <= 0x9FA5)
+      /* Hangul */
+      || (ch >= 0xAC00 && ch <= 0xD7A3)
+      /* Digits */
+      || (ch >= 0x0660 && ch <= 0x0669)
+      || (ch >= 0x06F0 && ch <= 0x06F9)
+      || (ch >= 0x0966 && ch <= 0x096F)
+      || (ch >= 0x09E6 && ch <= 0x09EF)
+      || (ch >= 0x0A66 && ch <= 0x0A6F)
+      || (ch >= 0x0AE6 && ch <= 0x0AEF)
+      || (ch >= 0x0B66 && ch <= 0x0B6F)
+      || (ch >= 0x0BE7 && ch <= 0x0BEF)
+      || (ch >= 0x0C66 && ch <= 0x0C6F)
+      || (ch >= 0x0CE6 && ch <= 0x0CEF)
+      || (ch >= 0x0D66 && ch <= 0x0D6F)
+      || (ch >= 0x0E50 && ch <= 0x0E59)
+      || (ch >= 0x0ED0 && ch <= 0x0ED9)
+      || (ch >= 0x0F20 && ch <= 0x0F33)
+      /* Special characters */
+      || (ch == 0x00B5)
+      || (ch == 0x00B7)
+      || (ch >= 0x02B0 && ch <= 0x02B8)
+      || (ch == 0x02BB)
+      || (ch >= 0x02BD && ch <= 0x02C1)
+      || (ch >= 0x02D0 && ch <= 0x02D1)
+      || (ch >= 0x02E0 && ch <= 0x02E4)
+      || (ch == 0x037A)
+      || (ch == 0x0559)
+      || (ch == 0x093D)
+      || (ch == 0x0B3D)
+      || (ch == 0x1FBE)
+      || (ch >= 0x203F && ch <= 0x2040)
+      || (ch == 0x2102)
+      || (ch == 0x2107)
+      || (ch >= 0x210A && ch <= 0x2113)
+      || (ch == 0x2115)
+      || (ch >= 0x2118 && ch <= 0x211D)
+      || (ch == 0x2124)
+      || (ch == 0x2126)
+      || (ch == 0x2128)
+      || (ch >= 0x212A && ch <= 0x2131)
+      || (ch >= 0x2133 && ch <= 0x2138)
+      || (ch >= 0x2160 && ch <= 0x2182)
+      || (ch >= 0x3005 && ch <= 0x3007)
+      || (ch >= 0x3021 && ch <= 0x3029)
+     )
+    return UC_IDENTIFIER_START;
+  return UC_IDENTIFIER_INVALID;
+}
+
+/* The Java Language Specification, 3rd edition, §3.6.
+   http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#95710  */
+static bool
+is_java_whitespace (unsigned int ch)
+{
+  return (ch == ' ' || ch == '\t' || ch == '\f'
+         || ch == '\n' || ch == '\r');
+}
+
+/* The Java Language Specification, 3rd edition, §3.8.
+   http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#40625
+   and Character.isJavaIdentifierStart and Character.isJavaIdentifierPart  */
+static int
+java_ident_category (unsigned int ch)
+{
+  /* FIXME: Check this against Sun's JDK implementation.  */
+  if (is_category_L (ch) /* = Character.isLetter(ch) */
+      || is_category_Nl (ch) /* = Character.getType(ch)==LETTER_NUMBER */
+      || is_category_Sc (ch) /* currency symbol */
+      || is_category_Pc (ch) /* connector punctuation */
+     )
+    return UC_IDENTIFIER_START;
+  if (is_category_Nd (ch) /* digit */
+      || is_category_Mc (ch) /* combining mark */
+      || is_category_Mn (ch) /* non-spacing mark */
+     )
+    return UC_IDENTIFIER_VALID;
+  if ((ch >= 0x0000 && ch <= 0x0008)
+      || (ch >= 0x000E && ch <= 0x001B)
+      || (ch >= 0x007F && ch <= 0x009F)
+      || is_category_Cf (ch) /* = Character.getType(ch)==FORMAT */
+     )
+    return UC_IDENTIFIER_IGNORABLE;
+  return UC_IDENTIFIER_INVALID;
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE identsyntax_table
+#define ELEMENT uint8_t
+#define DEFAULT UC_IDENTIFIER_INVALID
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output an identifier syntax categorization in a three-level bitmap.  */
+static void
+output_ident_category (const char *filename, int (*predicate) (unsigned int), const char *name, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct identsyntax_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Language syntax properties of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
+          version);
+
+  t.p = 7; /* or 8 */
+  t.q = 5; /* or 4 */
+  identsyntax_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int syntaxcode = predicate (ch);
+      if (syntaxcode != UC_IDENTIFIER_INVALID)
+       identsyntax_table_add (&t, ch, syntaxcode);
+    }
+
+  identsyntax_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define identsyntax_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned short level3[%zu * %d];\n", t.level3_size,
+          (1 << t.p) * 2 / 16);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "%s =\n", name);
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zd",
+                (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  /* Pack the level3 array.  Each entry needs 2 bits only.  */
+  fprintf (stream, "  {");
+  if ((t.level3_size << t.p) * 2 / 16 > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < (t.level3_size << t.p) * 2 / 16; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " 0x%04x",
+              (((uint8_t *) (t.result + level3_offset))[8 * i] << 0)
+              | (((uint8_t *) (t.result + level3_offset))[8 * i + 1] << 2)
+              | (((uint8_t *) (t.result + level3_offset))[8 * i + 2] << 4)
+              | (((uint8_t *) (t.result + level3_offset))[8 * i + 3] << 6)
+              | (((uint8_t *) (t.result + level3_offset))[8 * i + 4] << 8)
+              | (((uint8_t *) (t.result + level3_offset))[8 * i + 5] << 10)
+              | (((uint8_t *) (t.result + level3_offset))[8 * i + 6] << 12)
+              | (((uint8_t *) (t.result + level3_offset))[8 * i + 7] << 14));
+      if (i+1 < (t.level3_size << t.p) * 2 / 16)
+       fprintf (stream, ",");
+    }
+  if ((t.level3_size << t.p) * 2 / 16 > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+static void
+output_ident_properties (const char *version)
+{
+#define PROPERTY(P) \
+  debug_output_predicate ("unictype/sy_" #P ".txt", is_ ## P); \
+  output_predicate_test ("../tests/unictype/test-sy_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
+  output_predicate ("unictype/sy_" #P ".h", is_ ## P, "u_" #P, "Language syntax properties", version);
+  PROPERTY(c_whitespace)
+  PROPERTY(java_whitespace)
+#undef PROPERTY
+
+  output_ident_category ("unictype/sy_c_ident.h", c_ident_category, "u_c_ident", version);
+  output_ident_category ("unictype/sy_java_ident.h", java_ident_category, "u_java_ident", version);
+}
+
+/* ========================================================================= */
+
+/* Like ISO C <ctype.h> and <wctype.h>.  Compatible to glibc's
+   glibc/localedata/locales/i18n file, generated by
+   glibc/localedata/gen-unicode-ctype.c.  */
+
+/* Character mappings.  */
+
+static unsigned int
+to_upper (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].upper != NONE)
+    return unicode_attributes[ch].upper;
+  else
+    return ch;
+}
+
+static unsigned int
+to_lower (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].lower != NONE)
+    return unicode_attributes[ch].lower;
+  else
+    return ch;
+}
+
+static unsigned int
+to_title (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].title != NONE)
+    return unicode_attributes[ch].title;
+  else
+    return ch;
+}
+
+/* Character class properties.  */
+
+static bool
+is_upper (unsigned int ch)
+{
+  return (to_lower (ch) != ch);
+}
+
+static bool
+is_lower (unsigned int ch)
+{
+  return (to_upper (ch) != ch)
+        /* <U00DF> is lowercase, but without simple to_upper mapping.  */
+        || (ch == 0x00DF);
+}
+
+static bool
+is_alpha (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && ((unicode_attributes[ch].category[0] == 'L'
+              /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
+                 <U0E2F>, <U0E46> should belong to is_punct.  */
+              && (ch != 0x0E2F) && (ch != 0x0E46))
+             /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
+                <U0E31>, <U0E34>..<U0E3A>, <U0E47>..<U0E4E> are is_alpha.  */
+             || (ch == 0x0E31)
+             || (ch >= 0x0E34 && ch <= 0x0E3A)
+             || (ch >= 0x0E47 && ch <= 0x0E4E)
+             /* Avoid warning for <U0345>.  */
+             || (ch == 0x0345)
+             /* Avoid warnings for <U2160>..<U217F>.  */
+             || (unicode_attributes[ch].category[0] == 'N'
+                 && unicode_attributes[ch].category[1] == 'l')
+             /* Avoid warnings for <U24B6>..<U24E9>.  */
+             || (unicode_attributes[ch].category[0] == 'S'
+                 && unicode_attributes[ch].category[1] == 'o'
+                 && strstr (unicode_attributes[ch].name, " LETTER ")
+                    != NULL)
+             /* Consider all the non-ASCII digits as alphabetic.
+                ISO C 99 forbids us to have them in category "digit",
+                but we want iswalnum to return true on them.  */
+             || (unicode_attributes[ch].category[0] == 'N'
+                 && unicode_attributes[ch].category[1] == 'd'
+                 && !(ch >= 0x0030 && ch <= 0x0039))));
+}
+
+static bool
+is_digit (unsigned int ch)
+{
+#if 0
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'N'
+         && unicode_attributes[ch].category[1] == 'd');
+  /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
+     a zero.  Must add <0> in front of them by hand.  */
+#else
+  /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
+     takes it away:
+     7.25.2.1.5:
+        The iswdigit function tests for any wide character that corresponds
+        to a decimal-digit character (as defined in 5.2.1).
+     5.2.1:
+        the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
+   */
+  return (ch >= 0x0030 && ch <= 0x0039);
+#endif
+}
+
+static bool
+is_outdigit (unsigned int ch)
+{
+  return (ch >= 0x0030 && ch <= 0x0039);
+}
+
+static bool
+is_alnum (unsigned int ch)
+{
+  return is_alpha (ch) || is_digit (ch);
+}
+
+static bool
+is_blank (unsigned int ch)
+{
+  return (ch == 0x0009 /* '\t' */
+         /* Category Zs without mention of "<noBreak>" */
+         || (unicode_attributes[ch].name != NULL
+             && unicode_attributes[ch].category[0] == 'Z'
+             && unicode_attributes[ch].category[1] == 's'
+             && !strstr (unicode_attributes[ch].decomposition, "<noBreak>")));
+}
+
+static bool
+is_space (unsigned int ch)
+{
+  /* Don't make U+00A0 a space. Non-breaking space means that all programs
+     should treat it like a punctuation character, not like a space. */
+  return (ch == 0x0020 /* ' ' */
+         || ch == 0x000C /* '\f' */
+         || ch == 0x000A /* '\n' */
+         || ch == 0x000D /* '\r' */
+         || ch == 0x0009 /* '\t' */
+         || ch == 0x000B /* '\v' */
+         /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
+         || (unicode_attributes[ch].name != NULL
+             && unicode_attributes[ch].category[0] == 'Z'
+             && (unicode_attributes[ch].category[1] == 'l'
+                 || unicode_attributes[ch].category[1] == 'p'
+                 || (unicode_attributes[ch].category[1] == 's'
+                     && !strstr (unicode_attributes[ch].decomposition,
+                                 "<noBreak>")))));
+}
+
+static bool
+is_cntrl (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && (strcmp (unicode_attributes[ch].name, "<control>") == 0
+             /* Categories Zl and Zp */
+             || (unicode_attributes[ch].category[0] == 'Z'
+                 && (unicode_attributes[ch].category[1] == 'l'
+                     || unicode_attributes[ch].category[1] == 'p'))));
+}
+
+static bool
+is_xdigit (unsigned int ch)
+{
+#if 0
+  return is_digit (ch)
+        || (ch >= 0x0041 && ch <= 0x0046)
+        || (ch >= 0x0061 && ch <= 0x0066);
+#else
+  /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99
+     takes it away:
+     7.25.2.1.12:
+        The iswxdigit function tests for any wide character that corresponds
+        to a hexadecimal-digit character (as defined in 6.4.4.1).
+     6.4.4.1:
+        hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
+   */
+  return (ch >= 0x0030 && ch <= 0x0039)
+        || (ch >= 0x0041 && ch <= 0x0046)
+        || (ch >= 0x0061 && ch <= 0x0066);
+#endif
+}
+
+static bool
+is_graph (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && strcmp (unicode_attributes[ch].name, "<control>")
+         && !is_space (ch));
+}
+
+static bool
+is_print (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+         && strcmp (unicode_attributes[ch].name, "<control>")
+         /* Categories Zl and Zp */
+         && !(unicode_attributes[ch].name != NULL
+              && unicode_attributes[ch].category[0] == 'Z'
+              && (unicode_attributes[ch].category[1] == 'l'
+                  || unicode_attributes[ch].category[1] == 'p')));
+}
+
+static bool
+is_punct (unsigned int ch)
+{
+#if 0
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'P');
+#else
+  /* The traditional POSIX definition of punctuation is every graphic,
+     non-alphanumeric character.  */
+  return (is_graph (ch) && !is_alpha (ch) && !is_digit (ch));
+#endif
+}
+
+/* Output all properties.  */
+static void
+output_old_ctype (const char *version)
+{
+#define PROPERTY(P) \
+  debug_output_predicate ("unictype/ctype_" #P ".txt", is_ ## P); \
+  output_predicate_test ("../tests/unictype/test-ctype_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
+  output_predicate ("unictype/ctype_" #P ".h", is_ ## P, "u_is_" #P, "ISO C <ctype.h> like properties", version);
+  PROPERTY(alnum)
+  PROPERTY(alpha)
+  PROPERTY(cntrl)
+  PROPERTY(digit)
+  PROPERTY(graph)
+  PROPERTY(lower)
+  PROPERTY(print)
+  PROPERTY(punct)
+  PROPERTY(space)
+  PROPERTY(upper)
+  PROPERTY(xdigit)
+  PROPERTY(blank)
+#undef PROPERTY
+}
+
+#if 0
+
+static bool
+is_combining (unsigned int ch)
+{
+  /* Up to Unicode 3.0.1 we took the Combining property from the PropList.txt
+     file. In 3.0.1 it was identical to the union of the general categories
+     "Mn", "Mc", "Me". In Unicode 3.1 this property has been dropped from the
+     PropList.txt file, so we take the latter definition.  */
+  return (unicode_attributes[ch].name != NULL
+         && unicode_attributes[ch].category[0] == 'M'
+         && (unicode_attributes[ch].category[1] == 'n'
+             || unicode_attributes[ch].category[1] == 'c'
+             || unicode_attributes[ch].category[1] == 'e'));
+}
+
+static bool
+is_combining_level3 (unsigned int ch)
+{
+  return is_combining (ch)
+        && !(unicode_attributes[ch].combining[0] != '\0'
+             && unicode_attributes[ch].combining[0] != '0'
+             && strtoul (unicode_attributes[ch].combining, NULL, 10) >= 200);
+}
+
+/* Return the UCS symbol string for a Unicode character.  */
+static const char *
+ucs_symbol (unsigned int i)
+{
+  static char buf[11+1];
+
+  sprintf (buf, (i < 0x10000 ? "<U%04X>" : "<U%08X>"), i);
+  return buf;
+}
+
+/* Return the UCS symbol range string for a Unicode characters interval.  */
+static const char *
+ucs_symbol_range (unsigned int low, unsigned int high)
+{
+  static char buf[24+1];
+
+  strcpy (buf, ucs_symbol (low));
+  strcat (buf, "..");
+  strcat (buf, ucs_symbol (high));
+  return buf;
+}
+
+/* Output a character class (= property) table.  */
+
+static void
+output_charclass (FILE *stream, const char *classname,
+                 bool (*func) (unsigned int))
+{
+  char table[0x110000];
+  unsigned int i;
+  bool need_semicolon;
+  const int max_column = 75;
+  int column;
+
+  for (i = 0; i < 0x110000; i++)
+    table[i] = (int) func (i);
+
+  fprintf (stream, "%s ", classname);
+  need_semicolon = false;
+  column = 1000;
+  for (i = 0; i < 0x110000; )
+    {
+      if (!table[i])
+       i++;
+      else
+       {
+         unsigned int low, high;
+         char buf[25];
+
+         low = i;
+         do
+           i++;
+         while (i < 0x110000 && table[i]);
+         high = i - 1;
+
+         if (low == high)
+           strcpy (buf, ucs_symbol (low));
+         else
+           strcpy (buf, ucs_symbol_range (low, high));
+
+         if (need_semicolon)
+           {
+             fprintf (stream, ";");
+             column++;
+           }
+
+         if (column + strlen (buf) > max_column)
+           {
+             fprintf (stream, "/\n   ");
+             column = 3;
+           }
+
+         fprintf (stream, "%s", buf);
+         column += strlen (buf);
+         need_semicolon = true;
+       }
+    }
+  fprintf (stream, "\n");
+}
+
+/* Output a character mapping table.  */
+
+static void
+output_charmap (FILE *stream, const char *mapname,
+               unsigned int (*func) (unsigned int))
+{
+  char table[0x110000];
+  unsigned int i;
+  bool need_semicolon;
+  const int max_column = 75;
+  int column;
+
+  for (i = 0; i < 0x110000; i++)
+    table[i] = (func (i) != i);
+
+  fprintf (stream, "%s ", mapname);
+  need_semicolon = false;
+  column = 1000;
+  for (i = 0; i < 0x110000; i++)
+    if (table[i])
+      {
+       char buf[25+1];
+
+       strcpy (buf, "(");
+       strcat (buf, ucs_symbol (i));
+       strcat (buf, ",");
+       strcat (buf, ucs_symbol (func (i)));
+       strcat (buf, ")");
+
+       if (need_semicolon)
+         {
+           fprintf (stream, ";");
+           column++;
+         }
+
+       if (column + strlen (buf) > max_column)
+         {
+           fprintf (stream, "/\n   ");
+           column = 3;
+         }
+
+       fprintf (stream, "%s", buf);
+       column += strlen (buf);
+       need_semicolon = true;
+      }
+  fprintf (stream, "\n");
+}
+
+/* Output the width table.  */
+
+static void
+output_widthmap (FILE *stream)
+{
+}
+
+/* Output the tables to the given file.  */
+
+static void
+output_tables (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int ch;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "escape_char /\n");
+  fprintf (stream, "comment_char %%\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "%% Generated automatically by gen-unicode-ctype for Unicode %s.\n",
+          version);
+  fprintf (stream, "\n");
+
+  fprintf (stream, "LC_IDENTIFICATION\n");
+  fprintf (stream, "title     \"Unicode %s FDCC-set\"\n", version);
+  fprintf (stream, "source    \"UnicodeData.txt, PropList.txt\"\n");
+  fprintf (stream, "address   \"\"\n");
+  fprintf (stream, "contact   \"\"\n");
+  fprintf (stream, "email     \"bug-glibc@gnu.org\"\n");
+  fprintf (stream, "tel       \"\"\n");
+  fprintf (stream, "fax       \"\"\n");
+  fprintf (stream, "language  \"\"\n");
+  fprintf (stream, "territory \"Earth\"\n");
+  fprintf (stream, "revision  \"%s\"\n", version);
+  {
+    time_t now;
+    char date[11];
+    now = time (NULL);
+    strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now));
+    fprintf (stream, "date      \"%s\"\n", date);
+  }
+  fprintf (stream, "category  \"unicode:2001\";LC_CTYPE\n");
+  fprintf (stream, "END LC_IDENTIFICATION\n");
+  fprintf (stream, "\n");
+
+  /* Verifications. */
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      /* toupper restriction: "Only characters specified for the keywords
+        lower and upper shall be specified.  */
+      if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch)))
+       fprintf (stderr,
+                "%s is not upper|lower but toupper(0x%04X) = 0x%04X\n",
+                ucs_symbol (ch), ch, to_upper (ch));
+
+      /* tolower restriction: "Only characters specified for the keywords
+        lower and upper shall be specified.  */
+      if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch)))
+       fprintf (stderr,
+                "%s is not upper|lower but tolower(0x%04X) = 0x%04X\n",
+                ucs_symbol (ch), ch, to_lower (ch));
+
+      /* alpha restriction: "Characters classified as either upper or lower
+        shall automatically belong to this class.  */
+      if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch))
+       fprintf (stderr, "%s is upper|lower but not alpha\n", ucs_symbol (ch));
+
+      /* alpha restriction: "No character specified for the keywords cntrl,
+        digit, punct or space shall be specified."  */
+      if (is_alpha (ch) && is_cntrl (ch))
+       fprintf (stderr, "%s is alpha and cntrl\n", ucs_symbol (ch));
+      if (is_alpha (ch) && is_digit (ch))
+       fprintf (stderr, "%s is alpha and digit\n", ucs_symbol (ch));
+      if (is_alpha (ch) && is_punct (ch))
+       fprintf (stderr, "%s is alpha and punct\n", ucs_symbol (ch));
+      if (is_alpha (ch) && is_space (ch))
+       fprintf (stderr, "%s is alpha and space\n", ucs_symbol (ch));
+
+      /* space restriction: "No character specified for the keywords upper,
+        lower, alpha, digit, graph or xdigit shall be specified."
+        upper, lower, alpha already checked above.  */
+      if (is_space (ch) && is_digit (ch))
+       fprintf (stderr, "%s is space and digit\n", ucs_symbol (ch));
+      if (is_space (ch) && is_graph (ch))
+       fprintf (stderr, "%s is space and graph\n", ucs_symbol (ch));
+      if (is_space (ch) && is_xdigit (ch))
+       fprintf (stderr, "%s is space and xdigit\n", ucs_symbol (ch));
+
+      /* cntrl restriction: "No character specified for the keywords upper,
+        lower, alpha, digit, punct, graph, print or xdigit shall be
+        specified."  upper, lower, alpha already checked above.  */
+      if (is_cntrl (ch) && is_digit (ch))
+       fprintf (stderr, "%s is cntrl and digit\n", ucs_symbol (ch));
+      if (is_cntrl (ch) && is_punct (ch))
+       fprintf (stderr, "%s is cntrl and punct\n", ucs_symbol (ch));
+      if (is_cntrl (ch) && is_graph (ch))
+       fprintf (stderr, "%s is cntrl and graph\n", ucs_symbol (ch));
+      if (is_cntrl (ch) && is_print (ch))
+       fprintf (stderr, "%s is cntrl and print\n", ucs_symbol (ch));
+      if (is_cntrl (ch) && is_xdigit (ch))
+       fprintf (stderr, "%s is cntrl and xdigit\n", ucs_symbol (ch));
+
+      /* punct restriction: "No character specified for the keywords upper,
+        lower, alpha, digit, cntrl, xdigit or as the <space> character shall
+        be specified."  upper, lower, alpha, cntrl already checked above.  */
+      if (is_punct (ch) && is_digit (ch))
+       fprintf (stderr, "%s is punct and digit\n", ucs_symbol (ch));
+      if (is_punct (ch) && is_xdigit (ch))
+       fprintf (stderr, "%s is punct and xdigit\n", ucs_symbol (ch));
+      if (is_punct (ch) && (ch == 0x0020))
+       fprintf (stderr, "%s is punct\n", ucs_symbol (ch));
+
+      /* graph restriction: "No character specified for the keyword cntrl
+        shall be specified."  Already checked above.  */
+
+      /* print restriction: "No character specified for the keyword cntrl
+        shall be specified."  Already checked above.  */
+
+      /* graph - print relation: differ only in the <space> character.
+        How is this possible if there are more than one space character?!
+        I think susv2/xbd/locale.html should speak of "space characters",
+        not "space character".  */
+      if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch)))
+       fprintf (stderr,
+                "%s is print but not graph|<space>\n", ucs_symbol (ch));
+      if (!is_print (ch) && (is_graph (ch) || ch == 0x0020))
+       fprintf (stderr,
+                "%s is graph|<space> but not print\n", ucs_symbol (ch));
+    }
+
+  fprintf (stream, "LC_CTYPE\n");
+  output_charclass (stream, "upper", is_upper);
+  output_charclass (stream, "lower", is_lower);
+  output_charclass (stream, "alpha", is_alpha);
+  output_charclass (stream, "digit", is_digit);
+  output_charclass (stream, "outdigit", is_outdigit);
+  output_charclass (stream, "blank", is_blank);
+  output_charclass (stream, "space", is_space);
+  output_charclass (stream, "cntrl", is_cntrl);
+  output_charclass (stream, "punct", is_punct);
+  output_charclass (stream, "xdigit", is_xdigit);
+  output_charclass (stream, "graph", is_graph);
+  output_charclass (stream, "print", is_print);
+  output_charclass (stream, "class \"combining\";", is_combining);
+  output_charclass (stream, "class \"combining_level3\";", is_combining_level3);
+  output_charmap (stream, "toupper", to_upper);
+  output_charmap (stream, "tolower", to_lower);
+  output_charmap (stream, "map \"totitle\";", to_title);
+  output_widthmap (stream);
+  fprintf (stream, "END LC_CTYPE\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+#endif
+
+/* ========================================================================= */
+
+/* The width property from the EastAsianWidth.txt file.
+   Each is NULL (unassigned) or "N", "A", "H", "W", "F", "Na".  */
+const char * unicode_width[0x110000];
+
+/* Stores in unicode_width[] the width property from the EastAsianWidth.txt
+   file.  */
+static void
+fill_width (const char *width_filename)
+{
+  unsigned int i, j;
+  FILE *stream;
+  char field0[FIELDLEN];
+  char field1[FIELDLEN];
+  char field2[FIELDLEN];
+  int lineno = 0;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_width[i] = (unicode_attributes[i].name != NULL ? "N" : NULL);
+
+  stream = fopen (width_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", width_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      int n;
+      int c;
+
+      lineno++;
+      c = getc (stream);
+      if (c == EOF)
+       break;
+      if (c == '#')
+       {
+         do c = getc (stream); while (c != EOF && c != '\n');
+         continue;
+       }
+      ungetc (c, stream);
+      n = getfield (stream, field0, ';');
+      n += getfield (stream, field1, ' ');
+      n += getfield (stream, field2, '\n');
+      if (n == 0)
+       break;
+      if (n != 3)
+       {
+         fprintf (stderr, "short line in '%s':%d\n", width_filename, lineno);
+         exit (1);
+       }
+      i = strtoul (field0, NULL, 16);
+      if (strstr (field0, "..") != NULL)
+       {
+         /* Deal with a range.  */
+         j = strtoul (strstr (field0, "..") + 2, NULL, 16);
+         for (; i <= j; i++)
+           unicode_width[i] = strdup (field1);
+       }
+      else
+       {
+         /* Single character line.  */
+         unicode_width[i] = strdup (field1);
+       }
+    }
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", width_filename);
+      exit (1);
+    }
+}
+
+/* Line breaking classification.  */
+
+enum
+{
+  /* Values >= 24 are resolved at run time. */
+  LBP_BK = 24, /* mandatory break */
+/*LBP_CR,         carriage return - not used here because it's a DOSism */
+/*LBP_LF,         line feed - not used here because it's a DOSism */
+  LBP_CM = 25, /* attached characters and combining marks */
+/*LBP_NL,         next line - not used here because it's equivalent to LBP_BK */
+/*LBP_SG,         surrogates - not used here because they are not characters */
+  LBP_WJ =  0, /* word joiner */
+  LBP_ZW = 26, /* zero width space */
+  LBP_GL =  1, /* non-breaking (glue) */
+  LBP_SP = 27, /* space */
+  LBP_B2 =  2, /* break opportunity before and after */
+  LBP_BA =  3, /* break opportunity after */
+  LBP_BB =  4, /* break opportunity before */
+  LBP_HY =  5, /* hyphen */
+  LBP_CB = 28, /* contingent break opportunity */
+  LBP_CL =  6, /* closing punctuation */
+  LBP_EX =  7, /* exclamation/interrogation */
+  LBP_IN =  8, /* inseparable */
+  LBP_NS =  9, /* non starter */
+  LBP_OP = 10, /* opening punctuation */
+  LBP_QU = 11, /* ambiguous quotation */
+  LBP_IS = 12, /* infix separator (numeric) */
+  LBP_NU = 13, /* numeric */
+  LBP_PO = 14, /* postfix (numeric) */
+  LBP_PR = 15, /* prefix (numeric) */
+  LBP_SY = 16, /* symbols allowing breaks */
+  LBP_AI = 29, /* ambiguous (alphabetic or ideograph) */
+  LBP_AL = 17, /* ordinary alphabetic and symbol characters */
+  LBP_H2 = 18, /* Hangul LV syllable */
+  LBP_H3 = 19, /* Hangul LVT syllable */
+  LBP_ID = 20, /* ideographic */
+  LBP_JL = 21, /* Hangul L Jamo */
+  LBP_JV = 22, /* Hangul V Jamo */
+  LBP_JT = 23, /* Hangul T Jamo */
+  LBP_SA = 30, /* complex context (South East Asian) */
+  LBP_XX = 31  /* unknown */
+};
+
+/* Returns the line breaking classification for ch, as a bit mask.  */
+static int
+get_lbp (unsigned int ch)
+{
+  int attr = 0;
+
+  if (unicode_attributes[ch].name != NULL)
+    {
+      /* mandatory break */
+      if (ch == 0x000A || ch == 0x000D || ch == 0x0085 /* newline */
+         || ch == 0x000C /* form feed */
+         || ch == 0x000B /* line tabulation */
+         || ch == 0x2028 /* LINE SEPARATOR */
+         || ch == 0x2029 /* PARAGRAPH SEPARATOR */)
+       attr |= 1 << LBP_BK;
+
+      if (ch == 0x2060 /* WORD JOINER */
+         || ch == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */)
+       attr |= 1 << LBP_WJ;
+
+      /* zero width space */
+      if (ch == 0x200B /* ZERO WIDTH SPACE */)
+       attr |= 1 << LBP_ZW;
+
+      /* non-breaking (glue) */
+      if (ch == 0x00A0 /* NO-BREAK SPACE */
+         || ch == 0x202F /* NARROW NO-BREAK SPACE */
+         || ch == 0x180E /* MONGOLIAN VOWEL SEPARATOR */
+         || ch == 0x034F /* COMBINING GRAPHEME JOINER */
+         || ch == 0x2007 /* FIGURE SPACE */
+         || ch == 0x2011 /* NON-BREAKING HYPHEN */
+         || ch == 0x0F08 /* TIBETAN MARK SBRUL SHAD */
+         || ch == 0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR */
+         || ch == 0x0F12 /* TIBETAN MARK RGYA GRAM SHAD */
+         || (ch >= 0x035C && ch <= 0x0362) /* COMBINING DOUBLE ... */)
+       attr |= 1 << LBP_GL;
+
+      /* space */
+      if (ch == 0x0020 /* SPACE */)
+       attr |= 1 << LBP_SP;
+
+      /* break opportunity before and after */
+      if (ch == 0x2014 /* EM DASH */)
+       attr |= 1 << LBP_B2;
+
+      /* break opportunity after */
+      if (ch == 0x1680 /* OGHAM SPACE MARK */
+         || ch == 0x2000 /* EN QUAD */
+         || ch == 0x2001 /* EM QUAD */
+         || ch == 0x2002 /* EN SPACE */
+         || ch == 0x2003 /* EM SPACE */
+         || ch == 0x2004 /* THREE-PER-EM SPACE */
+         || ch == 0x2005 /* FOUR-PER-EM SPACE */
+         || ch == 0x2006 /* SIX-PER-EM SPACE */
+         || ch == 0x2008 /* PUNCTUATION SPACE */
+         || ch == 0x2009 /* THIN SPACE */
+         || ch == 0x200A /* HAIR SPACE */
+         || ch == 0x205F /* MEDIUM MATHEMATICAL SPACE */
+         || ch == 0x0009 /* tab */
+         || ch == 0x00AD /* SOFT HYPHEN */
+         || ch == 0x058A /* ARMENIAN HYPHEN */
+         || ch == 0x2010 /* HYPHEN */
+         || ch == 0x2012 /* FIGURE DASH */
+         || ch == 0x2013 /* EN DASH */
+         || ch == 0x05BE /* HEBREW PUNCTUATION MAQAF */
+         || ch == 0x0F0B /* TIBETAN MARK INTERSYLLABIC TSHEG */
+         || ch == 0x1361 /* ETHIOPIC WORDSPACE */
+         || ch == 0x17D8 /* KHMER SIGN BEYYAL */
+         || ch == 0x17DA /* KHMER SIGN KOOMUUT */
+         || ch == 0x2027 /* HYPHENATION POINT */
+         || ch == 0x007C /* VERTICAL LINE */
+         || ch == 0x16EB /* RUNIC SINGLE PUNCTUATION */
+         || ch == 0x16EC /* RUNIC MULTIPLE PUNCTUATION */
+         || ch == 0x16ED /* RUNIC CROSS PUNCTUATION */
+         || ch == 0x2056 /* THREE DOT PUNCTUATION */
+         || ch == 0x2058 /* FOUR DOT PUNCTUATION */
+         || ch == 0x2059 /* FIVE DOT PUNCTUATION */
+         || ch == 0x205A /* TWO DOT PUNCTUATION */
+         || ch == 0x205B /* FOUR DOT MARK */
+         || ch == 0x205D /* TRICOLON */
+         || ch == 0x205E /* VERTICAL FOUR DOTS */
+         || ch == 0x2E19 /* PALM BRANCH */
+         || ch == 0x2E2A /* TWO DOTS OVER ONE DOT PUNCTUATION */
+         || ch == 0x2E2B /* ONE DOT OVER TWO DOTS PUNCTUATION */
+         || ch == 0x2E2C /* SQUARED FOUR DOT PUNCTUATION */
+         || ch == 0x2E2D /* FIVE DOT PUNCTUATION */
+         || ch == 0x2E30 /* RING POINT */
+         || ch == 0x10100 /* AEGEAN WORD SEPARATOR LINE */
+         || ch == 0x10101 /* AEGEAN WORD SEPARATOR DOT */
+         || ch == 0x10102 /* AEGEAN CHECK MARK */
+         || ch == 0x1039F /* UGARITIC WORD DIVIDER */
+         || ch == 0x103D0 /* OLD PERSIAN WORD DIVIDER */
+         || ch == 0x1091F /* PHOENICIAN WORD SEPARATOR */
+         || ch == 0x12470 /* CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER */
+         || ch == 0x0964 /* DEVANAGARI DANDA */
+         || ch == 0x0965 /* DEVANAGARI DOUBLE DANDA */
+         || ch == 0x0E5A /* THAI CHARACTER ANGKHANKHU */
+         || ch == 0x0E5B /* THAI CHARACTER KHOMUT */
+         || ch == 0x104A /* MYANMAR SIGN LITTLE SECTION */
+         || ch == 0x104B /* MYANMAR SIGN SECTION */
+         || ch == 0x1735 /* PHILIPPINE SINGLE PUNCTUATION */
+         || ch == 0x1736 /* PHILIPPINE DOUBLE PUNCTUATION */
+         || ch == 0x17D4 /* KHMER SIGN KHAN */
+         || ch == 0x17D5 /* KHMER SIGN BARIYOOSAN */
+         || ch == 0x1B5E /* BALINESE CARIK SIKI */
+         || ch == 0x1B5F /* BALINESE CARIK PAREREN */
+         || ch == 0xA8CE /* SAURASHTRA DANDA */
+         || ch == 0xA8CF /* SAURASHTRA DOUBLE DANDA */
+         || ch == 0xAA5D /* CHAM PUNCTUATION DANDA */
+         || ch == 0xAA5E /* CHAM PUNCTUATION DOUBLE DANDA */
+         || ch == 0xAA5F /* CHAM PUNCTUATION TRIPLE DANDA */
+         || ch == 0x10A56 /* KHAROSHTHI PUNCTUATION DANDA */
+         || ch == 0x10A57 /* KHAROSHTHI PUNCTUATION DOUBLE DANDA */
+         || ch == 0x0F34 /* TIBETAN MARK BSDUS RTAGS */
+         || ch == 0x0F7F /* TIBETAN SIGN RNAM BCAD */
+         || ch == 0x0F85 /* TIBETAN MARK PALUTA */
+         || ch == 0x0FBE /* TIBETAN KU RU KHA */
+         || ch == 0x0FBF /* TIBETAN KU RU KHA BZHI MIG CAN */
+         || ch == 0x0FD2 /* TIBETAN MARK NYIS TSHEG */
+#if !REVISION_22
+         || ch == 0x1802 /* MONGOLIAN COMMA */
+         || ch == 0x1803 /* MONGOLIAN FULL STOP */
+#endif
+         || ch == 0x1804 /* MONGOLIAN COLON */
+         || ch == 0x1805 /* MONGOLIAN FOUR DOTS */
+#if !REVISION_22
+         || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
+         || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
+#endif
+         || ch == 0x1B5A /* BALINESE PANTI */
+         || ch == 0x1B5B /* BALINESE PAMADA */
+         || ch == 0x1B5C /* BALINESE WINDU */
+         || ch == 0x1B5D /* BALINESE CARIK PAMUNGKAH */
+         || ch == 0x1B60 /* BALINESE PAMENENG */
+         || ch == 0x1C3B /* LEPCHA PUNCTUATION TA-ROL */
+         || ch == 0x1C3C /* LEPCHA PUNCTUATION NYET THYOOM TA-ROL */
+         || ch == 0x1C3D /* LEPCHA PUNCTUATION CER-WA */
+         || ch == 0x1C3E /* LEPCHA PUNCTUATION TSHOOK CER-WA */
+         || ch == 0x1C3F /* LEPCHA PUNCTUATION TSHOOK */
+         || ch == 0x1C7E /* OL CHIKI PUNCTUATION MUCAAD */
+         || ch == 0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD */
+#if !REVISION_22
+         || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
+#endif
+         || ch == 0x2CFA /* COPTIC OLD NUBIAN DIRECT QUESTION MARK */
+         || ch == 0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK */
+         || ch == 0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER */
+#if !REVISION_22
+         || ch == 0x2CFE /* COPTIC FULL STOP */
+#endif
+         || ch == 0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER */
+         || (ch >= 0x2E0E && ch <= 0x2E15) /* EDITORIAL CORONIS .. UPWARDS ANCORA */
+         || ch == 0x2E17 /* DOUBLE OBLIQUE HYPHEN */
+         || ch == 0xA60D /* VAI COMMA */
+         || ch == 0xA60F /* VAI QUESTION MARK */
+         || ch == 0xA92E /* KAYAH LI SIGN CWI */
+         || ch == 0xA92F /* KAYAH LI SIGN SHYA */
+         || ch == 0x10A50 /* KHAROSHTHI PUNCTUATION DOT */
+         || ch == 0x10A51 /* KHAROSHTHI PUNCTUATION SMALL CIRCLE */
+         || ch == 0x10A52 /* KHAROSHTHI PUNCTUATION CIRCLE */
+         || ch == 0x10A53 /* KHAROSHTHI PUNCTUATION CRESCENT BAR */
+         || ch == 0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM */
+         || ch == 0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS */
+         /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+#if !REVISION_22
+         || ch == 0x1A1E /* BUGINESE PALLAWA */
+#endif
+         || ch == 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON */
+         || ch == 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON */
+         || ch == 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON */)
+       attr |= 1 << LBP_BA;
+
+      /* break opportunity before */
+      if (ch == 0x00B4 /* ACUTE ACCENT */
+#if REVISION_22
+         || ch == 0x1FFD /* GREEK OXIA */
+         || ch == 0x02DF /* MODIFIER LETTER CROSS ACCENT */
+#endif
+         || ch == 0x02C8 /* MODIFIER LETTER VERTICAL LINE */
+         || ch == 0x02CC /* MODIFIER LETTER LOW VERTICAL LINE */
+         || ch == 0x0F01 /* TIBETAN MARK GTER YIG MGO TRUNCATED A */
+         || ch == 0x0F02 /* TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA */
+         || ch == 0x0F03 /* TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA */
+         || ch == 0x0F04 /* TIBETAN MARK INITIAL YIG MGO MDUN MA */
+         || ch == 0x0F06 /* TIBETAN MARK CARET YIG MGO PHUR SHAD MA */
+         || ch == 0x0F07 /* TIBETAN MARK YIG MGO TSHEG SHAD MA */
+         || ch == 0x0F09 /* TIBETAN MARK BSKUR YIG MGO */
+         || ch == 0x0F0A /* TIBETAN MARK BKA- SHOG YIG MGO */
+         || ch == 0x0FD0 /* TIBETAN MARK BSKA- SHOG GI MGO RGYAN */
+         || ch == 0x0FD1 /* TIBETAN MARK MNYAM YIG GI MGO RGYAN */
+         || ch == 0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA */
+         || ch == 0xA874 /* PHAGS-PA SINGLE HEAD MARK */
+         || ch == 0xA875 /* PHAGS-PA DOUBLE HEAD MARK */
+         || ch == 0x1806 /* MONGOLIAN TODO SOFT HYPHEN */)
+       attr |= 1 << LBP_BB;
+
+      /* hyphen */
+      if (ch == 0x002D /* HYPHEN-MINUS */)
+       attr |= 1 << LBP_HY;
+
+      /* contingent break opportunity */
+      if (ch == 0xFFFC /* OBJECT REPLACEMENT CHARACTER */)
+       attr |= 1 << LBP_CB;
+
+      /* closing punctuation */
+      if ((unicode_attributes[ch].category[0] == 'P'
+          && unicode_attributes[ch].category[1] == 'e')
+         || ch == 0x3001 /* IDEOGRAPHIC COMMA */
+         || ch == 0x3002 /* IDEOGRAPHIC FULL STOP */
+         || ch == 0xFE11 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA */
+         || ch == 0xFE12 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP */
+         || ch == 0xFE50 /* SMALL COMMA */
+         || ch == 0xFE52 /* SMALL FULL STOP */
+         || ch == 0xFF0C /* FULLWIDTH COMMA */
+         || ch == 0xFF0E /* FULLWIDTH FULL STOP */
+         || ch == 0xFF61 /* HALFWIDTH IDEOGRAPHIC FULL STOP */
+         || ch == 0xFF64 /* HALFWIDTH IDEOGRAPHIC COMMA */)
+       attr |= 1 << LBP_CL;
+
+      /* exclamation/interrogation */
+      if (ch == 0x0021 /* EXCLAMATION MARK */
+         || ch == 0x003F /* QUESTION MARK */
+         || ch == 0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA */
+#if !REVISION_22
+         || ch == 0x060C /* ARABIC COMMA */
+#endif
+         || ch == 0x061B /* ARABIC SEMICOLON */
+         || ch == 0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK */
+         || ch == 0x061F /* ARABIC QUESTION MARK */
+#if !REVISION_22
+         || ch == 0x066A /* ARABIC PERCENT SIGN */
+#endif
+         || ch == 0x06D4 /* ARABIC FULL STOP */
+         || ch == 0x07F9 /* NKO EXCLAMATION MARK */
+         || ch == 0x0F0D /* TIBETAN MARK SHAD */
+         || ch == 0x0F0E /* TIBETAN MARK NYIS SHAD */
+         || ch == 0x0F0F /* TIBETAN MARK TSHEG SHAD */
+         || ch == 0x0F10 /* TIBETAN MARK NYIS TSHEG SHAD */
+         || ch == 0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD */
+         || ch == 0x0F14 /* TIBETAN MARK GTER TSHEG */
+#if REVISION_22
+         || ch == 0x1802 /* MONGOLIAN COMMA */
+         || ch == 0x1803 /* MONGOLIAN FULL STOP */
+         || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
+         || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
+#endif
+         || ch == 0x1944 /* LIMBU EXCLAMATION MARK */
+         || ch == 0x1945 /* LIMBU QUESTION MARK */
+         || ch == 0x2762 /* HEAVY EXCLAMATION MARK ORNAMENT */
+         || ch == 0x2763 /* HEAVY HEART EXCLAMATION MARK ORNAMENT */
+#if REVISION_22
+         || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
+         || ch == 0x2CFE /* COPTIC FULL STOP */
+#endif
+         || ch == 0x2E2E /* REVERSED QUESTION MARK */
+         || ch == 0xA60C /* VAI SYLLABLE LENGTHENER */
+         || ch == 0xA60E /* VAI FULL STOP */
+         || ch == 0xA876 /* PHAGS-PA MARK SHAD */
+         || ch == 0xA877 /* PHAGS-PA MARK DOUBLE SHAD */
+         || ch == 0xFE15 /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
+         || ch == 0xFE16 /* PRESENTATION FORM FOR VERTICAL QUESTION MARK */
+         || ch == 0xFE56 /* SMALL QUESTION MARK */
+         || ch == 0xFE57 /* SMALL EXCLAMATION MARK */
+         || ch == 0xFF01 /* FULLWIDTH EXCLAMATION MARK */
+         || ch == 0xFF1F /* FULLWIDTH QUESTION MARK */)
+       attr |= 1 << LBP_EX;
+
+      /* inseparable */
+      if (ch == 0x2024 /* ONE DOT LEADER */
+         || ch == 0x2025 /* TWO DOT LEADER */
+         || ch == 0x2026 /* HORIZONTAL ELLIPSIS */
+         || ch == 0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */)
+       attr |= 1 << LBP_IN;
+
+      /* non starter */
+      if (ch == 0x17D6 /* KHMER SIGN CAMNUC PII KUUH */
+         || ch == 0x203C /* DOUBLE EXCLAMATION MARK */
+         || ch == 0x203D /* INTERROBANG */
+         || ch == 0x2047 /* DOUBLE QUESTION MARK */
+         || ch == 0x2048 /* QUESTION EXCLAMATION MARK */
+         || ch == 0x2049 /* EXCLAMATION QUESTION MARK */
+         || ch == 0x3005 /* IDEOGRAPHIC ITERATION MARK */
+         || ch == 0x301C /* WAVE DASH */
+         || ch == 0x303C /* MASU MARK */
+         || ch == 0x303B /* VERTICAL IDEOGRAPHIC ITERATION MARK */
+         || ch == 0x309B /* KATAKANA-HIRAGANA VOICED SOUND MARK */
+         || ch == 0x309C /* KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
+         || ch == 0x309D /* HIRAGANA ITERATION MARK */
+         || ch == 0x309E /* HIRAGANA VOICED ITERATION MARK */
+         || ch == 0x30A0 /* KATAKANA-HIRAGANA DOUBLE HYPHEN */
+         || ch == 0x30FB /* KATAKANA MIDDLE DOT */
+         || ch == 0x30FC /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
+         || ch == 0x30FD /* KATAKANA ITERATION MARK */
+         || ch == 0x30FE /* KATAKANA VOICED ITERATION MARK */
+         || ch == 0xA015 /* YI SYLLABLE WU */
+         || ch == 0xFE54 /* SMALL SEMICOLON */
+         || ch == 0xFE55 /* SMALL COLON */
+         || ch == 0xFF1A /* FULLWIDTH COLON */
+         || ch == 0xFF1B /* FULLWIDTH SEMICOLON */
+         || ch == 0xFF65 /* HALFWIDTH KATAKANA MIDDLE DOT */
+         || ch == 0xFF70 /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
+         || ch == 0xFF9E /* HALFWIDTH KATAKANA VOICED SOUND MARK */
+         || ch == 0xFF9F /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
+         || strstr (unicode_attributes[ch].name, "HIRAGANA LETTER SMALL ") != NULL
+         || strstr (unicode_attributes[ch].name, "KATAKANA LETTER SMALL ") != NULL)
+       attr |= 1 << LBP_NS;
+
+      /* opening punctuation */
+      if ((unicode_attributes[ch].category[0] == 'P'
+          && unicode_attributes[ch].category[1] == 's')
+#if REVISION_22
+         || ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
+         || ch == 0x00BF /* INVERTED QUESTION MARK */
+#endif
+         || ch == 0x2E18 /* INVERTED INTERROBANG */)
+       attr |= 1 << LBP_OP;
+
+      /* ambiguous quotation */
+      if ((unicode_attributes[ch].category[0] == 'P'
+          && (unicode_attributes[ch].category[1] == 'f'
+              || unicode_attributes[ch].category[1] == 'i'))
+         || ch == 0x0022 /* QUOTATION MARK */
+         || ch == 0x0027 /* APOSTROPHE */
+         || ch == 0x275B /* HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT */
+         || ch == 0x275C /* HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT */
+         || ch == 0x275D /* HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT */
+         || ch == 0x275E /* HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT */
+         || ch == 0x2E00 /* RIGHT ANGLE SUBSTITUTION MARKER */
+         || ch == 0x2E01 /* RIGHT ANGLE DOTTED SUBSTITUTION MARKER */
+         || ch == 0x2E06 /* RAISED INTERPOLATION MARKER */
+         || ch == 0x2E07 /* RAISED DOTTED INTERPOLATION MARKER */
+         || ch == 0x2E08 /* DOTTED TRANSPOSITION MARKER */
+         || ch == 0x2E0B /* RAISED SQUARE */)
+       attr |= 1 << LBP_QU;
+
+      /* infix separator (numeric) */
+      if (ch == 0x002C /* COMMA */
+         || ch == 0x002E /* FULL STOP */
+         || ch == 0x003A /* COLON */
+         || ch == 0x003B /* SEMICOLON */
+         || ch == 0x037E /* GREEK QUESTION MARK */
+         || ch == 0x0589 /* ARMENIAN FULL STOP */
+#if REVISION_22
+         || ch == 0x060C /* ARABIC COMMA */
+#endif
+         || ch == 0x060D /* ARABIC DATE SEPARATOR */
+         || ch == 0x07F8 /* NKO COMMA */
+         || ch == 0x2044 /* FRACTION SLASH */
+         || ch == 0xFE10 /* PRESENTATION FORM FOR VERTICAL COMMA */
+         || ch == 0xFE13 /* PRESENTATION FORM FOR VERTICAL COLON */
+         || ch == 0xFE14 /* PRESENTATION FORM FOR VERTICAL SEMICOLON */)
+       attr |= 1 << LBP_IS;
+
+      /* numeric */
+      if ((unicode_attributes[ch].category[0] == 'N'
+          && unicode_attributes[ch].category[1] == 'd'
+          && strstr (unicode_attributes[ch].name, "FULLWIDTH") == NULL)
+         || ch == 0x066B /* ARABIC DECIMAL SEPARATOR */
+         || ch == 0x066C /* ARABIC THOUSANDS SEPARATOR */)
+       attr |= 1 << LBP_NU;
+
+      /* postfix (numeric) */
+      if (ch == 0x0025 /* PERCENT SIGN */
+         || ch == 0x00A2 /* CENT SIGN */
+         || ch == 0x00B0 /* DEGREE SIGN */
+         || ch == 0x060B /* AFGHANI SIGN */
+#if REVISION_22
+         || ch == 0x066A /* ARABIC PERCENT SIGN */
+#endif
+         || ch == 0x2030 /* PER MILLE SIGN */
+         || ch == 0x2031 /* PER TEN THOUSAND SIGN */
+         || ch == 0x2032 /* PRIME */
+         || ch == 0x2033 /* DOUBLE PRIME */
+         || ch == 0x2034 /* TRIPLE PRIME */
+         || ch == 0x2035 /* REVERSED PRIME */
+         || ch == 0x2036 /* REVERSED DOUBLE PRIME */
+         || ch == 0x2037 /* REVERSED TRIPLE PRIME */
+         || ch == 0x20A7 /* PESETA SIGN */
+         || ch == 0x2103 /* DEGREE CELSIUS */
+         || ch == 0x2109 /* DEGREE FAHRENHEIT */
+         || ch == 0xFDFC /* RIAL SIGN */
+         || ch == 0xFE6A /* SMALL PERCENT SIGN */
+         || ch == 0xFF05 /* FULLWIDTH PERCENT SIGN */
+         || ch == 0xFFE0 /* FULLWIDTH DIGIT ZERO */)
+       attr |= 1 << LBP_PO;
+
+      /* prefix (numeric) */
+      if ((unicode_attributes[ch].category[0] == 'S'
+          && unicode_attributes[ch].category[1] == 'c')
+         || ch == 0x002B /* PLUS SIGN */
+         || ch == 0x005C /* REVERSE SOLIDUS */
+         || ch == 0x00B1 /* PLUS-MINUS SIGN */
+         || ch == 0x2116 /* NUMERO SIGN */
+         || ch == 0x2212 /* MINUS SIGN */
+         || ch == 0x2213 /* MINUS-OR-PLUS SIGN */)
+       if (!(attr & (1 << LBP_PO)))
+         attr |= 1 << LBP_PR;
+
+      /* symbols allowing breaks */
+      if (ch == 0x002F /* SOLIDUS */)
+       attr |= 1 << LBP_SY;
+
+      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) == 0)
+       attr |= 1 << LBP_H2;
+
+      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) != 0)
+       attr |= 1 << LBP_H3;
+
+      if ((ch >= 0x1100 && ch <= 0x1159) || ch == 0x115F)
+       attr |= 1 << LBP_JL;
+
+      if (ch >= 0x1160 && ch <= 0x11A2)
+       attr |= 1 << LBP_JV;
+
+      if (ch >= 0x11A8 && ch <= 0x11F9)
+       attr |= 1 << LBP_JT;
+
+      /* complex context (South East Asian) */
+      if (((unicode_attributes[ch].category[0] == 'C'
+           && unicode_attributes[ch].category[1] == 'f')
+          || (unicode_attributes[ch].category[0] == 'L'
+              && (unicode_attributes[ch].category[1] == 'm'
+                  || unicode_attributes[ch].category[1] == 'o'))
+          || (unicode_attributes[ch].category[0] == 'M'
+              && (unicode_attributes[ch].category[1] == 'c'
+                  || unicode_attributes[ch].category[1] == 'n'))
+          /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+          || ch == 0x19DE /* NEW TAI LUE SIGN LAE */
+          || ch == 0x19DF /* NEW TAI LUE SIGN LAEV */)
+         && ((ch >= 0x0E00 && ch <= 0x0EFF)
+             || (ch >= 0x1000 && ch <= 0x109F)
+             || (ch >= 0x1780 && ch <= 0x17FF)
+             || (ch >= 0x1950 && ch <= 0x19DF)))
+       attr |= 1 << LBP_SA;
+
+      /* attached characters and combining marks */
+      if ((unicode_attributes[ch].category[0] == 'M'
+          && (unicode_attributes[ch].category[1] == 'c'
+              || unicode_attributes[ch].category[1] == 'e'
+              || unicode_attributes[ch].category[1] == 'n'))
+         || (unicode_attributes[ch].category[0] == 'C'
+             && (unicode_attributes[ch].category[1] == 'c'
+                 || unicode_attributes[ch].category[1] == 'f')))
+       if (!(attr & ((1 << LBP_BK) | (1 << LBP_BA) | (1 << LBP_GL) | (1 << LBP_SA) | (1 << LBP_WJ) | (1 << LBP_ZW))))
+         attr |= 1 << LBP_CM;
+
+      /* ideographic */
+      if ((ch >= 0x2E80 && ch <= 0x2FFF) /* CJK RADICAL, KANGXI RADICAL, IDEOGRAPHIC DESCRIPTION */
+         || ch == 0x3000 /* IDEOGRAPHIC SPACE */
+         || (ch >= 0x3040 && ch <= 0x309F) /* HIRAGANA */
+         || (ch >= 0x30A0 && ch <= 0x30FF) /* KATAKANA */
+         || (ch >= 0x3400 && ch <= 0x4DB5) /* CJK Ideograph Extension A */
+         || (ch >= 0x4E00 && ch <= 0x9FBB) /* CJK Ideograph */
+         || (ch >= 0xF900 && ch <= 0xFAD9) /* CJK COMPATIBILITY IDEOGRAPH */
+         || (ch >= 0xA000 && ch <= 0xA48F) /* YI SYLLABLE */
+         || (ch >= 0xA490 && ch <= 0xA4CF) /* YI RADICAL */
+         || ch == 0xFE62 /* SMALL PLUS SIGN */
+         || ch == 0xFE63 /* SMALL HYPHEN-MINUS */
+         || ch == 0xFE64 /* SMALL LESS-THAN SIGN */
+         || ch == 0xFE65 /* SMALL GREATER-THAN SIGN */
+         || ch == 0xFE66 /* SMALL EQUALS SIGN */
+         || (ch >= 0xFF10 && ch <= 0xFF19) /* FULLWIDTH DIGIT */
+         || (ch >= 0x20000 && ch <= 0x2A6D6) /* CJK Ideograph Extension B */
+         || (ch >= 0x2F800 && ch <= 0x2FA1D) /* CJK COMPATIBILITY IDEOGRAPH */
+         || strstr (unicode_attributes[ch].name, "FULLWIDTH LATIN ") != NULL
+         || (ch >= 0x3000 && ch <= 0x33FF
+             && !(attr & ((1 << LBP_CM) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_CL))))
+         /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+         || ch == 0xFE30 /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER */
+         || ch == 0xFE31 /* PRESENTATION FORM FOR VERTICAL EM DASH */
+         || ch == 0xFE32 /* PRESENTATION FORM FOR VERTICAL EN DASH */
+         || ch == 0xFE33 /* PRESENTATION FORM FOR VERTICAL LOW LINE */
+         || ch == 0xFE34 /* PRESENTATION FORM FOR VERTICAL WAVY LOW LINE */
+         || ch == 0xFE45 /* SESAME DOT */
+         || ch == 0xFE46 /* WHITE SESAME DOT */
+         || ch == 0xFE49 /* DASHED OVERLINE */
+         || ch == 0xFE4A /* CENTRELINE OVERLINE */
+         || ch == 0xFE4B /* WAVY OVERLINE */
+         || ch == 0xFE4C /* DOUBLE WAVY OVERLINE */
+         || ch == 0xFE4D /* DASHED LOW LINE */
+         || ch == 0xFE4E /* CENTRELINE LOW LINE */
+         || ch == 0xFE4F /* WAVY LOW LINE */
+         || ch == 0xFE51 /* SMALL IDEOGRAPHIC COMMA */
+         || ch == 0xFE58 /* SMALL EM DASH */
+         || ch == 0xFE5F /* SMALL NUMBER SIGN */
+         || ch == 0xFE60 /* SMALL AMPERSAND */
+         || ch == 0xFE61 /* SMALL ASTERISK */
+         || ch == 0xFE68 /* SMALL REVERSE SOLIDUS */
+         || ch == 0xFE6B /* SMALL COMMERCIAL AT */
+         || ch == 0xFF02 /* FULLWIDTH QUOTATION MARK */
+         || ch == 0xFF03 /* FULLWIDTH NUMBER SIGN */
+         || ch == 0xFF06 /* FULLWIDTH AMPERSAND */
+         || ch == 0xFF07 /* FULLWIDTH APOSTROPHE */
+         || ch == 0xFF0A /* FULLWIDTH ASTERISK */
+         || ch == 0xFF0B /* FULLWIDTH PLUS SIGN */
+         || ch == 0xFF0D /* FULLWIDTH HYPHEN-MINUS */
+         || ch == 0xFF0F /* FULLWIDTH SOLIDUS */
+         || ch == 0xFF1C /* FULLWIDTH LESS-THAN SIGN */
+         || ch == 0xFF1D /* FULLWIDTH EQUALS SIGN */
+         || ch == 0xFF1E /* FULLWIDTH GREATER-THAN SIGN */
+         || ch == 0xFF20 /* FULLWIDTH COMMERCIAL AT */
+         || ch == 0xFF3C /* FULLWIDTH REVERSE SOLIDUS */
+         || ch == 0xFF3E /* FULLWIDTH CIRCUMFLEX ACCENT */
+         || ch == 0xFF3F /* FULLWIDTH LOW LINE */
+         || ch == 0xFF40 /* FULLWIDTH GRAVE ACCENT */
+         || ch == 0xFF5C /* FULLWIDTH VERTICAL LINE */
+         || ch == 0xFF5E /* FULLWIDTH TILDE */
+         || ch == 0xFFE2 /* FULLWIDTH NOT SIGN */
+         || ch == 0xFFE3 /* FULLWIDTH MACRON */
+         || ch == 0xFFE4 /* FULLWIDTH BROKEN BAR */)
+       if (!(attr & ((1 << LBP_NS) | (1 << LBP_CM))))
+         {
+           /* ambiguous (ideograph) ? */
+           if ((unicode_width[ch] != NULL
+                && unicode_width[ch][0] == 'A'
+                && ch >= 0x2000)
+               || ch == 0x24EA /* CIRCLED DIGIT ZERO */
+               || (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */)
+             attr |= 1 << LBP_AI;
+           else
+             attr |= 1 << LBP_ID;
+         }
+
+      /* ordinary alphabetic and symbol characters */
+      if ((unicode_attributes[ch].category[0] == 'L'
+          && (unicode_attributes[ch].category[1] == 'u'
+              || unicode_attributes[ch].category[1] == 'l'
+              || unicode_attributes[ch].category[1] == 't'
+              || unicode_attributes[ch].category[1] == 'm'
+              || unicode_attributes[ch].category[1] == 'o'))
+         || (unicode_attributes[ch].category[0] == 'S'
+             && (unicode_attributes[ch].category[1] == 'm'
+                 || unicode_attributes[ch].category[1] == 'k'
+                 || unicode_attributes[ch].category[1] == 'o'))
+         || (unicode_attributes[ch].category[0] == 'N'
+             && (unicode_attributes[ch].category[1] == 'l'
+                 || unicode_attributes[ch].category[1] == 'o'))
+         || (unicode_attributes[ch].category[0] == 'P'
+             && (unicode_attributes[ch].category[1] == 'c'
+                 || unicode_attributes[ch].category[1] == 'd'
+                 || unicode_attributes[ch].category[1] == 'o'))
+         || ch == 0x0600 /* ARABIC NUMBER SIGN */
+         || ch == 0x0601 /* ARABIC SIGN SANAH */
+         || ch == 0x0602 /* ARABIC FOOTNOTE MARKER */
+         || ch == 0x0603 /* ARABIC SIGN SAFHA */
+         || ch == 0x06DD /* ARABIC END OF AYAH */
+         || ch == 0x070F /* SYRIAC ABBREVIATION MARK */
+         || ch == 0x2061 /* FUNCTION APPLICATION */
+         || ch == 0x2062 /* INVISIBLE TIMES */
+         || ch == 0x2063 /* INVISIBLE SEPARATOR */
+         || ch == 0x2064 /* INVISIBLE PLUS */)
+       if (!(attr & ((1 << LBP_GL) | (1 << LBP_B2) | (1 << LBP_BA) | (1 << LBP_BB) | (1 << LBP_HY) | (1 << LBP_CB) | (1 << LBP_CL) | (1 << LBP_EX) | (1 << LBP_IN) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_QU) | (1 << LBP_IS) | (1 << LBP_NU) | (1 << LBP_PO) | (1 << LBP_PR) | (1 << LBP_SY) | (1 << LBP_H2) | (1 << LBP_H3) | (1 << LBP_JL) | (1 << LBP_JV) | (1 << LBP_JT) | (1 << LBP_SA) | (1 << LBP_ID))))
+         {
+           /* ambiguous (alphabetic) ? */
+           if ((unicode_width[ch] != NULL
+                && unicode_width[ch][0] == 'A'
+                && ch >= 0x2000
+                /* Extra exceptions for compatibility with Unicode LineBreak.txt.  */
+                && ch != 0x2022 /* BULLET */
+                && ch != 0x203E /* OVERLINE */
+                && ch != 0x2126 /* OHM SIGN */
+                && ch != 0x2153 /* VULGAR FRACTION ONE THIRD */
+                && ch != 0x215C /* VULGAR FRACTION THREE EIGHTHS */
+                && ch != 0x215D /* VULGAR FRACTION FIVE EIGHTHS */
+                && ch != 0x21B8 /* NORTH WEST ARROW TO LONG BAR */
+                && ch != 0x21B9 /* LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR */
+                && ch != 0x21E7 /* UPWARDS WHITE ARROW */
+                && ch != 0x24FF /* NEGATIVE CIRCLED DIGIT ZERO */
+                && ch != 0x273D /* HEAVY TEARDROP-SPOKED ASTERISK */)
+#if !REVISION_22
+               || ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
+               || ch == 0x00A7 /* SECTION SIGN */
+               || ch == 0x00A8 /* DIAERESIS */
+               || ch == 0x00AA /* FEMININE ORDINAL INDICATOR */
+               || ch == 0x00B2 /* SUPERSCRIPT TWO */
+               || ch == 0x00B3 /* SUPERSCRIPT THREE */
+               || ch == 0x00B6 /* PILCROW SIGN */
+               || ch == 0x00B7 /* MIDDLE DOT */
+               || ch == 0x00B8 /* CEDILLA */
+               || ch == 0x00B9 /* SUPERSCRIPT ONE */
+               || ch == 0x00BA /* MASCULINE ORDINAL INDICATOR */
+               || ch == 0x00BC /* VULGAR FRACTION ONE QUARTER */
+               || ch == 0x00BD /* VULGAR FRACTION ONE HALF */
+               || ch == 0x00BE /* VULGAR FRACTION THREE QUARTERS */
+               || ch == 0x00BF /* INVERTED QUESTION MARK */
+               || ch == 0x00D7 /* MULTIPLICATION SIGN */
+               || ch == 0x00F7 /* DIVISION SIGN */
+               || ch == 0x02C7 /* CARON */
+               || ch == 0x02C9 /* MODIFIER LETTER MACRON */
+               || ch == 0x02CA /* MODIFIER LETTER ACUTE ACCENT */
+               || ch == 0x02CB /* MODIFIER LETTER GRAVE ACCENT */
+               || ch == 0x02CD /* MODIFIER LETTER LOW MACRON */
+               || ch == 0x02D0 /* MODIFIER LETTER TRIANGULAR COLON */
+               || ch == 0x02D8 /* BREVE */
+               || ch == 0x02D9 /* DOT ABOVE */
+               || ch == 0x02DA /* RING ABOVE */
+               || ch == 0x02DB /* OGONEK */
+               || ch == 0x02DD /* DOUBLE ACUTE ACCENT */
+#endif
+               || ch == 0x24EA /* CIRCLED DIGIT ZERO */
+               || (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */
+               /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+               || ch == 0x2155 /* VULGAR FRACTION ONE FIFTH */
+               || ch == 0x2574 /* BOX DRAWINGS LIGHT LEFT */
+               || ch == 0x2616 /* WHITE SHOGI PIECE */
+               || ch == 0x2617 /* BLACK SHOGI PIECE */)
+             attr |= 1 << LBP_AI;
+           else
+             attr |= 1 << LBP_AL;
+           attr &= ~(1 << LBP_CM);
+         }
+    }
+
+  if (attr == 0)
+    /* unknown */
+    attr |= 1 << LBP_XX;
+
+  return attr;
+}
+
+/* Output the line breaking properties in a human readable format.  */
+static void
+debug_output_lbp (FILE *stream)
+{
+  unsigned int i;
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = get_lbp (i);
+      if (attr != 1 << LBP_XX)
+       {
+         fprintf (stream, "0x%04X", i);
+#define PRINT_BIT(attr,bit) \
+  if (attr & (1 << bit)) fprintf (stream, " " #bit);
+         PRINT_BIT(attr,LBP_BK);
+         PRINT_BIT(attr,LBP_CM);
+         PRINT_BIT(attr,LBP_WJ);
+         PRINT_BIT(attr,LBP_ZW);
+         PRINT_BIT(attr,LBP_GL);
+         PRINT_BIT(attr,LBP_SP);
+         PRINT_BIT(attr,LBP_B2);
+         PRINT_BIT(attr,LBP_BA);
+         PRINT_BIT(attr,LBP_BB);
+         PRINT_BIT(attr,LBP_HY);
+         PRINT_BIT(attr,LBP_CB);
+         PRINT_BIT(attr,LBP_CL);
+         PRINT_BIT(attr,LBP_EX);
+         PRINT_BIT(attr,LBP_IN);
+         PRINT_BIT(attr,LBP_NS);
+         PRINT_BIT(attr,LBP_OP);
+         PRINT_BIT(attr,LBP_QU);
+         PRINT_BIT(attr,LBP_IS);
+         PRINT_BIT(attr,LBP_NU);
+         PRINT_BIT(attr,LBP_PO);
+         PRINT_BIT(attr,LBP_PR);
+         PRINT_BIT(attr,LBP_SY);
+         PRINT_BIT(attr,LBP_AI);
+         PRINT_BIT(attr,LBP_AL);
+         PRINT_BIT(attr,LBP_H2);
+         PRINT_BIT(attr,LBP_H3);
+         PRINT_BIT(attr,LBP_ID);
+         PRINT_BIT(attr,LBP_JL);
+         PRINT_BIT(attr,LBP_JV);
+         PRINT_BIT(attr,LBP_JT);
+         PRINT_BIT(attr,LBP_SA);
+         PRINT_BIT(attr,LBP_XX);
+#undef PRINT_BIT
+         fprintf (stream, "\n");
+       }
+    }
+}
+
+static void
+debug_output_lbrk_tables (const char *filename)
+{
+  FILE *stream;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  debug_output_lbp (stream);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* The line breaking property from the LineBreak.txt file.  */
+int unicode_org_lbp[0x110000];
+
+/* Stores in unicode_org_lbp[] the line breaking property from the
+   LineBreak.txt file.  */
+static void
+fill_org_lbp (const char *linebreak_filename)
+{
+  unsigned int i, j;
+  FILE *stream;
+  char field0[FIELDLEN];
+  char field1[FIELDLEN];
+  char field2[FIELDLEN];
+  int lineno = 0;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_org_lbp[i] = LBP_XX;
+
+  stream = fopen (linebreak_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", linebreak_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      int n;
+      int c;
+      int value;
+
+      lineno++;
+      c = getc (stream);
+      if (c == EOF)
+       break;
+      if (c == '#')
+       {
+         do c = getc (stream); while (c != EOF && c != '\n');
+         continue;
+       }
+      ungetc (c, stream);
+      n = getfield (stream, field0, ';');
+      n += getfield (stream, field1, ' ');
+      n += getfield (stream, field2, '\n');
+      if (n == 0)
+       break;
+      if (n != 3)
+       {
+         fprintf (stderr, "short line in '%s':%d\n", linebreak_filename,
+                  lineno);
+         exit (1);
+       }
+#define TRY(bit) else if (strcmp (field1, #bit + 4) == 0) value = bit;
+      if (false) {}
+      TRY(LBP_BK)
+      TRY(LBP_CM)
+      TRY(LBP_WJ)
+      TRY(LBP_ZW)
+      TRY(LBP_GL)
+      TRY(LBP_SP)
+      TRY(LBP_B2)
+      TRY(LBP_BA)
+      TRY(LBP_BB)
+      TRY(LBP_HY)
+      TRY(LBP_CB)
+      TRY(LBP_CL)
+      TRY(LBP_EX)
+      TRY(LBP_IN)
+      TRY(LBP_NS)
+      TRY(LBP_OP)
+      TRY(LBP_QU)
+      TRY(LBP_IS)
+      TRY(LBP_NU)
+      TRY(LBP_PO)
+      TRY(LBP_PR)
+      TRY(LBP_SY)
+      TRY(LBP_AI)
+      TRY(LBP_AL)
+      TRY(LBP_H2)
+      TRY(LBP_H3)
+      TRY(LBP_ID)
+      TRY(LBP_JL)
+      TRY(LBP_JV)
+      TRY(LBP_JT)
+      TRY(LBP_SA)
+      TRY(LBP_XX)
+#undef TRY
+      else if (strcmp (field1, "LF") == 0) value = LBP_BK;
+      else if (strcmp (field1, "CR") == 0) value = LBP_BK;
+      else if (strcmp (field1, "NL") == 0) value = LBP_BK;
+      else if (strcmp (field1, "SG") == 0) value = LBP_XX;
+      else
+       {
+         fprintf (stderr, "unknown property value \"%s\" in '%s':%d\n",
+                  field1, linebreak_filename, lineno);
+         exit (1);
+       }
+      i = strtoul (field0, NULL, 16);
+      if (strstr (field0, "..") != NULL)
+       {
+         /* Deal with a range.  */
+         j = strtoul (strstr (field0, "..") + 2, NULL, 16);
+         for (; i <= j; i++)
+           unicode_org_lbp[i] = value;
+       }
+      else
+       {
+         /* Single character line.  */
+         unicode_org_lbp[i] = value;
+       }
+    }
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", linebreak_filename);
+      exit (1);
+    }
+}
+
+/* Output the line breaking properties in a human readable format.  */
+static void
+debug_output_org_lbp (FILE *stream)
+{
+  unsigned int i;
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = unicode_org_lbp[i];
+      if (attr != LBP_XX)
+       {
+         fprintf (stream, "0x%04X", i);
+#define PRINT_BIT(attr,bit) \
+  if (attr == bit) fprintf (stream, " " #bit);
+         PRINT_BIT(attr,LBP_BK);
+         PRINT_BIT(attr,LBP_CM);
+         PRINT_BIT(attr,LBP_WJ);
+         PRINT_BIT(attr,LBP_ZW);
+         PRINT_BIT(attr,LBP_GL);
+         PRINT_BIT(attr,LBP_SP);
+         PRINT_BIT(attr,LBP_B2);
+         PRINT_BIT(attr,LBP_BA);
+         PRINT_BIT(attr,LBP_BB);
+         PRINT_BIT(attr,LBP_HY);
+         PRINT_BIT(attr,LBP_CB);
+         PRINT_BIT(attr,LBP_CL);
+         PRINT_BIT(attr,LBP_EX);
+         PRINT_BIT(attr,LBP_IN);
+         PRINT_BIT(attr,LBP_NS);
+         PRINT_BIT(attr,LBP_OP);
+         PRINT_BIT(attr,LBP_QU);
+         PRINT_BIT(attr,LBP_IS);
+         PRINT_BIT(attr,LBP_NU);
+         PRINT_BIT(attr,LBP_PO);
+         PRINT_BIT(attr,LBP_PR);
+         PRINT_BIT(attr,LBP_SY);
+         PRINT_BIT(attr,LBP_AI);
+         PRINT_BIT(attr,LBP_AL);
+         PRINT_BIT(attr,LBP_H2);
+         PRINT_BIT(attr,LBP_H3);
+         PRINT_BIT(attr,LBP_ID);
+         PRINT_BIT(attr,LBP_JL);
+         PRINT_BIT(attr,LBP_JV);
+         PRINT_BIT(attr,LBP_JT);
+         PRINT_BIT(attr,LBP_SA);
+         PRINT_BIT(attr,LBP_XX);
+#undef PRINT_BIT
+         fprintf (stream, "\n");
+       }
+    }
+}
+
+static void
+debug_output_org_lbrk_tables (const char *filename)
+{
+  FILE *stream;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  debug_output_org_lbp (stream);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE lbp_table
+#define ELEMENT unsigned char
+#define DEFAULT LBP_XX
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+static void
+output_lbp (FILE *stream1, FILE *stream2)
+{
+  unsigned int i;
+  struct lbp_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  t.p = 7;
+  t.q = 9;
+  lbp_table_init (&t);
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = get_lbp (i);
+
+      /* Now attr should contain exactly one bit.  */
+      if (attr == 0 || ((attr & (attr - 1)) != 0))
+       abort ();
+
+      if (attr != 1 << LBP_XX)
+       {
+         unsigned int log2_attr;
+         for (log2_attr = 0; attr > 1; attr >>= 1, log2_attr++);
+
+         lbp_table_add (&t, i, log2_attr);
+       }
+    }
+
+  lbp_table_finalize (&t);
+
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream1, "#define lbrkprop_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream1, "\n");
+  fprintf (stream1, "typedef struct\n");
+  fprintf (stream1, "  {\n");
+  fprintf (stream1, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream1, "    int level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream1, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream1, "  }\n");
+  fprintf (stream1, "lbrkprop_t;\n");
+  fprintf (stream1, "extern const lbrkprop_t unilbrkprop;\n");
+
+  fprintf (stream2, "const lbrkprop_t unilbrkprop =\n");
+  fprintf (stream2, "{\n");
+  fprintf (stream2, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream2, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      fprintf (stream2, " %5zd%s",
+              offset == 0 ? -1 : (offset - level2_offset) / sizeof (uint32_t),
+              (i+1 < t.level1_size ? "," : ""));
+    }
+  if (t.level1_size > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " },\n");
+  fprintf (stream2, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream2, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      fprintf (stream2, " %5zd%s",
+              offset == 0 ? -1 : (offset - level3_offset) / sizeof (uint8_t),
+              (i+1 < t.level2_size << t.q ? "," : ""));
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " },\n");
+  fprintf (stream2, "  {");
+  if (t.level3_size << t.p > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned char value = ((unsigned char *) (t.result + level3_offset))[i];
+      const char *value_string;
+      switch (value)
+       {
+#define CASE(x) case x: value_string = #x; break;
+         CASE(LBP_BK);
+         CASE(LBP_CM);
+         CASE(LBP_WJ);
+         CASE(LBP_ZW);
+         CASE(LBP_GL);
+         CASE(LBP_SP);
+         CASE(LBP_B2);
+         CASE(LBP_BA);
+         CASE(LBP_BB);
+         CASE(LBP_HY);
+         CASE(LBP_CB);
+         CASE(LBP_CL);
+         CASE(LBP_EX);
+         CASE(LBP_IN);
+         CASE(LBP_NS);
+         CASE(LBP_OP);
+         CASE(LBP_QU);
+         CASE(LBP_IS);
+         CASE(LBP_NU);
+         CASE(LBP_PO);
+         CASE(LBP_PR);
+         CASE(LBP_SY);
+         CASE(LBP_AI);
+         CASE(LBP_AL);
+         CASE(LBP_H2);
+         CASE(LBP_H3);
+         CASE(LBP_ID);
+         CASE(LBP_JL);
+         CASE(LBP_JV);
+         CASE(LBP_JT);
+         CASE(LBP_SA);
+         CASE(LBP_XX);
+#undef CASE
+         default:
+           abort ();
+       }
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream2, "\n   ");
+      fprintf (stream2, " %s%s", value_string,
+              (i+1 < t.level3_size << t.p ? "," : ""));
+    }
+  if (t.level3_size << t.p > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " }\n");
+  fprintf (stream2, "};\n");
+}
+
+static void
+output_lbrk_tables (const char *filename1, const char *filename2, const char *version)
+{
+  const char *filenames[2];
+  FILE *streams[2];
+  size_t i;
+
+  filenames[0] = filename1;
+  filenames[1] = filename2;
+
+  for (i = 0; i < 2; i++)
+    {
+      streams[i] = fopen (filenames[i], "w");
+      if (streams[i] == NULL)
+       {
+         fprintf (stderr, "cannot open '%s' for writing\n", filenames[i]);
+         exit (1);
+       }
+    }
+
+  for (i = 0; i < 2; i++)
+    {
+      FILE *stream = streams[i];
+
+      fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+      fprintf (stream, "/* Line breaking properties of Unicode characters.  */\n");
+      fprintf (stream, "/* Generated automatically by gen-lbrk for Unicode %s.  */\n",
+              version);
+      fprintf (stream, "\n");
+
+      /* Put a GPL header on it.  The gnulib module is under LGPL (although it
+        still carries the GPL header), and it's gnulib-tool which replaces the
+        GPL header with an LGPL header.  */
+      fprintf (stream, "/* Copyright (C) 2000-2002, 2004, 2008 Free Software Foundation, Inc.\n");
+      fprintf (stream, "\n");
+      fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
+      fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
+      fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
+      fprintf (stream, "   (at your option) any later version.\n");
+      fprintf (stream, "\n");
+      fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
+      fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+      fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+      fprintf (stream, "   GNU General Public License for more details.\n");
+      fprintf (stream, "\n");
+      fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
+      fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
+      fprintf (stream, "\n");
+    }
+
+  output_lbp (streams[0], streams[1]);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (ferror (streams[i]) || fclose (streams[i]))
+       {
+         fprintf (stderr, "error writing to '%s'\n", filenames[i]);
+         exit (1);
+       }
+    }
+}
+
+/* ========================================================================= */
+
+int
+main (int argc, char * argv[])
+{
+  const char *unicodedata_filename;
+  const char *proplist_filename;
+  const char *derivedproplist_filename;
+  const char *scripts_filename;
+  const char *blocks_filename;
+  const char *proplist30_filename;
+  const char *eastasianwidth_filename;
+  const char *linebreak_filename;
+  const char *version;
+
+  if (argc != 10)
+    {
+      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt EastAsianWidth.txt LineBreak.txt version\n",
+              argv[0]);
+      exit (1);
+    }
+
+  unicodedata_filename = argv[1];
+  proplist_filename = argv[2];
+  derivedproplist_filename = argv[3];
+  scripts_filename = argv[4];
+  blocks_filename = argv[5];
+  proplist30_filename = argv[6];
+  eastasianwidth_filename = argv[7];
+  linebreak_filename = argv[8];
+  version = argv[9];
+
+  fill_attributes (unicodedata_filename);
+  clear_properties ();
+  fill_properties (proplist_filename);
+  fill_properties (derivedproplist_filename);
+  fill_properties30 (proplist30_filename);
+  fill_scripts (scripts_filename);
+  fill_blocks (blocks_filename);
+  fill_width (eastasianwidth_filename);
+  fill_org_lbp (linebreak_filename);
+
+  output_categories (version);
+  output_category ("unictype/categ_of.h", version);
+  output_combclass ("unictype/combining.h", version);
+  output_bidi_category ("unictype/bidi_of.h", version);
+  output_decimal_digit_test ("../tests/unictype/test-decdigit.h", version);
+  output_decimal_digit ("unictype/decdigit.h", version);
+  output_digit_test ("../tests/unictype/test-digit.h", version);
+  output_digit ("unictype/digit.h", version);
+  output_numeric_test ("../tests/unictype/test-numeric.h", version);
+  output_numeric ("unictype/numeric.h", version);
+  output_mirror ("unictype/mirror.h", version);
+  output_properties (version);
+  output_scripts (version);
+  output_scripts_byname (version);
+  output_blocks (version);
+  output_ident_properties (version);
+  output_old_ctype (version);
+
+  debug_output_lbrk_tables ("unilbrk/lbrkprop.txt");
+  debug_output_org_lbrk_tables ("unilbrk/lbrkprop_org.txt");
+  output_lbrk_tables ("unilbrk/lbrkprop1.h", "unilbrk/lbrkprop2.h", version);
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "
+   gcc -O -Wall gen-uni-tables.c -Iunictype -o gen-uni-tables && \
+   ./gen-uni-tables \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/UnicodeData.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/PropList.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/DerivedCoreProperties.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Scripts.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Blocks.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/3.0.1/PropList-3.0.1.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/EastAsianWidth.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/LineBreak.txt \
+        5.0.0
+   "
+ * End:
+ */
diff --git a/lib/unictype/gen-ctype.c b/lib/unictype/gen-ctype.c

deleted file mode 100644 (file)

index c1c43c7..0000000
--- a/lib/unictype/gen-ctype.c
+++ /dev/null
@@ -1,5134 +0,0 @@
-/* Generate Unicode conforming character classification tables from a
-   UnicodeData file.
-   Copyright (C) 2000-2002, 2007-2009 Free Software Foundation, Inc.
-   Written by Bruno Haible <bruno@clisp.org>, 2000-2002.
-
-   This program is free software: you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-/* Usage example:
-     $ gen-ctype /usr/local/share/Unidata/UnicodeData.txt \
-                 /usr/local/share/Unidata/PropList.txt \
-                 /usr/local/share/Unidata/DerivedCoreProperties.txt \
-                 /usr/local/share/Unidata/Scripts.txt \
-                 /usr/local/share/Unidata/Blocks.txt \
-                 /usr/local/share/Unidata/PropList-3.0.1.txt \
-                 5.0.0
- */
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-/* ========================================================================= */
-
-/* Reading UnicodeData.txt.  */
-/* See UCD.html.  */
-
-/* This structure represents one line in the UnicodeData.txt file.  */
-struct unicode_attribute
-{
-  const char *name;           /* Character name */
-  const char *category;       /* General category */
-  const char *combining;      /* Canonical combining class */
-  const char *bidi;           /* Bidirectional category */
-  const char *decomposition;  /* Character decomposition mapping */
-  const char *decdigit;       /* Decimal digit value */
-  const char *digit;          /* Digit value */
-  const char *numeric;        /* Numeric value */
-  bool mirrored;              /* mirrored */
-  const char *oldname;        /* Old Unicode 1.0 name */
-  const char *comment;        /* Comment */
-  unsigned int upper;         /* Uppercase mapping */
-  unsigned int lower;         /* Lowercase mapping */
-  unsigned int title;         /* Titlecase mapping */
-};
-
-/* Missing fields are represented with "" for strings, and NONE for
-   characters.  */
-#define NONE (~(unsigned int)0)
-
-/* The entire contents of the UnicodeData.txt file.  */
-struct unicode_attribute unicode_attributes [0x110000];
-
-/* Stores in unicode_attributes[i] the values from the given fields.  */
-static void
-fill_attribute (unsigned int i,
-               const char *field1, const char *field2,
-               const char *field3, const char *field4,
-               const char *field5, const char *field6,
-               const char *field7, const char *field8,
-               const char *field9, const char *field10,
-               const char *field11, const char *field12,
-               const char *field13, const char *field14)
-{
-  struct unicode_attribute * uni;
-
-  if (i >= 0x110000)
-    {
-      fprintf (stderr, "index too large\n");
-      exit (1);
-    }
-  if (strcmp (field2, "Cs") == 0)
-    /* Surrogates are UTF-16 artefacts, not real characters. Ignore them.  */
-    return;
-  uni = &unicode_attributes[i];
-  /* Copy the strings.  */
-  uni->name          = strdup (field1);
-  uni->category      = (field2[0] == '\0' ? "" : strdup (field2));
-  uni->combining     = (field3[0] == '\0' ? "" : strdup (field3));
-  uni->bidi          = (field4[0] == '\0' ? "" : strdup (field4));
-  uni->decomposition = (field5[0] == '\0' ? "" : strdup (field5));
-  uni->decdigit      = (field6[0] == '\0' ? "" : strdup (field6));
-  uni->digit         = (field7[0] == '\0' ? "" : strdup (field7));
-  uni->numeric       = (field8[0] == '\0' ? "" : strdup (field8));
-  uni->mirrored      = (field9[0] == 'Y');
-  uni->oldname       = (field10[0] == '\0' ? "" : strdup (field10));
-  uni->comment       = (field11[0] == '\0' ? "" : strdup (field11));
-  uni->upper = (field12[0] =='\0' ? NONE : strtoul (field12, NULL, 16));
-  uni->lower = (field13[0] =='\0' ? NONE : strtoul (field13, NULL, 16));
-  uni->title = (field14[0] =='\0' ? NONE : strtoul (field14, NULL, 16));
-}
-
-/* Maximum length of a field in the UnicodeData.txt file.  */
-#define FIELDLEN 120
-
-/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
-   Reads up to (but excluding) DELIM.
-   Returns 1 when a field was successfully read, otherwise 0.  */
-static int
-getfield (FILE *stream, char *buffer, int delim)
-{
-  int count = 0;
-  int c;
-
-  for (; (c = getc (stream)), (c != EOF && c != delim); )
-    {
-      /* The original unicode.org UnicodeData.txt file happens to have
-        CR/LF line terminators.  Silently convert to LF.  */
-      if (c == '\r')
-       continue;
-
-      /* Put c into the buffer.  */
-      if (++count >= FIELDLEN - 1)
-       {
-         fprintf (stderr, "field longer than expected, increase FIELDLEN\n");
-         exit (1);
-       }
-      *buffer++ = c;
-    }
-
-  if (c == EOF)
-    return 0;
-
-  *buffer = '\0';
-  return 1;
-}
-
-/* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
-   file.  */
-static void
-fill_attributes (const char *unicodedata_filename)
-{
-  unsigned int i, j;
-  FILE *stream;
-  char field0[FIELDLEN];
-  char field1[FIELDLEN];
-  char field2[FIELDLEN];
-  char field3[FIELDLEN];
-  char field4[FIELDLEN];
-  char field5[FIELDLEN];
-  char field6[FIELDLEN];
-  char field7[FIELDLEN];
-  char field8[FIELDLEN];
-  char field9[FIELDLEN];
-  char field10[FIELDLEN];
-  char field11[FIELDLEN];
-  char field12[FIELDLEN];
-  char field13[FIELDLEN];
-  char field14[FIELDLEN];
-  int lineno = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_attributes[i].name = NULL;
-
-  stream = fopen (unicodedata_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      int n;
-
-      lineno++;
-      n = getfield (stream, field0, ';');
-      n += getfield (stream, field1, ';');
-      n += getfield (stream, field2, ';');
-      n += getfield (stream, field3, ';');
-      n += getfield (stream, field4, ';');
-      n += getfield (stream, field5, ';');
-      n += getfield (stream, field6, ';');
-      n += getfield (stream, field7, ';');
-      n += getfield (stream, field8, ';');
-      n += getfield (stream, field9, ';');
-      n += getfield (stream, field10, ';');
-      n += getfield (stream, field11, ';');
-      n += getfield (stream, field12, ';');
-      n += getfield (stream, field13, ';');
-      n += getfield (stream, field14, '\n');
-      if (n == 0)
-       break;
-      if (n != 15)
-       {
-         fprintf (stderr, "short line in '%s':%d\n",
-                  unicodedata_filename, lineno);
-         exit (1);
-       }
-      i = strtoul (field0, NULL, 16);
-      if (field1[0] == '<'
-         && strlen (field1) >= 9
-         && strcmp (field1 + strlen(field1) - 8, ", First>") == 0)
-       {
-         /* Deal with a range. */
-         lineno++;
-         n = getfield (stream, field0, ';');
-         n += getfield (stream, field1, ';');
-         n += getfield (stream, field2, ';');
-         n += getfield (stream, field3, ';');
-         n += getfield (stream, field4, ';');
-         n += getfield (stream, field5, ';');
-         n += getfield (stream, field6, ';');
-         n += getfield (stream, field7, ';');
-         n += getfield (stream, field8, ';');
-         n += getfield (stream, field9, ';');
-         n += getfield (stream, field10, ';');
-         n += getfield (stream, field11, ';');
-         n += getfield (stream, field12, ';');
-         n += getfield (stream, field13, ';');
-         n += getfield (stream, field14, '\n');
-         if (n != 15)
-           {
-             fprintf (stderr, "missing end range in '%s':%d\n",
-                      unicodedata_filename, lineno);
-             exit (1);
-           }
-         if (!(field1[0] == '<'
-               && strlen (field1) >= 8
-               && strcmp (field1 + strlen (field1) - 7, ", Last>") == 0))
-           {
-             fprintf (stderr, "missing end range in '%s':%d\n",
-                      unicodedata_filename, lineno);
-             exit (1);
-           }
-         field1[strlen (field1) - 7] = '\0';
-         j = strtoul (field0, NULL, 16);
-         for (; i <= j; i++)
-           fill_attribute (i, field1+1, field2, field3, field4, field5,
-                              field6, field7, field8, field9, field10,
-                              field11, field12, field13, field14);
-       }
-      else
-       {
-         /* Single character line */
-         fill_attribute (i, field1, field2, field3, field4, field5,
-                            field6, field7, field8, field9, field10,
-                            field11, field12, field13, field14);
-       }
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* General category.  */
-/* See Unicode 3.0 book, section 4.5,
-       UCD.html.  */
-
-static bool
-is_category_L (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'L');
-}
-
-static bool
-is_category_Lu (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'L'
-         && unicode_attributes[ch].category[1] == 'u');
-}
-
-static bool
-is_category_Ll (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'L'
-         && unicode_attributes[ch].category[1] == 'l');
-}
-
-static bool
-is_category_Lt (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'L'
-         && unicode_attributes[ch].category[1] == 't');
-}
-
-static bool
-is_category_Lm (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'L'
-         && unicode_attributes[ch].category[1] == 'm');
-}
-
-static bool
-is_category_Lo (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'L'
-         && unicode_attributes[ch].category[1] == 'o');
-}
-
-static bool
-is_category_M (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'M');
-}
-
-static bool
-is_category_Mn (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'M'
-         && unicode_attributes[ch].category[1] == 'n');
-}
-
-static bool
-is_category_Mc (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'M'
-         && unicode_attributes[ch].category[1] == 'c');
-}
-
-static bool
-is_category_Me (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'M'
-         && unicode_attributes[ch].category[1] == 'e');
-}
-
-static bool
-is_category_N (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'N');
-}
-
-static bool
-is_category_Nd (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'N'
-         && unicode_attributes[ch].category[1] == 'd');
-}
-
-static bool
-is_category_Nl (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'N'
-         && unicode_attributes[ch].category[1] == 'l');
-}
-
-static bool
-is_category_No (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'N'
-         && unicode_attributes[ch].category[1] == 'o');
-}
-
-static bool
-is_category_P (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P');
-}
-
-static bool
-is_category_Pc (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P'
-         && unicode_attributes[ch].category[1] == 'c');
-}
-
-static bool
-is_category_Pd (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P'
-         && unicode_attributes[ch].category[1] == 'd');
-}
-
-static bool
-is_category_Ps (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P'
-         && unicode_attributes[ch].category[1] == 's');
-}
-
-static bool
-is_category_Pe (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P'
-         && unicode_attributes[ch].category[1] == 'e');
-}
-
-static bool
-is_category_Pi (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P'
-         && unicode_attributes[ch].category[1] == 'i');
-}
-
-static bool
-is_category_Pf (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P'
-         && unicode_attributes[ch].category[1] == 'f');
-}
-
-static bool
-is_category_Po (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P'
-         && unicode_attributes[ch].category[1] == 'o');
-}
-
-static bool
-is_category_S (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'S');
-}
-
-static bool
-is_category_Sm (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'S'
-         && unicode_attributes[ch].category[1] == 'm');
-}
-
-static bool
-is_category_Sc (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'S'
-         && unicode_attributes[ch].category[1] == 'c');
-}
-
-static bool
-is_category_Sk (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'S'
-         && unicode_attributes[ch].category[1] == 'k');
-}
-
-static bool
-is_category_So (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'S'
-         && unicode_attributes[ch].category[1] == 'o');
-}
-
-static bool
-is_category_Z (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'Z');
-}
-
-static bool
-is_category_Zs (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'Z'
-         && unicode_attributes[ch].category[1] == 's');
-}
-
-static bool
-is_category_Zl (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'Z'
-         && unicode_attributes[ch].category[1] == 'l');
-}
-
-static bool
-is_category_Zp (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'Z'
-         && unicode_attributes[ch].category[1] == 'p');
-}
-
-static bool
-is_category_C (unsigned int ch)
-{
-  return (unicode_attributes[ch].name == NULL
-         || unicode_attributes[ch].category[0] == 'C');
-}
-
-static bool
-is_category_Cc (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'C'
-         && unicode_attributes[ch].category[1] == 'c');
-}
-
-static bool
-is_category_Cf (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'C'
-         && unicode_attributes[ch].category[1] == 'f');
-}
-
-static bool
-is_category_Cs (unsigned int ch)
-{
-  return (ch >= 0xd800 && ch < 0xe000);
-}
-
-static bool
-is_category_Co (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'C'
-         && unicode_attributes[ch].category[1] == 'o');
-}
-
-static bool
-is_category_Cn (unsigned int ch)
-{
-  return (unicode_attributes[ch].name == NULL
-         && !(ch >= 0xd800 && ch < 0xe000));
-}
-
-/* Output a boolean property in a human readable format.  */
-static void
-debug_output_predicate (const char *filename, bool (*predicate) (unsigned int))
-{
-  FILE *stream;
-  unsigned int ch;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-#if 0 /* This yields huge text output.  */
-  for (ch = 0; ch < 0x110000; ch++)
-    if (predicate (ch))
-      {
-       fprintf (stream, "0x%04X\n", ch);
-      }
-#else
-  for (ch = 0; ch < 0x110000; ch++)
-    if (predicate (ch))
-      {
-       unsigned int first = ch;
-       unsigned int last;
-
-       while (ch + 1 < 0x110000 && predicate (ch + 1))
-         ch++;
-       last = ch;
-       if (first < last)
-         fprintf (stream, "0x%04X..0x%04X\n", first, last);
-       else
-         fprintf (stream, "0x%04X\n", ch);
-      }
-#endif
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Output the unit test for a boolean property.  */
-static void
-output_predicate_test (const char *filename, bool (*predicate) (unsigned int), const char *expression)
-{
-  FILE *stream;
-  bool need_comma;
-  unsigned int ch;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Test the Unicode character type functions.\n");
-  fprintf (stream, "   Copyright (C) 2007 Free Software Foundation, Inc.\n");
-  fprintf (stream, "\n");
-  fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
-  fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
-  fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
-  fprintf (stream, "   (at your option) any later version.\n");
-  fprintf (stream, "\n");
-  fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
-  fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
-  fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
-  fprintf (stream, "   GNU General Public License for more details.\n");
-  fprintf (stream, "\n");
-  fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
-  fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
-  fprintf (stream, "\n");
-  fprintf (stream, "#include \"test-predicate-part1.h\"\n");
-  fprintf (stream, "\n");
-
-  need_comma = false;
-  for (ch = 0; ch < 0x110000; ch++)
-    if (predicate (ch))
-      {
-       unsigned int first = ch;
-       unsigned int last;
-
-       while (ch + 1 < 0x110000 && predicate (ch + 1))
-         ch++;
-       last = ch;
-       if (need_comma)
-         fprintf (stream, ",\n");
-       fprintf (stream, "    { 0x%04X, 0x%04X }", first, last);
-       need_comma = true;
-      }
-  if (need_comma)
-    fprintf (stream, "\n");
-
-  fprintf (stream, "\n");
-  fprintf (stream, "#define PREDICATE(c) %s\n", expression);
-  fprintf (stream, "#include \"test-predicate-part2.h\"\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE predicate_table
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3levelbit.h"
-
-/* Output a boolean property in a three-level bitmap.  */
-static void
-output_predicate (const char *filename, bool (*predicate) (unsigned int), const char *name, const char *comment, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct predicate_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* %s of Unicode characters.  */\n", comment);
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 4; /* or: 5 */
-  t.q = 7; /* or: 6 */
-  predicate_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    if (predicate (ch))
-      predicate_table_add (&t, ch);
-
-  predicate_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    if (i != 1)
-      fprintf (stream, "#define header_%d %d\n", i,
-              ((uint32_t *) t.result)[i]);
-
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int header[1];\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    /*unsigned*/ int level3[%zu << %d];\n", t.level3_size, t.p);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "%s =\n", name);
-  fprintf (stream, "{\n");
-  fprintf (stream, "  { %d },\n", ((uint32_t *) t.result)[1]);
-  fprintf (stream, "  {");
-  if (t.level1_size > 1)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 1) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd * sizeof (int) / sizeof (short) + %5zd",
-                1 + t.level1_size, (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-        fprintf (stream, ",");
-    }
-  if (t.level1_size > 1)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 1)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 1) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd + %5zd * sizeof (short) / sizeof (int) + %5zd",
-                1 + t.level1_size, t.level2_size << t.q, (offset - level3_offset) / sizeof (uint32_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 1)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level3_size << t.p > 4)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      if (i > 0 && (i % 4) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%08X",
-              ((uint32_t *) (t.result + level3_offset))[i]);
-      if (i+1 < t.level3_size << t.p)
-       fprintf (stream, ",");
-    }
-  if (t.level3_size << t.p > 4)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Output all categories.  */
-static void
-output_categories (const char *version)
-{
-#define CATEGORY(C) \
-  debug_output_predicate ("categ_" #C ".txt", is_category_ ## C); \
-  output_predicate_test ("test-categ_" #C ".c", is_category_ ## C, "uc_is_general_category (c, UC_CATEGORY_" #C ")"); \
-  output_predicate ("categ_" #C ".h", is_category_ ## C, "u_categ_" #C, "Categories", version);
-  CATEGORY (L)
-  CATEGORY (Lu)
-  CATEGORY (Ll)
-  CATEGORY (Lt)
-  CATEGORY (Lm)
-  CATEGORY (Lo)
-  CATEGORY (M)
-  CATEGORY (Mn)
-  CATEGORY (Mc)
-  CATEGORY (Me)
-  CATEGORY (N)
-  CATEGORY (Nd)
-  CATEGORY (Nl)
-  CATEGORY (No)
-  CATEGORY (P)
-  CATEGORY (Pc)
-  CATEGORY (Pd)
-  CATEGORY (Ps)
-  CATEGORY (Pe)
-  CATEGORY (Pi)
-  CATEGORY (Pf)
-  CATEGORY (Po)
-  CATEGORY (S)
-  CATEGORY (Sm)
-  CATEGORY (Sc)
-  CATEGORY (Sk)
-  CATEGORY (So)
-  CATEGORY (Z)
-  CATEGORY (Zs)
-  CATEGORY (Zl)
-  CATEGORY (Zp)
-  CATEGORY (C)
-  CATEGORY (Cc)
-  CATEGORY (Cf)
-  CATEGORY (Cs)
-  CATEGORY (Co)
-  CATEGORY (Cn)
-#undef CATEGORY
-}
-
-enum
-{
-  UC_CATEGORY_MASK_L  = 0x0000001f,
-  UC_CATEGORY_MASK_Lu = 0x00000001,
-  UC_CATEGORY_MASK_Ll = 0x00000002,
-  UC_CATEGORY_MASK_Lt = 0x00000004,
-  UC_CATEGORY_MASK_Lm = 0x00000008,
-  UC_CATEGORY_MASK_Lo = 0x00000010,
-  UC_CATEGORY_MASK_M  = 0x000000e0,
-  UC_CATEGORY_MASK_Mn = 0x00000020,
-  UC_CATEGORY_MASK_Mc = 0x00000040,
-  UC_CATEGORY_MASK_Me = 0x00000080,
-  UC_CATEGORY_MASK_N  = 0x00000700,
-  UC_CATEGORY_MASK_Nd = 0x00000100,
-  UC_CATEGORY_MASK_Nl = 0x00000200,
-  UC_CATEGORY_MASK_No = 0x00000400,
-  UC_CATEGORY_MASK_P  = 0x0003f800,
-  UC_CATEGORY_MASK_Pc = 0x00000800,
-  UC_CATEGORY_MASK_Pd = 0x00001000,
-  UC_CATEGORY_MASK_Ps = 0x00002000,
-  UC_CATEGORY_MASK_Pe = 0x00004000,
-  UC_CATEGORY_MASK_Pi = 0x00008000,
-  UC_CATEGORY_MASK_Pf = 0x00010000,
-  UC_CATEGORY_MASK_Po = 0x00020000,
-  UC_CATEGORY_MASK_S  = 0x003c0000,
-  UC_CATEGORY_MASK_Sm = 0x00040000,
-  UC_CATEGORY_MASK_Sc = 0x00080000,
-  UC_CATEGORY_MASK_Sk = 0x00100000,
-  UC_CATEGORY_MASK_So = 0x00200000,
-  UC_CATEGORY_MASK_Z  = 0x01c00000,
-  UC_CATEGORY_MASK_Zs = 0x00400000,
-  UC_CATEGORY_MASK_Zl = 0x00800000,
-  UC_CATEGORY_MASK_Zp = 0x01000000,
-  UC_CATEGORY_MASK_C  = 0x3e000000,
-  UC_CATEGORY_MASK_Cc = 0x02000000,
-  UC_CATEGORY_MASK_Cf = 0x04000000,
-  UC_CATEGORY_MASK_Cs = 0x08000000,
-  UC_CATEGORY_MASK_Co = 0x10000000,
-  UC_CATEGORY_MASK_Cn = 0x20000000
-};
-
-static int
-general_category_byname (const char *category_name)
-{
-  if (category_name[0] != '\0'
-      && (category_name[1] == '\0' || category_name[2] == '\0'))
-    switch (category_name[0])
-      {
-      case 'L':
-       switch (category_name[1])
-         {
-         case '\0': return UC_CATEGORY_MASK_L;
-         case 'u': return UC_CATEGORY_MASK_Lu;
-         case 'l': return UC_CATEGORY_MASK_Ll;
-         case 't': return UC_CATEGORY_MASK_Lt;
-         case 'm': return UC_CATEGORY_MASK_Lm;
-         case 'o': return UC_CATEGORY_MASK_Lo;
-         }
-       break;
-      case 'M':
-       switch (category_name[1])
-         {
-         case '\0': return UC_CATEGORY_MASK_M;
-         case 'n': return UC_CATEGORY_MASK_Mn;
-         case 'c': return UC_CATEGORY_MASK_Mc;
-         case 'e': return UC_CATEGORY_MASK_Me;
-         }
-       break;
-      case 'N':
-       switch (category_name[1])
-         {
-         case '\0': return UC_CATEGORY_MASK_N;
-         case 'd': return UC_CATEGORY_MASK_Nd;
-         case 'l': return UC_CATEGORY_MASK_Nl;
-         case 'o': return UC_CATEGORY_MASK_No;
-         }
-       break;
-      case 'P':
-       switch (category_name[1])
-         {
-         case '\0': return UC_CATEGORY_MASK_P;
-         case 'c': return UC_CATEGORY_MASK_Pc;
-         case 'd': return UC_CATEGORY_MASK_Pd;
-         case 's': return UC_CATEGORY_MASK_Ps;
-         case 'e': return UC_CATEGORY_MASK_Pe;
-         case 'i': return UC_CATEGORY_MASK_Pi;
-         case 'f': return UC_CATEGORY_MASK_Pf;
-         case 'o': return UC_CATEGORY_MASK_Po;
-         }
-       break;
-      case 'S':
-       switch (category_name[1])
-         {
-         case '\0': return UC_CATEGORY_MASK_S;
-         case 'm': return UC_CATEGORY_MASK_Sm;
-         case 'c': return UC_CATEGORY_MASK_Sc;
-         case 'k': return UC_CATEGORY_MASK_Sk;
-         case 'o': return UC_CATEGORY_MASK_So;
-         }
-       break;
-      case 'Z':
-       switch (category_name[1])
-         {
-         case '\0': return UC_CATEGORY_MASK_Z;
-         case 's': return UC_CATEGORY_MASK_Zs;
-         case 'l': return UC_CATEGORY_MASK_Zl;
-         case 'p': return UC_CATEGORY_MASK_Zp;
-         }
-       break;
-      case 'C':
-       switch (category_name[1])
-         {
-         case '\0': return UC_CATEGORY_MASK_C;
-         case 'c': return UC_CATEGORY_MASK_Cc;
-         case 'f': return UC_CATEGORY_MASK_Cf;
-         case 's': return UC_CATEGORY_MASK_Cs;
-         case 'o': return UC_CATEGORY_MASK_Co;
-         case 'n': return UC_CATEGORY_MASK_Cn;
-         }
-       break;
-      }
-  /* Invalid category name.  */
-  abort ();
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE category_table
-#define ELEMENT uint8_t
-#define DEFAULT 29 /* = log2(UC_CATEGORY_MASK_Cn) */
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-/* Output the per-character category table.  */
-static void
-output_category (const char *filename, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct category_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-  uint16_t *level3_packed;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Categories of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 7;
-  t.q = 9;
-  category_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int value;
-      unsigned int log2_value;
-
-      if (is_category_Cs (ch))
-       value = UC_CATEGORY_MASK_Cs;
-      else if (unicode_attributes[ch].name != NULL)
-       value = general_category_byname (unicode_attributes[ch].category);
-      else
-       continue;
-
-      /* Now value should contain exactly one bit.  */
-      if (value == 0 || ((value & (value - 1)) != 0))
-       abort ();
-
-      for (log2_value = 0; value > 1; value >>= 1, log2_value++);
-
-      category_table_add (&t, ch, log2_value);
-    }
-
-  category_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define category_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned short level3[%zu * %d + 1];\n", t.level3_size,
-          (1 << t.p) * 5 / 16);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_category =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  /* Pack the level3 array.  Each entry needs 5 bits only.  Use 16-bit units,
-     not 32-bit units, in order to make the lookup function easier.  */
-  level3_packed =
-    (uint16_t *)
-    calloc ((t.level3_size << t.p) * 5 / 16 + 1, sizeof (uint16_t));
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      unsigned int j = (i * 5) / 16;
-      unsigned int k = (i * 5) % 16;
-      uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
-      value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
-      level3_packed[j] = value & 0xffff;
-      level3_packed[j+1] = value >> 16;
-    }
-  fprintf (stream, "  {");
-  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < (t.level3_size << t.p) * 5 / 16 + 1; i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%04x", level3_packed[i]);
-      if (i+1 < (t.level3_size << t.p) * 5 / 16 + 1)
-       fprintf (stream, ",");
-    }
-  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  free (level3_packed);
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Canonical combining class.  */
-/* See Unicode 3.0 book, section 4.2,
-       UCD.html.  */
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE combclass_table
-#define ELEMENT uint8_t
-#define DEFAULT 0
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-/* Output the per-character combining class table.  */
-static void
-output_combclass (const char *filename, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct combclass_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Combining class of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 7;
-  t.q = 9;
-  combclass_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    if (unicode_attributes[ch].name != NULL)
-      {
-       int value = atoi (unicode_attributes[ch].combining);
-        if (!(value >= 0 && value <= 255))
-         abort ();
-       combclass_table_add (&t, ch, value);
-      }
-
-  combclass_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define combclass_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_combclass =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level3_size << t.p > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " %3d", ((uint8_t *) (t.result + level3_offset))[i]);
-      if (i+1 < t.level3_size << t.p)
-       fprintf (stream, ",");
-    }
-  if (t.level3_size << t.p > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Bidirectional category.  */
-/* See Unicode 3.0 book, section 4.3,
-       UCD.html.  */
-
-enum
-{
-  UC_BIDI_L,   /* Left-to-Right */
-  UC_BIDI_LRE, /* Left-to-Right Embedding */
-  UC_BIDI_LRO, /* Left-to-Right Override */
-  UC_BIDI_R,   /* Right-to-Left */
-  UC_BIDI_AL,  /* Right-to-Left Arabic */
-  UC_BIDI_RLE, /* Right-to-Left Embedding */
-  UC_BIDI_RLO, /* Right-to-Left Override */
-  UC_BIDI_PDF, /* Pop Directional Format */
-  UC_BIDI_EN,  /* European Number */
-  UC_BIDI_ES,  /* European Number Separator */
-  UC_BIDI_ET,  /* European Number Terminator */
-  UC_BIDI_AN,  /* Arabic Number */
-  UC_BIDI_CS,  /* Common Number Separator */
-  UC_BIDI_NSM, /* Non-Spacing Mark */
-  UC_BIDI_BN,  /* Boundary Neutral */
-  UC_BIDI_B,   /* Paragraph Separator */
-  UC_BIDI_S,   /* Segment Separator */
-  UC_BIDI_WS,  /* Whitespace */
-  UC_BIDI_ON   /* Other Neutral */
-};
-
-static int
-bidi_category_byname (const char *category_name)
-{
-  switch (category_name[0])
-    {
-    case 'A':
-      switch (category_name[1])
-       {
-       case 'L':
-         if (category_name[2] == '\0')
-           return UC_BIDI_AL;
-         break;
-       case 'N':
-         if (category_name[2] == '\0')
-           return UC_BIDI_AN;
-         break;
-       }
-      break;
-    case 'B':
-      switch (category_name[1])
-       {
-       case '\0':
-         return UC_BIDI_B;
-       case 'N':
-         if (category_name[2] == '\0')
-           return UC_BIDI_BN;
-         break;
-       }
-      break;
-    case 'C':
-      switch (category_name[1])
-       {
-       case 'S':
-         if (category_name[2] == '\0')
-           return UC_BIDI_CS;
-         break;
-       }
-      break;
-    case 'E':
-      switch (category_name[1])
-       {
-       case 'N':
-         if (category_name[2] == '\0')
-           return UC_BIDI_EN;
-         break;
-       case 'S':
-         if (category_name[2] == '\0')
-           return UC_BIDI_ES;
-         break;
-       case 'T':
-         if (category_name[2] == '\0')
-           return UC_BIDI_ET;
-         break;
-       }
-      break;
-    case 'L':
-      switch (category_name[1])
-       {
-       case '\0':
-         return UC_BIDI_L;
-       case 'R':
-         switch (category_name[2])
-           {
-           case 'E':
-             if (category_name[3] == '\0')
-               return UC_BIDI_LRE;
-             break;
-           case 'O':
-             if (category_name[3] == '\0')
-               return UC_BIDI_LRO;
-             break;
-           }
-         break;
-       }
-      break;
-    case 'N':
-      switch (category_name[1])
-       {
-       case 'S':
-         switch (category_name[2])
-           {
-           case 'M':
-             if (category_name[3] == '\0')
-               return UC_BIDI_NSM;
-             break;
-           }
-         break;
-       }
-      break;
-    case 'O':
-      switch (category_name[1])
-       {
-       case 'N':
-         if (category_name[2] == '\0')
-           return UC_BIDI_ON;
-         break;
-       }
-      break;
-    case 'P':
-      switch (category_name[1])
-       {
-       case 'D':
-         switch (category_name[2])
-           {
-           case 'F':
-             if (category_name[3] == '\0')
-               return UC_BIDI_PDF;
-             break;
-           }
-         break;
-       }
-      break;
-    case 'R':
-      switch (category_name[1])
-       {
-       case '\0':
-         return UC_BIDI_R;
-       case 'L':
-         switch (category_name[2])
-           {
-           case 'E':
-             if (category_name[3] == '\0')
-               return UC_BIDI_RLE;
-             break;
-           case 'O':
-             if (category_name[3] == '\0')
-               return UC_BIDI_RLO;
-             break;
-           }
-         break;
-       }
-      break;
-    case 'S':
-      if (category_name[1] == '\0')
-       return UC_BIDI_S;
-      break;
-    case 'W':
-      switch (category_name[1])
-       {
-       case 'S':
-         if (category_name[2] == '\0')
-           return UC_BIDI_WS;
-         break;
-       }
-      break;
-    }
-  /* Invalid bidi category name.  */
-  abort ();
-}
-
-static int
-get_bidi_category (unsigned int ch)
-{
-  if (unicode_attributes[ch].name != NULL)
-    return bidi_category_byname (unicode_attributes[ch].bidi);
-  else
-    {
-      /* The bidi category of unassigned characters depends on the range.
-        See UTR #9 and DerivedBidiClass.txt.  */
-      if ((ch >= 0x0590 && ch <= 0x05FF)
-         || (ch >= 0x07FB && ch <= 0x08FF)
-         || (ch >= 0xFB37 && ch <= 0xFB45)
-         || (ch >= 0x10800 && ch <= 0x10FFF))
-       return UC_BIDI_R;
-      else if ((ch >= 0x0600 && ch <= 0x07BF)
-              || (ch >= 0x2064 && ch <= 0x2069)
-              || (ch >= 0xFBB2 && ch <= 0xFDCF)
-              || (ch >= 0xFDFE && ch <= 0xFEFE))
-       return UC_BIDI_AL;
-      else if ((ch >= 0xFDD0 && ch <= 0xFDEF)
-              || (ch >= 0xFFF0 && ch <= 0xFFFF)
-              || (ch & 0xFFFF) == 0xFFFE
-              || (ch & 0xFFFF) == 0xFFFF
-              || (ch >= 0xE0000 && ch <= 0xE0FFF))
-       return UC_BIDI_BN;
-      else
-       return UC_BIDI_L;
-    }
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE bidi_category_table
-#define ELEMENT uint8_t
-#define DEFAULT UC_BIDI_L
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-/* Output the per-character bidi category table.  */
-static void
-output_bidi_category (const char *filename, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct bidi_category_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-  uint16_t *level3_packed;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Bidi categories of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 7;
-  t.q = 9;
-  bidi_category_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int value = get_bidi_category (ch);
-
-      bidi_category_table_add (&t, ch, value);
-    }
-
-  bidi_category_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define bidi_category_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned short level3[%zu * %d + 1];\n", t.level3_size,
-          (1 << t.p) * 5 / 16);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_bidi_category =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  /* Pack the level3 array.  Each entry needs 5 bits only.  Use 16-bit units,
-     not 32-bit units, in order to make the lookup function easier.  */
-  level3_packed =
-    (uint16_t *)
-    calloc ((t.level3_size << t.p) * 5 / 16 + 1, sizeof (uint16_t));
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      unsigned int j = (i * 5) / 16;
-      unsigned int k = (i * 5) % 16;
-      uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
-      value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
-      level3_packed[j] = value & 0xffff;
-      level3_packed[j+1] = value >> 16;
-    }
-  fprintf (stream, "  {");
-  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < (t.level3_size << t.p) * 5 / 16 + 1; i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%04x", level3_packed[i]);
-      if (i+1 < (t.level3_size << t.p) * 5 / 16 + 1)
-       fprintf (stream, ",");
-    }
-  if ((t.level3_size << t.p) * 5 / 16 + 1 > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  free (level3_packed);
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Decimal digit value.  */
-/* See Unicode 3.0 book, section 4.6.  */
-
-static int
-get_decdigit_value (unsigned int ch)
-{
-  if (unicode_attributes[ch].name != NULL
-      && unicode_attributes[ch].decdigit[0] != '\0')
-    return atoi (unicode_attributes[ch].decdigit);
-  return -1;
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE decdigit_table
-#define ELEMENT uint8_t
-#define DEFAULT 0
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-/* Output the unit test for the per-character decimal digit value table.  */
-static void
-output_decimal_digit_test (const char *filename, const char *version)
-{
-  FILE *stream;
-  bool need_comma;
-  unsigned int ch;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Decimal digit values of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  need_comma = false;
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int value = get_decdigit_value (ch);
-
-      if (!(value >= -1 && value < 10))
-       abort ();
-
-      if (value >= 0)
-       {
-         if (need_comma)
-           fprintf (stream, ",\n");
-         fprintf (stream, "    { 0x%04X, %d }", ch, value);
-         need_comma = true;
-       }
-    }
-  if (need_comma)
-    fprintf (stream, "\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Output the per-character decimal digit value table.  */
-static void
-output_decimal_digit (const char *filename, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct decdigit_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Decimal digit values of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 7;
-  t.q = 9;
-  decdigit_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int value = 1 + get_decdigit_value (ch);
-
-      if (!(value >= 0 && value <= 10))
-       abort ();
-
-      decdigit_table_add (&t, ch, value);
-    }
-
-  decdigit_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define decdigit_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size,
-          t.p - 1);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_decdigit =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  /* Pack the level3 array.  Each entry needs 4 bits only.  */
-  fprintf (stream, "  {");
-  if (t.level3_size << (t.p - 1) > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level3_size << (t.p - 1); i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%02x",
-              ((uint8_t *) (t.result + level3_offset))[2*i]
-              + (((uint8_t *) (t.result + level3_offset))[2*i+1] << 4));
-      if (i+1 < t.level3_size << (t.p - 1))
-       fprintf (stream, ",");
-    }
-  if (t.level3_size << (t.p - 1) > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Digit value.  */
-/* See Unicode 3.0 book, section 4.6.  */
-
-static int
-get_digit_value (unsigned int ch)
-{
-  if (unicode_attributes[ch].name != NULL
-      && unicode_attributes[ch].digit[0] != '\0')
-    return atoi (unicode_attributes[ch].digit);
-  return -1;
-}
-
-/* Output the unit test for the per-character digit value table.  */
-static void
-output_digit_test (const char *filename, const char *version)
-{
-  FILE *stream;
-  bool need_comma;
-  unsigned int ch;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Digit values of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  need_comma = false;
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int value = get_digit_value (ch);
-
-      if (!(value >= -1 && value < 10))
-       abort ();
-
-      if (value >= 0)
-       {
-         if (need_comma)
-           fprintf (stream, ",\n");
-         fprintf (stream, "    { 0x%04X, %d }", ch, value);
-         need_comma = true;
-       }
-    }
-  if (need_comma)
-    fprintf (stream, "\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Output the per-character digit value table.  */
-static void
-output_digit (const char *filename, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct decdigit_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Digit values of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 7;
-  t.q = 9;
-  decdigit_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int value = 1 + get_digit_value (ch);
-
-      if (!(value >= 0 && value <= 10))
-       abort ();
-
-      decdigit_table_add (&t, ch, value);
-    }
-
-  decdigit_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define digit_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size,
-          t.p - 1);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_digit =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  /* Pack the level3 array.  Each entry needs 4 bits only.  */
-  fprintf (stream, "  {");
-  if (t.level3_size << (t.p - 1) > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level3_size << (t.p - 1); i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%02x",
-              ((uint8_t *) (t.result + level3_offset))[2*i]
-              + (((uint8_t *) (t.result + level3_offset))[2*i+1] << 4));
-      if (i+1 < t.level3_size << (t.p - 1))
-       fprintf (stream, ",");
-    }
-  if (t.level3_size << (t.p - 1) > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Numeric value.  */
-/* See Unicode 3.0 book, section 4.6.  */
-
-typedef struct { int numerator; int denominator; } uc_fraction_t;
-
-static uc_fraction_t
-get_numeric_value (unsigned int ch)
-{
-  uc_fraction_t value;
-
-  if (unicode_attributes[ch].name != NULL
-      && unicode_attributes[ch].numeric[0] != '\0')
-    {
-      const char *str = unicode_attributes[ch].numeric;
-      /* str is of the form "integer" or "integer/posinteger".  */
-      value.numerator = atoi (str);
-      if (strchr (str, '/') != NULL)
-       value.denominator = atoi (strchr (str, '/') + 1);
-      else
-       value.denominator = 1;
-    }
-  else
-    {
-      value.numerator = 0;
-      value.denominator = 0;
-    }
-  return value;
-}
-
-/* Output the unit test for the per-character numeric value table.  */
-static void
-output_numeric_test (const char *filename, const char *version)
-{
-  FILE *stream;
-  bool need_comma;
-  unsigned int ch;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Numeric values of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  need_comma = false;
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      uc_fraction_t value = get_numeric_value (ch);
-
-      if (value.numerator != 0 || value.denominator != 0)
-       {
-         if (need_comma)
-           fprintf (stream, ",\n");
-         fprintf (stream, "    { 0x%04X, %d, %d }",
-                  ch, value.numerator, value.denominator);
-         need_comma = true;
-       }
-    }
-  if (need_comma)
-    fprintf (stream, "\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE numeric_table
-#define ELEMENT uint8_t
-#define DEFAULT 0
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-/* Output the per-character numeric value table.  */
-static void
-output_numeric (const char *filename, const char *version)
-{
-  FILE *stream;
-  uc_fraction_t fractions[128];
-  unsigned int nfractions;
-  unsigned int ch, i, j;
-  struct numeric_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-  uint16_t *level3_packed;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Numeric values of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  /* Create table of occurring fractions.  */
-  nfractions = 0;
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      uc_fraction_t value = get_numeric_value (ch);
-
-      for (i = 0; i < nfractions; i++)
-       if (value.numerator == fractions[i].numerator
-           && value.denominator == fractions[i].denominator)
-         break;
-      if (i == nfractions)
-       {
-         if (nfractions == 128)
-           abort ();
-         for (i = 0; i < nfractions; i++)
-           if (value.denominator < fractions[i].denominator
-               || (value.denominator == fractions[i].denominator
-                   && value.numerator < fractions[i].numerator))
-             break;
-         for (j = nfractions; j > i; j--)
-           fractions[j] = fractions[j - 1];
-         fractions[i] = value;
-         nfractions++;
-       }
-    }
-
-  fprintf (stream, "static const uc_fraction_t u_numeric_values[%d] =\n",
-          nfractions);
-  fprintf (stream, "{\n");
-  for (i = 0; i < nfractions; i++)
-    {
-      fprintf (stream, "  { %d, %d }", fractions[i].numerator,
-              fractions[i].denominator);
-      if (i+1 < nfractions)
-       fprintf (stream, ",");
-      fprintf (stream, "\n");
-    }
-  fprintf (stream, "};\n");
-
-  t.p = 7;
-  t.q = 9;
-  numeric_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      uc_fraction_t value = get_numeric_value (ch);
-
-      for (i = 0; i < nfractions; i++)
-       if (value.numerator == fractions[i].numerator
-           && value.denominator == fractions[i].denominator)
-         break;
-      if (i == nfractions)
-       abort ();
-
-      numeric_table_add (&t, ch, i);
-    }
-
-  numeric_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define numeric_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned short level3[%zu * %d + 1];\n", t.level3_size,
-          (1 << t.p) * 7 / 16);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_numeric =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  /* Pack the level3 array.  Each entry needs 7 bits only.  Use 16-bit units,
-     not 32-bit units, in order to make the lookup function easier.  */
-  level3_packed =
-    (uint16_t *)
-    calloc ((t.level3_size << t.p) * 7 / 16 + 1, sizeof (uint16_t));
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      unsigned int j = (i * 7) / 16;
-      unsigned int k = (i * 7) % 16;
-      uint32_t value = ((unsigned char *) (t.result + level3_offset))[i];
-      value = level3_packed[j] | (level3_packed[j+1] << 16) | (value << k);
-      level3_packed[j] = value & 0xffff;
-      level3_packed[j+1] = value >> 16;
-    }
-  fprintf (stream, "  {");
-  if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < (t.level3_size << t.p) * 7 / 16 + 1; i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%04x", level3_packed[i]);
-      if (i+1 < (t.level3_size << t.p) * 7 / 16 + 1)
-       fprintf (stream, ",");
-    }
-  if ((t.level3_size << t.p) * 7 / 16 + 1 > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  free (level3_packed);
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Mirrored.  */
-/* See Unicode 3.0 book, section 4.7,
-       UAX #9.  */
-
-/* List of mirrored character pairs.  This is a subset of the characters
-   having the BidiMirrored property.  */
-static unsigned int mirror_pairs[][2] =
-{
-  { 0x0028, 0x0029 },
-  { 0x003C, 0x003E },
-  { 0x005B, 0x005D },
-  { 0x007B, 0x007D },
-  { 0x00AB, 0x00BB },
-  { 0x2039, 0x203A },
-  { 0x2045, 0x2046 },
-  { 0x207D, 0x207E },
-  { 0x208D, 0x208E },
-  { 0x2208, 0x220B },
-  { 0x220A, 0x220D },
-  { 0x223C, 0x223D },
-  { 0x2243, 0x22CD },
-  { 0x2252, 0x2253 },
-  { 0x2254, 0x2255 },
-  { 0x2264, 0x2265 },
-  { 0x2266, 0x2267 },
-  { 0x226A, 0x226B },
-  { 0x2276, 0x2277 },
-  { 0x2278, 0x2279 },
-  { 0x227A, 0x227B },
-  { 0x227C, 0x227D },
-  { 0x2282, 0x2283 },
-  { 0x2286, 0x2287 },
-  { 0x228F, 0x2290 },
-  { 0x2291, 0x2292 },
-  { 0x22A2, 0x22A3 },
-  { 0x22B0, 0x22B1 },
-  { 0x22B2, 0x22B3 },
-  { 0x22B4, 0x22B5 },
-  { 0x22B6, 0x22B7 },
-  { 0x22C9, 0x22CA },
-  { 0x22CB, 0x22CC },
-  { 0x22D0, 0x22D1 },
-  { 0x22D6, 0x22D7 },
-  { 0x22D8, 0x22D9 },
-  { 0x22DA, 0x22DB },
-  { 0x22DC, 0x22DD },
-  { 0x22DE, 0x22DF },
-  { 0x22F0, 0x22F1 },
-  { 0x2308, 0x2309 },
-  { 0x230A, 0x230B },
-  { 0x2329, 0x232A },
-  { 0x3008, 0x3009 },
-  { 0x300A, 0x300B },
-  { 0x300C, 0x300D },
-  { 0x300E, 0x300F },
-  { 0x3010, 0x3011 },
-  { 0x3014, 0x3015 },
-  { 0x3016, 0x3017 },
-  { 0x3018, 0x3019 },
-  { 0x301A, 0x301B }
-};
-
-static int
-get_mirror_value (unsigned int ch)
-{
-  bool mirrored;
-  unsigned int mirror_char;
-  unsigned int i;
-
-  mirrored = (unicode_attributes[ch].name != NULL
-             && unicode_attributes[ch].mirrored);
-  mirror_char = 0xfffd;
-  for (i = 0; i < sizeof (mirror_pairs) / sizeof (mirror_pairs[0]); i++)
-    if (ch == mirror_pairs[i][0])
-      {
-       mirror_char = mirror_pairs[i][1];
-       break;
-      }
-    else if (ch == mirror_pairs[i][1])
-      {
-       mirror_char = mirror_pairs[i][0];
-       break;
-      }
-  if (mirrored)
-    return (int) mirror_char - (int) ch;
-  else
-    {
-      if (mirror_char != 0xfffd)
-       abort ();
-      return 0;
-    }
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE mirror_table
-#define ELEMENT int32_t
-#define DEFAULT 0
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-/* Output the per-character mirror table.  */
-static void
-output_mirror (const char *filename, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct mirror_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Mirrored Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 7;
-  t.q = 9;
-  mirror_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int value = get_mirror_value (ch);
-
-      mirror_table_add (&t, ch, value);
-    }
-
-  mirror_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define mirror_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    int level3[%zu << %d];\n", t.level3_size, t.p);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_mirror =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (int32_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level3_size << t.p > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " %5d", ((int32_t *) (t.result + level3_offset))[i]);
-      if (i+1 < t.level3_size << t.p)
-       fprintf (stream, ",");
-    }
-  if (t.level3_size << t.p > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Properties.  */
-
-/* Reading PropList.txt and DerivedCoreProperties.txt.  */
-enum
-{
-  /* PropList.txt */
-  PROP_WHITE_SPACE,
-  PROP_BIDI_CONTROL,
-  PROP_JOIN_CONTROL,
-  PROP_DASH,
-  PROP_HYPHEN,
-  PROP_QUOTATION_MARK,
-  PROP_TERMINAL_PUNCTUATION,
-  PROP_OTHER_MATH,
-  PROP_HEX_DIGIT,
-  PROP_ASCII_HEX_DIGIT,
-  PROP_OTHER_ALPHABETIC,
-  PROP_IDEOGRAPHIC,
-  PROP_DIACRITIC,
-  PROP_EXTENDER,
-  PROP_OTHER_LOWERCASE,
-  PROP_OTHER_UPPERCASE,
-  PROP_NONCHARACTER_CODE_POINT,
-  PROP_OTHER_GRAPHEME_EXTEND,
-  PROP_IDS_BINARY_OPERATOR,
-  PROP_IDS_TRINARY_OPERATOR,
-  PROP_RADICAL,
-  PROP_UNIFIED_IDEOGRAPH,
-  PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT,
-  PROP_DEPRECATED,
-  PROP_SOFT_DOTTED,
-  PROP_LOGICAL_ORDER_EXCEPTION,
-  PROP_OTHER_ID_START,
-  PROP_OTHER_ID_CONTINUE,
-  PROP_STERM,
-  PROP_VARIATION_SELECTOR,
-  PROP_PATTERN_WHITE_SPACE,
-  PROP_PATTERN_SYNTAX,
-  /* DerivedCoreProperties.txt */
-  PROP_MATH,
-  PROP_ALPHABETIC,
-  PROP_LOWERCASE,
-  PROP_UPPERCASE,
-  PROP_ID_START,
-  PROP_ID_CONTINUE,
-  PROP_XID_START,
-  PROP_XID_CONTINUE,
-  PROP_DEFAULT_IGNORABLE_CODE_POINT,
-  PROP_GRAPHEME_EXTEND,
-  PROP_GRAPHEME_BASE,
-  PROP_GRAPHEME_LINK
-};
-unsigned long long unicode_properties[0x110000];
-
-static void
-clear_properties (void)
-{
-  unsigned int i;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_properties[i] = 0;
-}
-
-/* Stores in unicode_properties[] the properties from the
-   PropList.txt or DerivedCoreProperties.txt file.  */
-static void
-fill_properties (const char *proplist_filename)
-{
-  unsigned int i;
-  FILE *stream;
-
-  stream = fopen (proplist_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", proplist_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      char buf[200+1];
-      unsigned int i1, i2;
-      char padding[200+1];
-      char propname[200+1];
-      unsigned int propvalue;
-
-      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
-       break;
-
-      if (buf[0] == '\0' || buf[0] == '#')
-       continue;
-
-      if (sscanf (buf, "%X..%X%[ ;]%[^ ]", &i1, &i2, padding, propname) != 4)
-       {
-         if (sscanf (buf, "%X%[ ;]%[^ ]", &i1, padding, propname) != 3)
-           {
-             fprintf (stderr, "parse error in '%s'\n", proplist_filename);
-             exit (1);
-           }
-         i2 = i1;
-       }
-#define PROP(name,value) \
-      if (strcmp (propname, name) == 0) propvalue = value; else
-      /* PropList.txt */
-      PROP ("White_Space", PROP_WHITE_SPACE)
-      PROP ("Bidi_Control", PROP_BIDI_CONTROL)
-      PROP ("Join_Control", PROP_JOIN_CONTROL)
-      PROP ("Dash", PROP_DASH)
-      PROP ("Hyphen", PROP_HYPHEN)
-      PROP ("Quotation_Mark", PROP_QUOTATION_MARK)
-      PROP ("Terminal_Punctuation", PROP_TERMINAL_PUNCTUATION)
-      PROP ("Other_Math", PROP_OTHER_MATH)
-      PROP ("Hex_Digit", PROP_HEX_DIGIT)
-      PROP ("ASCII_Hex_Digit", PROP_ASCII_HEX_DIGIT)
-      PROP ("Other_Alphabetic", PROP_OTHER_ALPHABETIC)
-      PROP ("Ideographic", PROP_IDEOGRAPHIC)
-      PROP ("Diacritic", PROP_DIACRITIC)
-      PROP ("Extender", PROP_EXTENDER)
-      PROP ("Other_Lowercase", PROP_OTHER_LOWERCASE)
-      PROP ("Other_Uppercase", PROP_OTHER_UPPERCASE)
-      PROP ("Noncharacter_Code_Point", PROP_NONCHARACTER_CODE_POINT)
-      PROP ("Other_Grapheme_Extend", PROP_OTHER_GRAPHEME_EXTEND)
-      PROP ("IDS_Binary_Operator", PROP_IDS_BINARY_OPERATOR)
-      PROP ("IDS_Trinary_Operator", PROP_IDS_TRINARY_OPERATOR)
-      PROP ("Radical", PROP_RADICAL)
-      PROP ("Unified_Ideograph", PROP_UNIFIED_IDEOGRAPH)
-      PROP ("Other_Default_Ignorable_Code_Point", PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)
-      PROP ("Deprecated", PROP_DEPRECATED)
-      PROP ("Soft_Dotted", PROP_SOFT_DOTTED)
-      PROP ("Logical_Order_Exception", PROP_LOGICAL_ORDER_EXCEPTION)
-      PROP ("Other_ID_Start", PROP_OTHER_ID_START)
-      PROP ("Other_ID_Continue", PROP_OTHER_ID_CONTINUE)
-      PROP ("STerm", PROP_STERM)
-      PROP ("Variation_Selector", PROP_VARIATION_SELECTOR)
-      PROP ("Pattern_White_Space", PROP_PATTERN_WHITE_SPACE)
-      PROP ("Pattern_Syntax", PROP_PATTERN_SYNTAX)
-      /* DerivedCoreProperties.txt */
-      PROP ("Math", PROP_MATH)
-      PROP ("Alphabetic", PROP_ALPHABETIC)
-      PROP ("Lowercase", PROP_LOWERCASE)
-      PROP ("Uppercase", PROP_UPPERCASE)
-      PROP ("ID_Start", PROP_ID_START)
-      PROP ("ID_Continue", PROP_ID_CONTINUE)
-      PROP ("XID_Start", PROP_XID_START)
-      PROP ("XID_Continue", PROP_XID_CONTINUE)
-      PROP ("Default_Ignorable_Code_Point", PROP_DEFAULT_IGNORABLE_CODE_POINT)
-      PROP ("Grapheme_Extend", PROP_GRAPHEME_EXTEND)
-      PROP ("Grapheme_Base", PROP_GRAPHEME_BASE)
-      PROP ("Grapheme_Link", PROP_GRAPHEME_LINK)
-#undef PROP
-       {
-         fprintf (stderr, "unknown property named '%s' in '%s'\n", propname,
-                  proplist_filename);
-         exit (1);
-       }
-      if (!(i1 <= i2 && i2 < 0x110000))
-       abort ();
-
-      for (i = i1; i <= i2; i++)
-       unicode_properties[i] |= 1ULL << propvalue;
-    }
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", proplist_filename);
-      exit (1);
-    }
-}
-
-/* Stores in array the given property from the Unicode 3.0 PropList.txt
-   file.  */
-static void
-fill_property30 (char array[0x110000], const char *proplist_filename, const char *property_name)
-{
-  unsigned int i;
-  FILE *stream;
-  char buf[100+1];
-
-  for (i = 0; i < 0x110000; i++)
-    array[i] = 0;
-
-  stream = fopen (proplist_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", proplist_filename);
-      exit (1);
-    }
-
-  /* Search for the "Property dump for: ..." line.  */
-  do
-    {
-      if (fscanf (stream, "%100[^\n]\n", buf) < 1)
-       {
-         fprintf (stderr, "no property found in '%s'\n", proplist_filename);
-         exit (1);
-       }
-    }
-  while (strstr (buf, property_name) == NULL);
-
-  for (;;)
-    {
-      unsigned int i1, i2;
-
-      if (fscanf (stream, "%100[^\n]\n", buf) < 1)
-       break;
-      if (buf[0] == '*')
-       break;
-      if (strlen (buf) >= 10 && buf[4] == '.' && buf[5] == '.')
-       {
-         if (sscanf (buf, "%4X..%4X", &i1, &i2) < 2)
-           {
-             fprintf (stderr, "parse error in property in '%s'\n",
-                      proplist_filename);
-             exit (1);
-           }
-       }
-      else if (strlen (buf) >= 4)
-       {
-         if (sscanf (buf, "%4X", &i1) < 1)
-           {
-             fprintf (stderr, "parse error in property in '%s'\n",
-                      proplist_filename);
-             exit (1);
-           }
-         i2 = i1;
-       }
-      else
-       {
-         fprintf (stderr, "parse error in property in '%s'\n",
-                  proplist_filename);
-         exit (1);
-       }
-      if (!(i1 <= i2 && i2 < 0x110000))
-       abort ();
-      for (i = i1; i <= i2; i++)
-       array[i] = 1;
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", proplist_filename);
-      exit (1);
-    }
-}
-
-/* Properties from Unicode 3.0 PropList.txt file.  */
-
-/* The paired punctuation property from the PropList.txt file.  */
-char unicode_pairedpunctuation[0x110000];
-
-/* The left of pair property from the PropList.txt file.  */
-char unicode_leftofpair[0x110000];
-
-static void
-fill_properties30 (const char *proplist30_filename)
-{
-  fill_property30 (unicode_pairedpunctuation, proplist30_filename, "(Paired Punctuation)");
-  fill_property30 (unicode_leftofpair, proplist30_filename, "(Left of Pair)");
-}
-
-/* ------------------------------------------------------------------------- */
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_white_space (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_WHITE_SPACE)) != 0);
-}
-
-/* See Unicode 3.0 book, section 4.10,
-       PropList.txt, UCD.html,
-       DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_alphabetic (unsigned int ch)
-{
-  bool result1 =
-    is_category_L (ch)
-    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_ALPHABETIC)) != 0)
-    /* For some reason, the following are listed as having property
-       Alphabetic but not as having property Other_Alphabetic.  */
-    || (ch >= 0x16EE && ch <= 0x16F0) /* RUNIC SYMBOLS */
-    || (ch >= 0x2160 && ch <= 0x2182) /* ROMAN NUMERALS */
-    || (ch >= 0x24D0 && ch <= 0x24E9) /* CIRCLED LATIN SMALL LETTER */
-    || (ch == 0x3007) /* IDEOGRAPHIC NUMBER ZERO */
-    || (ch >= 0x3021 && ch <= 0x3029) /* HANGZHOU NUMERAL */
-    || (ch >= 0x3038 && ch <= 0x303A) /* HANGZHOU NUMERAL */
-    || (ch >= 0x10140 && ch <= 0x10174) /* GREEK ACROPHONICS */
-    || (ch == 0x10341) /* GOTHIC LETTER NINETY */
-    || (ch == 0x1034A) /* GOTHIC LETTER NINE HUNDRED */
-    || (ch >= 0x103D1 && ch <= 0x103D5) /* OLD PERSIAN NUMBERS */
-    || (ch >= 0x12400 && ch <= 0x12462); /* CUNEIFORM NUMERIC SIGNS */
-  bool result2 =
-    ((unicode_properties[ch] & (1ULL << PROP_ALPHABETIC)) != 0);
-
-  if (result1 != result2)
-    abort ();
-  return result1;
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_alphabetic (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_ALPHABETIC)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_not_a_character (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_NONCHARACTER_CODE_POINT)) != 0);
-}
-
-/* See PropList.txt, UCD.html,
-       DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_default_ignorable_code_point (unsigned int ch)
-{
-  bool result1 =
-    (is_category_Cf (ch)
-     && !(ch >= 0xFFF9 && ch <= 0xFFFB)) /* Annotations */
-    || ((is_category_Cc (ch) || is_category_Cs (ch))
-       && !is_property_white_space (ch))
-    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0)
-    || ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0)
-    || is_property_not_a_character (ch);
-  bool result2 =
-    ((unicode_properties[ch] & (1ULL << PROP_DEFAULT_IGNORABLE_CODE_POINT)) != 0);
-
-  if (result1 != result2)
-    abort ();
-  return result1;
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_default_ignorable_code_point (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_deprecated (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_DEPRECATED)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_logical_order_exception (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_LOGICAL_ORDER_EXCEPTION)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_variation_selector (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_private_use (unsigned int ch)
-{
-  /* Determined through "grep 'Private Use,' UnicodeData-3.1.0.txt".  */
-  return (ch >= 0xE000 && ch <= 0xF8FF)
-        || (ch >= 0xF0000 && ch <= 0xFFFFD)
-        || (ch >= 0x100000 && ch <= 0x10FFFD);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_unassigned_code_value (unsigned int ch)
-{
-  return (is_category_Cn (ch) && !is_property_not_a_character (ch));
-}
-
-/* See PropList.txt, UCD.html,
-       DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_uppercase (unsigned int ch)
-{
-  bool result1 =
-    is_category_Lu (ch)
-    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_UPPERCASE)) != 0);
-  bool result2 =
-    ((unicode_properties[ch] & (1ULL << PROP_UPPERCASE)) != 0);
-
-  if (result1 != result2)
-    abort ();
-  return result1;
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_uppercase (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_UPPERCASE)) != 0);
-}
-
-/* See PropList.txt, UCD.html,
-       DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_lowercase (unsigned int ch)
-{
-  bool result1 =
-    is_category_Ll (ch)
-    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_LOWERCASE)) != 0);
-  bool result2 =
-    ((unicode_properties[ch] & (1ULL << PROP_LOWERCASE)) != 0);
-
-  if (result1 != result2)
-    abort ();
-  return result1;
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_lowercase (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_LOWERCASE)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_titlecase (unsigned int ch)
-{
-  return is_category_Lt (ch);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_soft_dotted (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_SOFT_DOTTED)) != 0);
-}
-
-/* See DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_id_start (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_ID_START)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_id_start (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_ID_START)) != 0);
-}
-
-/* See DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_id_continue (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_ID_CONTINUE)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_id_continue (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_ID_CONTINUE)) != 0);
-}
-
-/* See DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_xid_start (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_XID_START)) != 0);
-}
-
-/* See DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_xid_continue (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_XID_CONTINUE)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_pattern_white_space (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_PATTERN_WHITE_SPACE)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_pattern_syntax (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_PATTERN_SYNTAX)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_join_control (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_JOIN_CONTROL)) != 0);
-}
-
-/* See DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_grapheme_base (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_GRAPHEME_BASE)) != 0);
-}
-
-/* See DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_grapheme_extend (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_GRAPHEME_EXTEND)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_grapheme_extend (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_GRAPHEME_EXTEND)) != 0);
-}
-
-/* See DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_grapheme_link (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_GRAPHEME_LINK)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_bidi_control (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_BIDI_CONTROL)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_left_to_right (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_L);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_hebrew_right_to_left (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_R);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_arabic_right_to_left (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_AL);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_european_digit (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_EN);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_eur_num_separator (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_ES);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_eur_num_terminator (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_ET);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_arabic_digit (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_AN);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_common_separator (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_CS);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_block_separator (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_B);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_segment_separator (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_S);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_whitespace (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_WS);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_non_spacing_mark (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_NSM);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_boundary_neutral (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_BN);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_pdf (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_PDF);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_embedding_or_override (unsigned int ch)
-{
-  int category = get_bidi_category (ch);
-  return (category == UC_BIDI_LRE || category == UC_BIDI_LRO
-         || category == UC_BIDI_RLE || category == UC_BIDI_RLO);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_bidi_other_neutral (unsigned int ch)
-{
-  return (get_bidi_category (ch) == UC_BIDI_ON);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_hex_digit (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_HEX_DIGIT)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_ascii_hex_digit (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_ASCII_HEX_DIGIT)) != 0);
-}
-
-/* See Unicode 3.0 book, section 4.10,
-       PropList.txt, UCD.html.  */
-static bool
-is_property_ideographic (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_IDEOGRAPHIC)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_unified_ideograph (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_UNIFIED_IDEOGRAPH)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_radical (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_RADICAL)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_ids_binary_operator (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_IDS_BINARY_OPERATOR)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_ids_trinary_operator (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_IDS_TRINARY_OPERATOR)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_zero_width (unsigned int ch)
-{
-  return is_category_Cf (ch)
-        || (unicode_attributes[ch].name != NULL
-            && strstr (unicode_attributes[ch].name, "ZERO WIDTH") != NULL);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_space (unsigned int ch)
-{
-  return is_category_Zs (ch);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_non_break (unsigned int ch)
-{
-  /* This is exactly the set of characters having line breaking
-     property GL.  */
-  return (ch == 0x00A0 /* NO-BREAK SPACE */
-         || ch == 0x034F /* COMBINING GRAPHEME JOINER */
-         || ch == 0x035C /* COMBINING DOUBLE BREVE BELOW */
-         || ch == 0x035D /* COMBINING DOUBLE BREVE */
-         || ch == 0x035E /* COMBINING DOUBLE MACRON */
-         || ch == 0x035F /* COMBINING DOUBLE MACRON BELOW */
-         || ch == 0x0360 /* COMBINING DOUBLE TILDE */
-         || ch == 0x0361 /* COMBINING DOUBLE INVERTED BREVE */
-         || ch == 0x0362 /* COMBINING DOUBLE RIGHTWARDS ARROW BELOW */
-         || ch == 0x0F08 /* TIBETAN MARK SBRUL SHAD */
-         || ch == 0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR */
-         || ch == 0x0F12 /* TIBETAN MARK RGYA GRAM SHAD */
-         || ch == 0x180E /* MONGOLIAN VOWEL SEPARATOR */
-         || ch == 0x2007 /* FIGURE SPACE */
-         || ch == 0x2011 /* NON-BREAKING HYPHEN */
-         || ch == 0x202F /* NARROW NO-BREAK SPACE */);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_iso_control (unsigned int ch)
-{
-  bool result1 =
-    (unicode_attributes[ch].name != NULL
-     && strcmp (unicode_attributes[ch].name, "<control>") == 0);
-  bool result2 =
-    is_category_Cc (ch);
-
-  if (result1 != result2)
-    abort ();
-  return result1;
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_format_control (unsigned int ch)
-{
-  return (is_category_Cf (ch)
-         && get_bidi_category (ch) == UC_BIDI_BN
-         && !is_property_join_control (ch)
-         && ch != 0xFEFF);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_dash (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_DASH)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_hyphen (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_HYPHEN)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_punctuation (unsigned int ch)
-{
-  return is_category_P (ch);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_line_separator (unsigned int ch)
-{
-  return is_category_Zl (ch);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_paragraph_separator (unsigned int ch)
-{
-  return is_category_Zp (ch);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_quotation_mark (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_QUOTATION_MARK)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_sentence_terminal (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_STERM)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_terminal_punctuation (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_TERMINAL_PUNCTUATION)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_currency_symbol (unsigned int ch)
-{
-  return is_category_Sc (ch);
-}
-
-/* See Unicode 3.0 book, section 4.9,
-       PropList.txt, UCD.html,
-       DerivedCoreProperties.txt, UCD.html.  */
-static bool
-is_property_math (unsigned int ch)
-{
-  bool result1 =
-    is_category_Sm (ch)
-    || ((unicode_properties[ch] & (1ULL << PROP_OTHER_MATH)) != 0);
-  bool result2 =
-    ((unicode_properties[ch] & (1ULL << PROP_MATH)) != 0);
-
-  if (result1 != result2)
-    abort ();
-  return result1;
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_other_math (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_OTHER_MATH)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_paired_punctuation (unsigned int ch)
-{
-  return unicode_pairedpunctuation[ch];
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_left_of_pair (unsigned int ch)
-{
-  return unicode_leftofpair[ch];
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_combining (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && (strcmp (unicode_attributes[ch].combining, "0") != 0
-             || is_category_Mc (ch)
-             || is_category_Me (ch)
-             || is_category_Mn (ch)));
-}
-
-#if 0 /* same as is_property_bidi_non_spacing_mark */
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_non_spacing (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && get_bidi_category (ch) == UC_BIDI_NSM);
-}
-#endif
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_composite (unsigned int ch)
-{
-  /* This definition differs from the one in PropList-3.0.1.txt, but is more
-     logical in some sense.  */
-  if (ch >= 0xAC00 && ch <= 0xD7A4) /* Hangul Syllables */
-    return true;
-  if (unicode_attributes[ch].name != NULL
-      && unicode_attributes[ch].decomposition != NULL)
-    {
-      /* Test whether the decomposition contains more than one character,
-        and the first is not a space.  */
-      const char *decomp = unicode_attributes[ch].decomposition;
-      if (decomp[0] == '<')
-       {
-         decomp = strchr (decomp, '>') + 1;
-         if (decomp[0] == ' ')
-           decomp++;
-       }
-      return strchr (decomp, ' ') != NULL && strncmp (decomp, "0020 ", 5) != 0;
-    }
-  return false;
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_decimal_digit (unsigned int ch)
-{
-  return is_category_Nd (ch);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_numeric (unsigned int ch)
-{
-  return ((get_numeric_value (ch)).denominator > 0)
-        || (ch == 0x09F8) /* BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR */
-        || (ch == 0x2183); /* ROMAN NUMERAL REVERSED ONE HUNDRED */
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_diacritic (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_DIACRITIC)) != 0);
-}
-
-/* See PropList.txt, UCD.html.  */
-static bool
-is_property_extender (unsigned int ch)
-{
-  return ((unicode_properties[ch] & (1ULL << PROP_EXTENDER)) != 0);
-}
-
-/* See PropList-3.0.1.txt.  */
-static bool
-is_property_ignorable_control (unsigned int ch)
-{
-  return ((is_category_Cc (ch) && get_bidi_category (ch) == UC_BIDI_BN)
-         || is_category_Cf (ch))
-        && ch != 0x0000;
-}
-
-/* ------------------------------------------------------------------------- */
-
-/* Output all properties.  */
-static void
-output_properties (const char *version)
-{
-#define PROPERTY(P) \
-  debug_output_predicate ("pr_" #P ".txt", is_property_ ## P); \
-  output_predicate_test ("test-pr_" #P ".c", is_property_ ## P, "uc_is_property_" #P " (c)"); \
-  output_predicate ("pr_" #P ".h", is_property_ ## P, "u_property_" #P, "Properties", version);
-  PROPERTY(white_space)
-  PROPERTY(alphabetic)
-  PROPERTY(other_alphabetic)
-  PROPERTY(not_a_character)
-  PROPERTY(default_ignorable_code_point)
-  PROPERTY(other_default_ignorable_code_point)
-  PROPERTY(deprecated)
-  PROPERTY(logical_order_exception)
-  PROPERTY(variation_selector)
-  PROPERTY(private_use)
-  PROPERTY(unassigned_code_value)
-  PROPERTY(uppercase)
-  PROPERTY(other_uppercase)
-  PROPERTY(lowercase)
-  PROPERTY(other_lowercase)
-  PROPERTY(titlecase)
-  PROPERTY(soft_dotted)
-  PROPERTY(id_start)
-  PROPERTY(other_id_start)
-  PROPERTY(id_continue)
-  PROPERTY(other_id_continue)
-  PROPERTY(xid_start)
-  PROPERTY(xid_continue)
-  PROPERTY(pattern_white_space)
-  PROPERTY(pattern_syntax)
-  PROPERTY(join_control)
-  PROPERTY(grapheme_base)
-  PROPERTY(grapheme_extend)
-  PROPERTY(other_grapheme_extend)
-  PROPERTY(grapheme_link)
-  PROPERTY(bidi_control)
-  PROPERTY(bidi_left_to_right)
-  PROPERTY(bidi_hebrew_right_to_left)
-  PROPERTY(bidi_arabic_right_to_left)
-  PROPERTY(bidi_european_digit)
-  PROPERTY(bidi_eur_num_separator)
-  PROPERTY(bidi_eur_num_terminator)
-  PROPERTY(bidi_arabic_digit)
-  PROPERTY(bidi_common_separator)
-  PROPERTY(bidi_block_separator)
-  PROPERTY(bidi_segment_separator)
-  PROPERTY(bidi_whitespace)
-  PROPERTY(bidi_non_spacing_mark)
-  PROPERTY(bidi_boundary_neutral)
-  PROPERTY(bidi_pdf)
-  PROPERTY(bidi_embedding_or_override)
-  PROPERTY(bidi_other_neutral)
-  PROPERTY(hex_digit)
-  PROPERTY(ascii_hex_digit)
-  PROPERTY(ideographic)
-  PROPERTY(unified_ideograph)
-  PROPERTY(radical)
-  PROPERTY(ids_binary_operator)
-  PROPERTY(ids_trinary_operator)
-  PROPERTY(zero_width)
-  PROPERTY(space)
-  PROPERTY(non_break)
-  PROPERTY(iso_control)
-  PROPERTY(format_control)
-  PROPERTY(dash)
-  PROPERTY(hyphen)
-  PROPERTY(punctuation)
-  PROPERTY(line_separator)
-  PROPERTY(paragraph_separator)
-  PROPERTY(quotation_mark)
-  PROPERTY(sentence_terminal)
-  PROPERTY(terminal_punctuation)
-  PROPERTY(currency_symbol)
-  PROPERTY(math)
-  PROPERTY(other_math)
-  PROPERTY(paired_punctuation)
-  PROPERTY(left_of_pair)
-  PROPERTY(combining)
-  PROPERTY(composite)
-  PROPERTY(decimal_digit)
-  PROPERTY(numeric)
-  PROPERTY(diacritic)
-  PROPERTY(extender)
-  PROPERTY(ignorable_control)
-#undef PROPERTY
-}
-
-/* ========================================================================= */
-
-/* Scripts.  */
-
-static const char *scripts[256];
-static unsigned int numscripts;
-
-static uint8_t unicode_scripts[0x110000];
-
-static void
-fill_scripts (const char *scripts_filename)
-{
-  FILE *stream;
-  unsigned int i;
-
-  stream = fopen (scripts_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", scripts_filename);
-      exit (1);
-    }
-
-  numscripts = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_scripts[i] = (uint8_t)~(uint8_t)0;
-
-  for (;;)
-    {
-      char buf[200+1];
-      unsigned int i1, i2;
-      char padding[200+1];
-      char scriptname[200+1];
-      int script;
-
-      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
-       break;
-
-      if (buf[0] == '\0' || buf[0] == '#')
-       continue;
-
-      if (sscanf (buf, "%X..%X%[ ;]%[^ ]", &i1, &i2, padding, scriptname) != 4)
-       {
-         if (sscanf (buf, "%X%[ ;]%[^ ]", &i1, padding, scriptname) != 3)
-           {
-             fprintf (stderr, "parse error in '%s'\n", scripts_filename);
-             exit (1);
-           }
-         i2 = i1;
-       }
-      if (i2 < i1)
-       abort ();
-      if (i2 >= 0x110000)
-       abort ();
-
-      for (script = numscripts - 1; script >= 0; script--)
-       if (strcmp (scripts[script], scriptname) == 0)
-         break;
-      if (script < 0)
-       {
-         scripts[numscripts] = strdup (scriptname);
-         script = numscripts;
-         numscripts++;
-         if (numscripts == 256)
-           abort ();
-       }
-
-      for (i = i1; i <= i2; i++)
-       {
-         if (unicode_scripts[i] != (uint8_t)~(uint8_t)0)
-           fprintf (stderr, "0x%04X belongs to multiple scripts\n", i);
-         unicode_scripts[i] = script;
-       }
-    }
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", scripts_filename);
-      exit (1);
-    }
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE script_table
-#define ELEMENT uint8_t
-#define DEFAULT (uint8_t)~(uint8_t)0
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-static void
-output_scripts (const char *version)
-{
-  const char *filename = "scripts.h";
-  FILE *stream;
-  unsigned int ch, s, i;
-  struct script_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  typedef struct
-  {
-    const char *lowercase_name;
-  }
-  scriptinfo_t;
-  scriptinfo_t scriptinfo[256];
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Unicode scripts.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  for (s = 0; s < numscripts; s++)
-    {
-      char *lcp = strdup (scripts[s]);
-      char *cp;
-
-      for (cp = lcp; *cp != '\0'; cp++)
-       if (*cp >= 'A' && *cp <= 'Z')
-         *cp += 'a' - 'A';
-
-      scriptinfo[s].lowercase_name = lcp;
-    }
-
-  for (s = 0; s < numscripts; s++)
-    {
-      fprintf (stream, "static const uc_interval_t script_%s_intervals[] =\n",
-              scriptinfo[s].lowercase_name);
-      fprintf (stream, "{\n");
-      i = 0;
-      for (ch = 0; ch < 0x110000; ch++)
-       if (unicode_scripts[ch] == s)
-         {
-           unsigned int start;
-           unsigned int end;
-
-           start = ch;
-           while (ch + 1 < 0x110000 && unicode_scripts[ch + 1] == s)
-             ch++;
-           end = ch;
-
-           if (i > 0)
-             fprintf (stream, ",\n");
-           if (start == end)
-             fprintf (stream, "  { 0x%04X, 1, 1 }", start);
-           else
-             fprintf (stream, "  { 0x%04X, 1, 0 }, { 0x%04X, 0, 1 }",
-                      start, end);
-           i++;
-         }
-      fprintf (stream, "\n");
-      fprintf (stream, "};\n");
-    }
-
-  fprintf (stream, "static const uc_script_t scripts[%d] =\n", numscripts);
-  fprintf (stream, "{\n");
-  for (s = 0; s < numscripts; s++)
-    {
-      fprintf (stream, "  {\n");
-      fprintf (stream, "    sizeof (script_%s_intervals) / sizeof (uc_interval_t),\n",
-              scriptinfo[s].lowercase_name);
-      fprintf (stream, "    script_%s_intervals,\n",
-              scriptinfo[s].lowercase_name);
-      fprintf (stream, "    \"%s\"\n", scripts[s]);
-      fprintf (stream, "  }");
-      if (s+1 < numscripts)
-       fprintf (stream, ",");
-      fprintf (stream, "\n");
-    }
-  fprintf (stream, "};\n");
-
-  t.p = 7;
-  t.q = 9;
-  script_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      unsigned int s = unicode_scripts[ch];
-      if (s != (uint8_t)~(uint8_t)0)
-       script_table_add (&t, ch, s);
-    }
-
-  script_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define script_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "u_script =\n");
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level3_size << t.p > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " %3d", ((uint8_t *) (t.result + level3_offset))[i]);
-      if (i+1 < t.level3_size << t.p)
-       fprintf (stream, ",");
-    }
-  if (t.level3_size << t.p > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-static void
-output_scripts_byname (const char *version)
-{
-  const char *filename = "scripts_byname.gperf";
-  FILE *stream;
-  unsigned int s;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Unicode scripts.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-  fprintf (stream, "struct named_script { const char *name; unsigned int index; };\n");
-  fprintf (stream, "%%struct-type\n");
-  fprintf (stream, "%%language=ANSI-C\n");
-  fprintf (stream, "%%define hash-function-name scripts_hash\n");
-  fprintf (stream, "%%define lookup-function-name uc_script_lookup\n");
-  fprintf (stream, "%%readonly-tables\n");
-  fprintf (stream, "%%global-table\n");
-  fprintf (stream, "%%define word-array-name script_names\n");
-  fprintf (stream, "%%%%\n");
-  for (s = 0; s < numscripts; s++)
-    fprintf (stream, "%s, %u\n", scripts[s], s);
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* Blocks.  */
-
-typedef struct { unsigned int start; unsigned int end; const char *name; }
-  block_t;
-static block_t blocks[256];
-static unsigned int numblocks;
-
-static void
-fill_blocks (const char *blocks_filename)
-{
-  FILE *stream;
-
-  stream = fopen (blocks_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", blocks_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      char buf[200+1];
-      unsigned int i1, i2;
-      char padding[200+1];
-      char blockname[200+1];
-
-      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
-       break;
-
-      if (buf[0] == '\0' || buf[0] == '#')
-       continue;
-
-      if (sscanf (buf, "%X..%X%[ ;]%[^\r]", &i1, &i2, padding, blockname) != 4)
-       {
-         fprintf (stderr, "parse error in '%s'\n", blocks_filename);
-         exit (1);
-       }
-      blocks[numblocks].start = i1;
-      blocks[numblocks].end = i2;
-      blocks[numblocks].name = strdup (blockname);
-      /* It must be sorted.  */
-      if (numblocks > 0 && !(blocks[numblocks-1].end < blocks[numblocks].start))
-       abort ();
-      numblocks++;
-      if (numblocks == 256)
-       abort ();
-    }
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", blocks_filename);
-      exit (1);
-    }
-}
-
-/* Return the smallest block index among the blocks for characters >= ch.  */
-static unsigned int
-block_first_index (unsigned int ch)
-{
-  /* Binary search.  */
-  unsigned int lo = 0;
-  unsigned int hi = numblocks;
-  /* Invariants:
-     All blocks[i], i < lo, have blocks[i].end < ch,
-     all blocks[i], i >= hi, have blocks[i].end >= ch.  */
-  while (lo < hi)
-    {
-      unsigned int mid = (lo + hi) / 2; /* >= lo, < hi */
-      if (blocks[mid].end < ch)
-       lo = mid + 1;
-      else
-       hi = mid;
-    }
-  return hi;
-}
-
-/* Return the largest block index among the blocks for characters <= ch,
-   plus 1.  */
-static unsigned int
-block_last_index (unsigned int ch)
-{
-  /* Binary search.  */
-  unsigned int lo = 0;
-  unsigned int hi = numblocks;
-  /* Invariants:
-     All blocks[i], i < lo, have blocks[i].start <= ch,
-     all blocks[i], i >= hi, have blocks[i].start > ch.  */
-  while (lo < hi)
-    {
-      unsigned int mid = (lo + hi) / 2; /* >= lo, < hi */
-      if (blocks[mid].start <= ch)
-       lo = mid + 1;
-      else
-       hi = mid;
-    }
-  return hi;
-}
-
-static void
-output_blocks (const char *version)
-{
-  const char *filename = "blocks.h";
-  const unsigned int shift = 8; /* bits to shift away for array access */
-  const unsigned int threshold = 0x30000; /* cut-off table here to save space */
-  FILE *stream;
-  unsigned int i;
-  unsigned int i1;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Unicode blocks.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  fprintf (stream, "static const uc_block_t blocks[] =\n");
-  fprintf (stream, "{\n");
-  for (i = 0; i < numblocks; i++)
-    {
-      fprintf (stream, "  { 0x%04X, 0x%04X, \"%s\" }", blocks[i].start,
-              blocks[i].end, blocks[i].name);
-      if (i+1 < numblocks)
-       fprintf (stream, ",");
-      fprintf (stream, "\n");
-    }
-  fprintf (stream, "};\n");
-  fprintf (stream, "#define blocks_level1_shift %d\n", shift);
-  fprintf (stream, "#define blocks_level1_threshold 0x%04X\n", threshold);
-  fprintf (stream, "static const uint8_t blocks_level1[%d * 2] =\n",
-          threshold >> shift);
-  fprintf (stream, "{\n");
-  for (i1 = 0; i1 < (threshold >> shift); i1++)
-    {
-      unsigned int first_index = block_first_index (i1 << shift);
-      unsigned int last_index = block_last_index (((i1 + 1) << shift) - 1);
-      fprintf (stream, "  %3d, %3d", first_index, last_index);
-      if (i1+1 < (threshold >> shift))
-       fprintf (stream, ",");
-      fprintf (stream, "\n");
-    }
-  fprintf (stream, "};\n");
-  fprintf (stream, "#define blocks_upper_first_index %d\n",
-          block_first_index (threshold));
-  fprintf (stream, "#define blocks_upper_last_index %d\n",
-          block_last_index (0x10FFFF));
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* ========================================================================= */
-
-/* C and Java syntax.  */
-
-enum
-{
-  UC_IDENTIFIER_START,    /* valid as first or subsequent character */
-  UC_IDENTIFIER_VALID,    /* valid as subsequent character only */
-  UC_IDENTIFIER_INVALID,  /* not valid */
-  UC_IDENTIFIER_IGNORABLE /* ignorable (Java only) */
-};
-
-/* ISO C 99 section 6.4.(3).  */
-static bool
-is_c_whitespace (unsigned int ch)
-{
-  return (ch == ' ' /* space */
-         || ch == '\t' /* horizontal tab */
-         || ch == '\n' || ch == '\r' /* new-line */
-         || ch == '\v' /* vertical tab */
-         || ch == '\f'); /* form-feed */
-}
-
-/* ISO C 99 section 6.4.2.1 and appendix D.  */
-static int
-c_ident_category (unsigned int ch)
-{
-  /* Section 6.4.2.1.  */
-  if (ch >= '0' && ch <= '9')
-    return UC_IDENTIFIER_VALID;
-  if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_')
-    return UC_IDENTIFIER_START;
-  /* Appendix D.  */
-  if (0
-      /* Latin */
-      || (ch == 0x00AA)
-      || (ch == 0x00BA)
-      || (ch >= 0x00C0 && ch <= 0x00D6)
-      || (ch >= 0x00D8 && ch <= 0x00F6)
-      || (ch >= 0x00F8 && ch <= 0x01F5)
-      || (ch >= 0x01FA && ch <= 0x0217)
-      || (ch >= 0x0250 && ch <= 0x02A8)
-      || (ch >= 0x1E00 && ch <= 0x1E9B)
-      || (ch >= 0x1EA0 && ch <= 0x1EF9)
-      || (ch == 0x207F)
-      /* Greek */
-      || (ch == 0x0386)
-      || (ch >= 0x0388 && ch <= 0x038A)
-      || (ch == 0x038C)
-      || (ch >= 0x038E && ch <= 0x03A1)
-      || (ch >= 0x03A3 && ch <= 0x03CE)
-      || (ch >= 0x03D0 && ch <= 0x03D6)
-      || (ch == 0x03DA)
-      || (ch == 0x03DC)
-      || (ch == 0x03DE)
-      || (ch == 0x03E0)
-      || (ch >= 0x03E2 && ch <= 0x03F3)
-      || (ch >= 0x1F00 && ch <= 0x1F15)
-      || (ch >= 0x1F18 && ch <= 0x1F1D)
-      || (ch >= 0x1F20 && ch <= 0x1F45)
-      || (ch >= 0x1F48 && ch <= 0x1F4D)
-      || (ch >= 0x1F50 && ch <= 0x1F57)
-      || (ch == 0x1F59)
-      || (ch == 0x1F5B)
-      || (ch == 0x1F5D)
-      || (ch >= 0x1F5F && ch <= 0x1F7D)
-      || (ch >= 0x1F80 && ch <= 0x1FB4)
-      || (ch >= 0x1FB6 && ch <= 0x1FBC)
-      || (ch >= 0x1FC2 && ch <= 0x1FC4)
-      || (ch >= 0x1FC6 && ch <= 0x1FCC)
-      || (ch >= 0x1FD0 && ch <= 0x1FD3)
-      || (ch >= 0x1FD6 && ch <= 0x1FDB)
-      || (ch >= 0x1FE0 && ch <= 0x1FEC)
-      || (ch >= 0x1FF2 && ch <= 0x1FF4)
-      || (ch >= 0x1FF6 && ch <= 0x1FFC)
-      /* Cyrillic */
-      || (ch >= 0x0401 && ch <= 0x040C)
-      || (ch >= 0x040E && ch <= 0x044F)
-      || (ch >= 0x0451 && ch <= 0x045C)
-      || (ch >= 0x045E && ch <= 0x0481)
-      || (ch >= 0x0490 && ch <= 0x04C4)
-      || (ch >= 0x04C7 && ch <= 0x04C8)
-      || (ch >= 0x04CB && ch <= 0x04CC)
-      || (ch >= 0x04D0 && ch <= 0x04EB)
-      || (ch >= 0x04EE && ch <= 0x04F5)
-      || (ch >= 0x04F8 && ch <= 0x04F9)
-      /* Armenian */
-      || (ch >= 0x0531 && ch <= 0x0556)
-      || (ch >= 0x0561 && ch <= 0x0587)
-      /* Hebrew */
-      || (ch >= 0x05B0 && ch <= 0x05B9)
-      || (ch >= 0x05BB && ch <= 0x05BD)
-      || (ch == 0x05BF)
-      || (ch >= 0x05C1 && ch <= 0x05C2)
-      || (ch >= 0x05D0 && ch <= 0x05EA)
-      || (ch >= 0x05F0 && ch <= 0x05F2)
-      /* Arabic */
-      || (ch >= 0x0621 && ch <= 0x063A)
-      || (ch >= 0x0640 && ch <= 0x0652)
-      || (ch >= 0x0670 && ch <= 0x06B7)
-      || (ch >= 0x06BA && ch <= 0x06BE)
-      || (ch >= 0x06C0 && ch <= 0x06CE)
-      || (ch >= 0x06D0 && ch <= 0x06DC)
-      || (ch >= 0x06E5 && ch <= 0x06E8)
-      || (ch >= 0x06EA && ch <= 0x06ED)
-      /* Devanagari */
-      || (ch >= 0x0901 && ch <= 0x0903)
-      || (ch >= 0x0905 && ch <= 0x0939)
-      || (ch >= 0x093E && ch <= 0x094D)
-      || (ch >= 0x0950 && ch <= 0x0952)
-      || (ch >= 0x0958 && ch <= 0x0963)
-      /* Bengali */
-      || (ch >= 0x0981 && ch <= 0x0983)
-      || (ch >= 0x0985 && ch <= 0x098C)
-      || (ch >= 0x098F && ch <= 0x0990)
-      || (ch >= 0x0993 && ch <= 0x09A8)
-      || (ch >= 0x09AA && ch <= 0x09B0)
-      || (ch == 0x09B2)
-      || (ch >= 0x09B6 && ch <= 0x09B9)
-      || (ch >= 0x09BE && ch <= 0x09C4)
-      || (ch >= 0x09C7 && ch <= 0x09C8)
-      || (ch >= 0x09CB && ch <= 0x09CD)
-      || (ch >= 0x09DC && ch <= 0x09DD)
-      || (ch >= 0x09DF && ch <= 0x09E3)
-      || (ch >= 0x09F0 && ch <= 0x09F1)
-      /* Gurmukhi */
-      || (ch == 0x0A02)
-      || (ch >= 0x0A05 && ch <= 0x0A0A)
-      || (ch >= 0x0A0F && ch <= 0x0A10)
-      || (ch >= 0x0A13 && ch <= 0x0A28)
-      || (ch >= 0x0A2A && ch <= 0x0A30)
-      || (ch >= 0x0A32 && ch <= 0x0A33)
-      || (ch >= 0x0A35 && ch <= 0x0A36)
-      || (ch >= 0x0A38 && ch <= 0x0A39)
-      || (ch >= 0x0A3E && ch <= 0x0A42)
-      || (ch >= 0x0A47 && ch <= 0x0A48)
-      || (ch >= 0x0A4B && ch <= 0x0A4D)
-      || (ch >= 0x0A59 && ch <= 0x0A5C)
-      || (ch == 0x0A5E)
-      || (ch == 0x0A74)
-      /* Gujarati */
-      || (ch >= 0x0A81 && ch <= 0x0A83)
-      || (ch >= 0x0A85 && ch <= 0x0A8B)
-      || (ch == 0x0A8D)
-      || (ch >= 0x0A8F && ch <= 0x0A91)
-      || (ch >= 0x0A93 && ch <= 0x0AA8)
-      || (ch >= 0x0AAA && ch <= 0x0AB0)
-      || (ch >= 0x0AB2 && ch <= 0x0AB3)
-      || (ch >= 0x0AB5 && ch <= 0x0AB9)
-      || (ch >= 0x0ABD && ch <= 0x0AC5)
-      || (ch >= 0x0AC7 && ch <= 0x0AC9)
-      || (ch >= 0x0ACB && ch <= 0x0ACD)
-      || (ch == 0x0AD0)
-      || (ch == 0x0AE0)
-      /* Oriya */
-      || (ch >= 0x0B01 && ch <= 0x0B03)
-      || (ch >= 0x0B05 && ch <= 0x0B0C)
-      || (ch >= 0x0B0F && ch <= 0x0B10)
-      || (ch >= 0x0B13 && ch <= 0x0B28)
-      || (ch >= 0x0B2A && ch <= 0x0B30)
-      || (ch >= 0x0B32 && ch <= 0x0B33)
-      || (ch >= 0x0B36 && ch <= 0x0B39)
-      || (ch >= 0x0B3E && ch <= 0x0B43)
-      || (ch >= 0x0B47 && ch <= 0x0B48)
-      || (ch >= 0x0B4B && ch <= 0x0B4D)
-      || (ch >= 0x0B5C && ch <= 0x0B5D)
-      || (ch >= 0x0B5F && ch <= 0x0B61)
-      /* Tamil */
-      || (ch >= 0x0B82 && ch <= 0x0B83)
-      || (ch >= 0x0B85 && ch <= 0x0B8A)
-      || (ch >= 0x0B8E && ch <= 0x0B90)
-      || (ch >= 0x0B92 && ch <= 0x0B95)
-      || (ch >= 0x0B99 && ch <= 0x0B9A)
-      || (ch == 0x0B9C)
-      || (ch >= 0x0B9E && ch <= 0x0B9F)
-      || (ch >= 0x0BA3 && ch <= 0x0BA4)
-      || (ch >= 0x0BA8 && ch <= 0x0BAA)
-      || (ch >= 0x0BAE && ch <= 0x0BB5)
-      || (ch >= 0x0BB7 && ch <= 0x0BB9)
-      || (ch >= 0x0BBE && ch <= 0x0BC2)
-      || (ch >= 0x0BC6 && ch <= 0x0BC8)
-      || (ch >= 0x0BCA && ch <= 0x0BCD)
-      /* Telugu */
-      || (ch >= 0x0C01 && ch <= 0x0C03)
-      || (ch >= 0x0C05 && ch <= 0x0C0C)
-      || (ch >= 0x0C0E && ch <= 0x0C10)
-      || (ch >= 0x0C12 && ch <= 0x0C28)
-      || (ch >= 0x0C2A && ch <= 0x0C33)
-      || (ch >= 0x0C35 && ch <= 0x0C39)
-      || (ch >= 0x0C3E && ch <= 0x0C44)
-      || (ch >= 0x0C46 && ch <= 0x0C48)
-      || (ch >= 0x0C4A && ch <= 0x0C4D)
-      || (ch >= 0x0C60 && ch <= 0x0C61)
-      /* Kannada */
-      || (ch >= 0x0C82 && ch <= 0x0C83)
-      || (ch >= 0x0C85 && ch <= 0x0C8C)
-      || (ch >= 0x0C8E && ch <= 0x0C90)
-      || (ch >= 0x0C92 && ch <= 0x0CA8)
-      || (ch >= 0x0CAA && ch <= 0x0CB3)
-      || (ch >= 0x0CB5 && ch <= 0x0CB9)
-      || (ch >= 0x0CBE && ch <= 0x0CC4)
-      || (ch >= 0x0CC6 && ch <= 0x0CC8)
-      || (ch >= 0x0CCA && ch <= 0x0CCD)
-      || (ch == 0x0CDE)
-      || (ch >= 0x0CE0 && ch <= 0x0CE1)
-      /* Malayalam */
-      || (ch >= 0x0D02 && ch <= 0x0D03)
-      || (ch >= 0x0D05 && ch <= 0x0D0C)
-      || (ch >= 0x0D0E && ch <= 0x0D10)
-      || (ch >= 0x0D12 && ch <= 0x0D28)
-      || (ch >= 0x0D2A && ch <= 0x0D39)
-      || (ch >= 0x0D3E && ch <= 0x0D43)
-      || (ch >= 0x0D46 && ch <= 0x0D48)
-      || (ch >= 0x0D4A && ch <= 0x0D4D)
-      || (ch >= 0x0D60 && ch <= 0x0D61)
-      /* Thai */
-      || (ch >= 0x0E01 && ch <= 0x0E3A)
-      || (ch >= 0x0E40 && ch <= 0x0E5B)
-      /* Lao */
-      || (ch >= 0x0E81 && ch <= 0x0E82)
-      || (ch == 0x0E84)
-      || (ch >= 0x0E87 && ch <= 0x0E88)
-      || (ch == 0x0E8A)
-      || (ch == 0x0E8D)
-      || (ch >= 0x0E94 && ch <= 0x0E97)
-      || (ch >= 0x0E99 && ch <= 0x0E9F)
-      || (ch >= 0x0EA1 && ch <= 0x0EA3)
-      || (ch == 0x0EA5)
-      || (ch == 0x0EA7)
-      || (ch >= 0x0EAA && ch <= 0x0EAB)
-      || (ch >= 0x0EAD && ch <= 0x0EAE)
-      || (ch >= 0x0EB0 && ch <= 0x0EB9)
-      || (ch >= 0x0EBB && ch <= 0x0EBD)
-      || (ch >= 0x0EC0 && ch <= 0x0EC4)
-      || (ch == 0x0EC6)
-      || (ch >= 0x0EC8 && ch <= 0x0ECD)
-      || (ch >= 0x0EDC && ch <= 0x0EDD)
-      /* Tibetan */
-      || (ch == 0x0F00)
-      || (ch >= 0x0F18 && ch <= 0x0F19)
-      || (ch == 0x0F35)
-      || (ch == 0x0F37)
-      || (ch == 0x0F39)
-      || (ch >= 0x0F3E && ch <= 0x0F47)
-      || (ch >= 0x0F49 && ch <= 0x0F69)
-      || (ch >= 0x0F71 && ch <= 0x0F84)
-      || (ch >= 0x0F86 && ch <= 0x0F8B)
-      || (ch >= 0x0F90 && ch <= 0x0F95)
-      || (ch == 0x0F97)
-      || (ch >= 0x0F99 && ch <= 0x0FAD)
-      || (ch >= 0x0FB1 && ch <= 0x0FB7)
-      || (ch == 0x0FB9)
-      /* Georgian */
-      || (ch >= 0x10A0 && ch <= 0x10C5)
-      || (ch >= 0x10D0 && ch <= 0x10F6)
-      /* Hiragana */
-      || (ch >= 0x3041 && ch <= 0x3093)
-      || (ch >= 0x309B && ch <= 0x309C)
-      /* Katakana */
-      || (ch >= 0x30A1 && ch <= 0x30F6)
-      || (ch >= 0x30FB && ch <= 0x30FC)
-      /* Bopomofo */
-      || (ch >= 0x3105 && ch <= 0x312C)
-      /* CJK Unified Ideographs */
-      || (ch >= 0x4E00 && ch <= 0x9FA5)
-      /* Hangul */
-      || (ch >= 0xAC00 && ch <= 0xD7A3)
-      /* Digits */
-      || (ch >= 0x0660 && ch <= 0x0669)
-      || (ch >= 0x06F0 && ch <= 0x06F9)
-      || (ch >= 0x0966 && ch <= 0x096F)
-      || (ch >= 0x09E6 && ch <= 0x09EF)
-      || (ch >= 0x0A66 && ch <= 0x0A6F)
-      || (ch >= 0x0AE6 && ch <= 0x0AEF)
-      || (ch >= 0x0B66 && ch <= 0x0B6F)
-      || (ch >= 0x0BE7 && ch <= 0x0BEF)
-      || (ch >= 0x0C66 && ch <= 0x0C6F)
-      || (ch >= 0x0CE6 && ch <= 0x0CEF)
-      || (ch >= 0x0D66 && ch <= 0x0D6F)
-      || (ch >= 0x0E50 && ch <= 0x0E59)
-      || (ch >= 0x0ED0 && ch <= 0x0ED9)
-      || (ch >= 0x0F20 && ch <= 0x0F33)
-      /* Special characters */
-      || (ch == 0x00B5)
-      || (ch == 0x00B7)
-      || (ch >= 0x02B0 && ch <= 0x02B8)
-      || (ch == 0x02BB)
-      || (ch >= 0x02BD && ch <= 0x02C1)
-      || (ch >= 0x02D0 && ch <= 0x02D1)
-      || (ch >= 0x02E0 && ch <= 0x02E4)
-      || (ch == 0x037A)
-      || (ch == 0x0559)
-      || (ch == 0x093D)
-      || (ch == 0x0B3D)
-      || (ch == 0x1FBE)
-      || (ch >= 0x203F && ch <= 0x2040)
-      || (ch == 0x2102)
-      || (ch == 0x2107)
-      || (ch >= 0x210A && ch <= 0x2113)
-      || (ch == 0x2115)
-      || (ch >= 0x2118 && ch <= 0x211D)
-      || (ch == 0x2124)
-      || (ch == 0x2126)
-      || (ch == 0x2128)
-      || (ch >= 0x212A && ch <= 0x2131)
-      || (ch >= 0x2133 && ch <= 0x2138)
-      || (ch >= 0x2160 && ch <= 0x2182)
-      || (ch >= 0x3005 && ch <= 0x3007)
-      || (ch >= 0x3021 && ch <= 0x3029)
-     )
-    return UC_IDENTIFIER_START;
-  return UC_IDENTIFIER_INVALID;
-}
-
-/* The Java Language Specification, 3rd edition, §3.6.
-   http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#95710  */
-static bool
-is_java_whitespace (unsigned int ch)
-{
-  return (ch == ' ' || ch == '\t' || ch == '\f'
-         || ch == '\n' || ch == '\r');
-}
-
-/* The Java Language Specification, 3rd edition, §3.8.
-   http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#40625
-   and Character.isJavaIdentifierStart and Character.isJavaIdentifierPart  */
-static int
-java_ident_category (unsigned int ch)
-{
-  /* FIXME: Check this against Sun's JDK implementation.  */
-  if (is_category_L (ch) /* = Character.isLetter(ch) */
-      || is_category_Nl (ch) /* = Character.getType(ch)==LETTER_NUMBER */
-      || is_category_Sc (ch) /* currency symbol */
-      || is_category_Pc (ch) /* connector punctuation */
-     )
-    return UC_IDENTIFIER_START;
-  if (is_category_Nd (ch) /* digit */
-      || is_category_Mc (ch) /* combining mark */
-      || is_category_Mn (ch) /* non-spacing mark */
-     )
-    return UC_IDENTIFIER_VALID;
-  if ((ch >= 0x0000 && ch <= 0x0008)
-      || (ch >= 0x000E && ch <= 0x001B)
-      || (ch >= 0x007F && ch <= 0x009F)
-      || is_category_Cf (ch) /* = Character.getType(ch)==FORMAT */
-     )
-    return UC_IDENTIFIER_IGNORABLE;
-  return UC_IDENTIFIER_INVALID;
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE identsyntax_table
-#define ELEMENT uint8_t
-#define DEFAULT UC_IDENTIFIER_INVALID
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-/* Output an identifier syntax categorization in a three-level bitmap.  */
-static void
-output_ident_category (const char *filename, int (*predicate) (unsigned int), const char *name, const char *version)
-{
-  FILE *stream;
-  unsigned int ch, i;
-  struct identsyntax_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-  fprintf (stream, "/* Language syntax properties of Unicode characters.  */\n");
-  fprintf (stream, "/* Generated automatically by gen-ctype.c for Unicode %s.  */\n",
-          version);
-
-  t.p = 7; /* or 8 */
-  t.q = 5; /* or 4 */
-  identsyntax_table_init (&t);
-
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      int syntaxcode = predicate (ch);
-      if (syntaxcode != UC_IDENTIFIER_INVALID)
-       identsyntax_table_add (&t, ch, syntaxcode);
-    }
-
-  identsyntax_table_finalize (&t);
-
-  /* Offsets in t.result, in memory of this process.  */
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream, "#define identsyntax_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream, "static const\n");
-  fprintf (stream, "struct\n");
-  fprintf (stream, "  {\n");
-  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned short level3[%zu * %d];\n", t.level3_size,
-          (1 << t.p) * 2 / 16);
-  fprintf (stream, "  }\n");
-  fprintf (stream, "%s =\n", name);
-  fprintf (stream, "{\n");
-  fprintf (stream, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level2_offset) / sizeof (uint32_t));
-      if (i+1 < t.level1_size)
-       fprintf (stream, ",");
-    }
-  if (t.level1_size > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  fprintf (stream, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      if (offset == 0)
-       fprintf (stream, " %5d", -1);
-      else
-       fprintf (stream, " %5zd",
-                (offset - level3_offset) / sizeof (uint8_t));
-      if (i+1 < t.level2_size << t.q)
-       fprintf (stream, ",");
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " },\n");
-  /* Pack the level3 array.  Each entry needs 2 bits only.  */
-  fprintf (stream, "  {");
-  if ((t.level3_size << t.p) * 2 / 16 > 8)
-    fprintf (stream, "\n   ");
-  for (i = 0; i < (t.level3_size << t.p) * 2 / 16; i++)
-    {
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%04x",
-              (((uint8_t *) (t.result + level3_offset))[8 * i] << 0)
-              | (((uint8_t *) (t.result + level3_offset))[8 * i + 1] << 2)
-              | (((uint8_t *) (t.result + level3_offset))[8 * i + 2] << 4)
-              | (((uint8_t *) (t.result + level3_offset))[8 * i + 3] << 6)
-              | (((uint8_t *) (t.result + level3_offset))[8 * i + 4] << 8)
-              | (((uint8_t *) (t.result + level3_offset))[8 * i + 5] << 10)
-              | (((uint8_t *) (t.result + level3_offset))[8 * i + 6] << 12)
-              | (((uint8_t *) (t.result + level3_offset))[8 * i + 7] << 14));
-      if (i+1 < (t.level3_size << t.p) * 2 / 16)
-       fprintf (stream, ",");
-    }
-  if ((t.level3_size << t.p) * 2 / 16 > 8)
-    fprintf (stream, "\n ");
-  fprintf (stream, " }\n");
-  fprintf (stream, "};\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-static void
-output_ident_properties (const char *version)
-{
-#define PROPERTY(P) \
-  debug_output_predicate ("sy_" #P ".txt", is_ ## P); \
-  output_predicate_test ("test-sy_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
-  output_predicate ("sy_" #P ".h", is_ ## P, "u_" #P, "Language syntax properties", version);
-  PROPERTY(c_whitespace)
-  PROPERTY(java_whitespace)
-#undef PROPERTY
-
-  output_ident_category ("sy_c_ident.h", c_ident_category, "u_c_ident", version);
-  output_ident_category ("sy_java_ident.h", java_ident_category, "u_java_ident", version);
-}
-
-/* ========================================================================= */
-
-/* Like ISO C <ctype.h> and <wctype.h>.  Compatible to glibc's
-   glibc/localedata/locales/i18n file, generated by
-   glibc/localedata/gen-unicode-ctype.c.  */
-
-/* Character mappings.  */
-
-static unsigned int
-to_upper (unsigned int ch)
-{
-  if (unicode_attributes[ch].name != NULL
-      && unicode_attributes[ch].upper != NONE)
-    return unicode_attributes[ch].upper;
-  else
-    return ch;
-}
-
-static unsigned int
-to_lower (unsigned int ch)
-{
-  if (unicode_attributes[ch].name != NULL
-      && unicode_attributes[ch].lower != NONE)
-    return unicode_attributes[ch].lower;
-  else
-    return ch;
-}
-
-static unsigned int
-to_title (unsigned int ch)
-{
-  if (unicode_attributes[ch].name != NULL
-      && unicode_attributes[ch].title != NONE)
-    return unicode_attributes[ch].title;
-  else
-    return ch;
-}
-
-/* Character class properties.  */
-
-static bool
-is_upper (unsigned int ch)
-{
-  return (to_lower (ch) != ch);
-}
-
-static bool
-is_lower (unsigned int ch)
-{
-  return (to_upper (ch) != ch)
-        /* <U00DF> is lowercase, but without simple to_upper mapping.  */
-        || (ch == 0x00DF);
-}
-
-static bool
-is_alpha (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && ((unicode_attributes[ch].category[0] == 'L'
-              /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
-                 <U0E2F>, <U0E46> should belong to is_punct.  */
-              && (ch != 0x0E2F) && (ch != 0x0E46))
-             /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
-                <U0E31>, <U0E34>..<U0E3A>, <U0E47>..<U0E4E> are is_alpha.  */
-             || (ch == 0x0E31)
-             || (ch >= 0x0E34 && ch <= 0x0E3A)
-             || (ch >= 0x0E47 && ch <= 0x0E4E)
-             /* Avoid warning for <U0345>.  */
-             || (ch == 0x0345)
-             /* Avoid warnings for <U2160>..<U217F>.  */
-             || (unicode_attributes[ch].category[0] == 'N'
-                 && unicode_attributes[ch].category[1] == 'l')
-             /* Avoid warnings for <U24B6>..<U24E9>.  */
-             || (unicode_attributes[ch].category[0] == 'S'
-                 && unicode_attributes[ch].category[1] == 'o'
-                 && strstr (unicode_attributes[ch].name, " LETTER ")
-                    != NULL)
-             /* Consider all the non-ASCII digits as alphabetic.
-                ISO C 99 forbids us to have them in category "digit",
-                but we want iswalnum to return true on them.  */
-             || (unicode_attributes[ch].category[0] == 'N'
-                 && unicode_attributes[ch].category[1] == 'd'
-                 && !(ch >= 0x0030 && ch <= 0x0039))));
-}
-
-static bool
-is_digit (unsigned int ch)
-{
-#if 0
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'N'
-         && unicode_attributes[ch].category[1] == 'd');
-  /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
-     a zero.  Must add <0> in front of them by hand.  */
-#else
-  /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
-     takes it away:
-     7.25.2.1.5:
-        The iswdigit function tests for any wide character that corresponds
-        to a decimal-digit character (as defined in 5.2.1).
-     5.2.1:
-        the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
-   */
-  return (ch >= 0x0030 && ch <= 0x0039);
-#endif
-}
-
-static bool
-is_outdigit (unsigned int ch)
-{
-  return (ch >= 0x0030 && ch <= 0x0039);
-}
-
-static bool
-is_alnum (unsigned int ch)
-{
-  return is_alpha (ch) || is_digit (ch);
-}
-
-static bool
-is_blank (unsigned int ch)
-{
-  return (ch == 0x0009 /* '\t' */
-         /* Category Zs without mention of "<noBreak>" */
-         || (unicode_attributes[ch].name != NULL
-             && unicode_attributes[ch].category[0] == 'Z'
-             && unicode_attributes[ch].category[1] == 's'
-             && !strstr (unicode_attributes[ch].decomposition, "<noBreak>")));
-}
-
-static bool
-is_space (unsigned int ch)
-{
-  /* Don't make U+00A0 a space. Non-breaking space means that all programs
-     should treat it like a punctuation character, not like a space. */
-  return (ch == 0x0020 /* ' ' */
-         || ch == 0x000C /* '\f' */
-         || ch == 0x000A /* '\n' */
-         || ch == 0x000D /* '\r' */
-         || ch == 0x0009 /* '\t' */
-         || ch == 0x000B /* '\v' */
-         /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
-         || (unicode_attributes[ch].name != NULL
-             && unicode_attributes[ch].category[0] == 'Z'
-             && (unicode_attributes[ch].category[1] == 'l'
-                 || unicode_attributes[ch].category[1] == 'p'
-                 || (unicode_attributes[ch].category[1] == 's'
-                     && !strstr (unicode_attributes[ch].decomposition,
-                                 "<noBreak>")))));
-}
-
-static bool
-is_cntrl (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && (strcmp (unicode_attributes[ch].name, "<control>") == 0
-             /* Categories Zl and Zp */
-             || (unicode_attributes[ch].category[0] == 'Z'
-                 && (unicode_attributes[ch].category[1] == 'l'
-                     || unicode_attributes[ch].category[1] == 'p'))));
-}
-
-static bool
-is_xdigit (unsigned int ch)
-{
-#if 0
-  return is_digit (ch)
-        || (ch >= 0x0041 && ch <= 0x0046)
-        || (ch >= 0x0061 && ch <= 0x0066);
-#else
-  /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99
-     takes it away:
-     7.25.2.1.12:
-        The iswxdigit function tests for any wide character that corresponds
-        to a hexadecimal-digit character (as defined in 6.4.4.1).
-     6.4.4.1:
-        hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
-   */
-  return (ch >= 0x0030 && ch <= 0x0039)
-        || (ch >= 0x0041 && ch <= 0x0046)
-        || (ch >= 0x0061 && ch <= 0x0066);
-#endif
-}
-
-static bool
-is_graph (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && strcmp (unicode_attributes[ch].name, "<control>")
-         && !is_space (ch));
-}
-
-static bool
-is_print (unsigned int ch)
-{
-  return (unicode_attributes[ch].name != NULL
-         && strcmp (unicode_attributes[ch].name, "<control>")
-         /* Categories Zl and Zp */
-         && !(unicode_attributes[ch].name != NULL
-              && unicode_attributes[ch].category[0] == 'Z'
-              && (unicode_attributes[ch].category[1] == 'l'
-                  || unicode_attributes[ch].category[1] == 'p')));
-}
-
-static bool
-is_punct (unsigned int ch)
-{
-#if 0
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'P');
-#else
-  /* The traditional POSIX definition of punctuation is every graphic,
-     non-alphanumeric character.  */
-  return (is_graph (ch) && !is_alpha (ch) && !is_digit (ch));
-#endif
-}
-
-/* Output all properties.  */
-static void
-output_old_ctype (const char *version)
-{
-#define PROPERTY(P) \
-  debug_output_predicate ("ctype_" #P ".txt", is_ ## P); \
-  output_predicate_test ("test-ctype_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
-  output_predicate ("ctype_" #P ".h", is_ ## P, "u_is_" #P, "ISO C <ctype.h> like properties", version);
-  PROPERTY(alnum)
-  PROPERTY(alpha)
-  PROPERTY(cntrl)
-  PROPERTY(digit)
-  PROPERTY(graph)
-  PROPERTY(lower)
-  PROPERTY(print)
-  PROPERTY(punct)
-  PROPERTY(space)
-  PROPERTY(upper)
-  PROPERTY(xdigit)
-  PROPERTY(blank)
-#undef PROPERTY
-}
-
-#if 0
-
-static bool
-is_combining (unsigned int ch)
-{
-  /* Up to Unicode 3.0.1 we took the Combining property from the PropList.txt
-     file. In 3.0.1 it was identical to the union of the general categories
-     "Mn", "Mc", "Me". In Unicode 3.1 this property has been dropped from the
-     PropList.txt file, so we take the latter definition.  */
-  return (unicode_attributes[ch].name != NULL
-         && unicode_attributes[ch].category[0] == 'M'
-         && (unicode_attributes[ch].category[1] == 'n'
-             || unicode_attributes[ch].category[1] == 'c'
-             || unicode_attributes[ch].category[1] == 'e'));
-}
-
-static bool
-is_combining_level3 (unsigned int ch)
-{
-  return is_combining (ch)
-        && !(unicode_attributes[ch].combining[0] != '\0'
-             && unicode_attributes[ch].combining[0] != '0'
-             && strtoul (unicode_attributes[ch].combining, NULL, 10) >= 200);
-}
-
-/* Return the UCS symbol string for a Unicode character.  */
-static const char *
-ucs_symbol (unsigned int i)
-{
-  static char buf[11+1];
-
-  sprintf (buf, (i < 0x10000 ? "<U%04X>" : "<U%08X>"), i);
-  return buf;
-}
-
-/* Return the UCS symbol range string for a Unicode characters interval.  */
-static const char *
-ucs_symbol_range (unsigned int low, unsigned int high)
-{
-  static char buf[24+1];
-
-  strcpy (buf, ucs_symbol (low));
-  strcat (buf, "..");
-  strcat (buf, ucs_symbol (high));
-  return buf;
-}
-
-/* Output a character class (= property) table.  */
-
-static void
-output_charclass (FILE *stream, const char *classname,
-                 bool (*func) (unsigned int))
-{
-  char table[0x110000];
-  unsigned int i;
-  bool need_semicolon;
-  const int max_column = 75;
-  int column;
-
-  for (i = 0; i < 0x110000; i++)
-    table[i] = (int) func (i);
-
-  fprintf (stream, "%s ", classname);
-  need_semicolon = false;
-  column = 1000;
-  for (i = 0; i < 0x110000; )
-    {
-      if (!table[i])
-       i++;
-      else
-       {
-         unsigned int low, high;
-         char buf[25];
-
-         low = i;
-         do
-           i++;
-         while (i < 0x110000 && table[i]);
-         high = i - 1;
-
-         if (low == high)
-           strcpy (buf, ucs_symbol (low));
-         else
-           strcpy (buf, ucs_symbol_range (low, high));
-
-         if (need_semicolon)
-           {
-             fprintf (stream, ";");
-             column++;
-           }
-
-         if (column + strlen (buf) > max_column)
-           {
-             fprintf (stream, "/\n   ");
-             column = 3;
-           }
-
-         fprintf (stream, "%s", buf);
-         column += strlen (buf);
-         need_semicolon = true;
-       }
-    }
-  fprintf (stream, "\n");
-}
-
-/* Output a character mapping table.  */
-
-static void
-output_charmap (FILE *stream, const char *mapname,
-               unsigned int (*func) (unsigned int))
-{
-  char table[0x110000];
-  unsigned int i;
-  bool need_semicolon;
-  const int max_column = 75;
-  int column;
-
-  for (i = 0; i < 0x110000; i++)
-    table[i] = (func (i) != i);
-
-  fprintf (stream, "%s ", mapname);
-  need_semicolon = false;
-  column = 1000;
-  for (i = 0; i < 0x110000; i++)
-    if (table[i])
-      {
-       char buf[25+1];
-
-       strcpy (buf, "(");
-       strcat (buf, ucs_symbol (i));
-       strcat (buf, ",");
-       strcat (buf, ucs_symbol (func (i)));
-       strcat (buf, ")");
-
-       if (need_semicolon)
-         {
-           fprintf (stream, ";");
-           column++;
-         }
-
-       if (column + strlen (buf) > max_column)
-         {
-           fprintf (stream, "/\n   ");
-           column = 3;
-         }
-
-       fprintf (stream, "%s", buf);
-       column += strlen (buf);
-       need_semicolon = true;
-      }
-  fprintf (stream, "\n");
-}
-
-/* Output the width table.  */
-
-static void
-output_widthmap (FILE *stream)
-{
-}
-
-/* Output the tables to the given file.  */
-
-static void
-output_tables (const char *filename, const char *version)
-{
-  FILE *stream;
-  unsigned int ch;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  fprintf (stream, "escape_char /\n");
-  fprintf (stream, "comment_char %%\n");
-  fprintf (stream, "\n");
-  fprintf (stream, "%% Generated automatically by gen-unicode-ctype for Unicode %s.\n",
-          version);
-  fprintf (stream, "\n");
-
-  fprintf (stream, "LC_IDENTIFICATION\n");
-  fprintf (stream, "title     \"Unicode %s FDCC-set\"\n", version);
-  fprintf (stream, "source    \"UnicodeData.txt, PropList.txt\"\n");
-  fprintf (stream, "address   \"\"\n");
-  fprintf (stream, "contact   \"\"\n");
-  fprintf (stream, "email     \"bug-glibc@gnu.org\"\n");
-  fprintf (stream, "tel       \"\"\n");
-  fprintf (stream, "fax       \"\"\n");
-  fprintf (stream, "language  \"\"\n");
-  fprintf (stream, "territory \"Earth\"\n");
-  fprintf (stream, "revision  \"%s\"\n", version);
-  {
-    time_t now;
-    char date[11];
-    now = time (NULL);
-    strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now));
-    fprintf (stream, "date      \"%s\"\n", date);
-  }
-  fprintf (stream, "category  \"unicode:2001\";LC_CTYPE\n");
-  fprintf (stream, "END LC_IDENTIFICATION\n");
-  fprintf (stream, "\n");
-
-  /* Verifications. */
-  for (ch = 0; ch < 0x110000; ch++)
-    {
-      /* toupper restriction: "Only characters specified for the keywords
-        lower and upper shall be specified.  */
-      if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch)))
-       fprintf (stderr,
-                "%s is not upper|lower but toupper(0x%04X) = 0x%04X\n",
-                ucs_symbol (ch), ch, to_upper (ch));
-
-      /* tolower restriction: "Only characters specified for the keywords
-        lower and upper shall be specified.  */
-      if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch)))
-       fprintf (stderr,
-                "%s is not upper|lower but tolower(0x%04X) = 0x%04X\n",
-                ucs_symbol (ch), ch, to_lower (ch));
-
-      /* alpha restriction: "Characters classified as either upper or lower
-        shall automatically belong to this class.  */
-      if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch))
-       fprintf (stderr, "%s is upper|lower but not alpha\n", ucs_symbol (ch));
-
-      /* alpha restriction: "No character specified for the keywords cntrl,
-        digit, punct or space shall be specified."  */
-      if (is_alpha (ch) && is_cntrl (ch))
-       fprintf (stderr, "%s is alpha and cntrl\n", ucs_symbol (ch));
-      if (is_alpha (ch) && is_digit (ch))
-       fprintf (stderr, "%s is alpha and digit\n", ucs_symbol (ch));
-      if (is_alpha (ch) && is_punct (ch))
-       fprintf (stderr, "%s is alpha and punct\n", ucs_symbol (ch));
-      if (is_alpha (ch) && is_space (ch))
-       fprintf (stderr, "%s is alpha and space\n", ucs_symbol (ch));
-
-      /* space restriction: "No character specified for the keywords upper,
-        lower, alpha, digit, graph or xdigit shall be specified."
-        upper, lower, alpha already checked above.  */
-      if (is_space (ch) && is_digit (ch))
-       fprintf (stderr, "%s is space and digit\n", ucs_symbol (ch));
-      if (is_space (ch) && is_graph (ch))
-       fprintf (stderr, "%s is space and graph\n", ucs_symbol (ch));
-      if (is_space (ch) && is_xdigit (ch))
-       fprintf (stderr, "%s is space and xdigit\n", ucs_symbol (ch));
-
-      /* cntrl restriction: "No character specified for the keywords upper,
-        lower, alpha, digit, punct, graph, print or xdigit shall be
-        specified."  upper, lower, alpha already checked above.  */
-      if (is_cntrl (ch) && is_digit (ch))
-       fprintf (stderr, "%s is cntrl and digit\n", ucs_symbol (ch));
-      if (is_cntrl (ch) && is_punct (ch))
-       fprintf (stderr, "%s is cntrl and punct\n", ucs_symbol (ch));
-      if (is_cntrl (ch) && is_graph (ch))
-       fprintf (stderr, "%s is cntrl and graph\n", ucs_symbol (ch));
-      if (is_cntrl (ch) && is_print (ch))
-       fprintf (stderr, "%s is cntrl and print\n", ucs_symbol (ch));
-      if (is_cntrl (ch) && is_xdigit (ch))
-       fprintf (stderr, "%s is cntrl and xdigit\n", ucs_symbol (ch));
-
-      /* punct restriction: "No character specified for the keywords upper,
-        lower, alpha, digit, cntrl, xdigit or as the <space> character shall
-        be specified."  upper, lower, alpha, cntrl already checked above.  */
-      if (is_punct (ch) && is_digit (ch))
-       fprintf (stderr, "%s is punct and digit\n", ucs_symbol (ch));
-      if (is_punct (ch) && is_xdigit (ch))
-       fprintf (stderr, "%s is punct and xdigit\n", ucs_symbol (ch));
-      if (is_punct (ch) && (ch == 0x0020))
-       fprintf (stderr, "%s is punct\n", ucs_symbol (ch));
-
-      /* graph restriction: "No character specified for the keyword cntrl
-        shall be specified."  Already checked above.  */
-
-      /* print restriction: "No character specified for the keyword cntrl
-        shall be specified."  Already checked above.  */
-
-      /* graph - print relation: differ only in the <space> character.
-        How is this possible if there are more than one space character?!
-        I think susv2/xbd/locale.html should speak of "space characters",
-        not "space character".  */
-      if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch)))
-       fprintf (stderr,
-                "%s is print but not graph|<space>\n", ucs_symbol (ch));
-      if (!is_print (ch) && (is_graph (ch) || ch == 0x0020))
-       fprintf (stderr,
-                "%s is graph|<space> but not print\n", ucs_symbol (ch));
-    }
-
-  fprintf (stream, "LC_CTYPE\n");
-  output_charclass (stream, "upper", is_upper);
-  output_charclass (stream, "lower", is_lower);
-  output_charclass (stream, "alpha", is_alpha);
-  output_charclass (stream, "digit", is_digit);
-  output_charclass (stream, "outdigit", is_outdigit);
-  output_charclass (stream, "blank", is_blank);
-  output_charclass (stream, "space", is_space);
-  output_charclass (stream, "cntrl", is_cntrl);
-  output_charclass (stream, "punct", is_punct);
-  output_charclass (stream, "xdigit", is_xdigit);
-  output_charclass (stream, "graph", is_graph);
-  output_charclass (stream, "print", is_print);
-  output_charclass (stream, "class \"combining\";", is_combining);
-  output_charclass (stream, "class \"combining_level3\";", is_combining_level3);
-  output_charmap (stream, "toupper", to_upper);
-  output_charmap (stream, "tolower", to_lower);
-  output_charmap (stream, "map \"totitle\";", to_title);
-  output_widthmap (stream);
-  fprintf (stream, "END LC_CTYPE\n");
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-#endif
-
-int
-main (int argc, char * argv[])
-{
-  const char *unicodedata_filename;
-  const char *proplist_filename;
-  const char *derivedproplist_filename;
-  const char *scripts_filename;
-  const char *blocks_filename;
-  const char *proplist30_filename;
-  const char *version;
-
-  if (argc != 8)
-    {
-      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt version\n",
-              argv[0]);
-      exit (1);
-    }
-
-  unicodedata_filename = argv[1];
-  proplist_filename = argv[2];
-  derivedproplist_filename = argv[3];
-  scripts_filename = argv[4];
-  blocks_filename = argv[5];
-  proplist30_filename = argv[6];
-  version = argv[7];
-
-  fill_attributes (unicodedata_filename);
-  clear_properties ();
-  fill_properties (proplist_filename);
-  fill_properties (derivedproplist_filename);
-  fill_properties30 (proplist30_filename);
-  fill_scripts (scripts_filename);
-  fill_blocks (blocks_filename);
-
-  output_categories (version);
-  output_category ("categ_of.h", version);
-  output_combclass ("combining.h", version);
-  output_bidi_category ("bidi_of.h", version);
-  output_decimal_digit_test ("test-decdigit.h", version);
-  output_decimal_digit ("decdigit.h", version);
-  output_digit_test ("test-digit.h", version);
-  output_digit ("digit.h", version);
-  output_numeric_test ("test-numeric.h", version);
-  output_numeric ("numeric.h", version);
-  output_mirror ("mirror.h", version);
-  output_properties (version);
-  output_scripts (version);
-  output_scripts_byname (version);
-  output_blocks (version);
-  output_ident_properties (version);
-  output_old_ctype (version);
-
-  return 0;
-}
-
-/*
- * For Emacs M-x compile
- * Local Variables:
- * compile-command: "
-   gcc -O -Wall gen-ctype.c -o gen-ctype && \
-   ./gen-ctype \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/UnicodeData.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/PropList.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/DerivedCoreProperties.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Scripts.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Blocks.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/3.0.1/PropList-3.0.1.txt \
-        5.0.0
-   "
- * End:
- */
diff --git a/lib/unilbrk/gen-lbrk.c b/lib/unilbrk/gen-lbrk.c

deleted file mode 100644 (file)

index f8f9371..0000000
--- a/lib/unilbrk/gen-lbrk.c
+++ /dev/null
@@ -1,1497 +0,0 @@
-/* Generate a Unicode conforming Line Break Properties tables from a
-   UnicodeData file.
-   Copyright (C) 2000-2002, 2004, 2008 Free Software Foundation, Inc.
-   Written by Bruno Haible <bruno@clisp.org>, 2000-2002.
-
-   This program is free software: you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-/* Usage example:
-     $ gen-lbrk /usr/local/share/Unidata/UnicodeData.txt \
-               /usr/local/share/Unidata/EastAsianWidth.txt \
-               /usr/local/share/Unidata/LineBreak.txt \
-               5.0.0
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <string.h>
-#include <time.h>
-
-/* This structure represents one line in the UnicodeData.txt file.  */
-struct unicode_attribute
-{
-  const char *name;           /* Character name */
-  const char *category;       /* General category */
-  const char *combining;      /* Canonical combining classes */
-  const char *bidi;           /* Bidirectional category */
-  const char *decomposition;  /* Character decomposition mapping */
-  const char *decdigit;       /* Decimal digit value */
-  const char *digit;          /* Digit value */
-  const char *numeric;        /* Numeric value */
-  int mirrored;               /* mirrored */
-  const char *oldname;        /* Old Unicode 1.0 name */
-  const char *comment;        /* Comment */
-  unsigned int upper;         /* Uppercase mapping */
-  unsigned int lower;         /* Lowercase mapping */
-  unsigned int title;         /* Titlecase mapping */
-};
-
-/* Missing fields are represented with "" for strings, and NONE for
-   characters.  */
-#define NONE (~(unsigned int)0)
-
-/* The entire contents of the UnicodeData.txt file.  */
-struct unicode_attribute unicode_attributes [0x110000];
-
-/* Stores in unicode_attributes[i] the values from the given fields.  */
-static void
-fill_attribute (unsigned int i,
-               const char *field1, const char *field2,
-               const char *field3, const char *field4,
-               const char *field5, const char *field6,
-               const char *field7, const char *field8,
-               const char *field9, const char *field10,
-               const char *field11, const char *field12,
-               const char *field13, const char *field14)
-{
-  struct unicode_attribute * uni;
-
-  if (i >= 0x110000)
-    {
-      fprintf (stderr, "index too large\n");
-      exit (1);
-    }
-  uni = &unicode_attributes[i];
-  /* Copy the strings.  */
-  uni->name          = strdup (field1);
-  uni->category      = (field2[0] == '\0' ? "" : strdup (field2));
-  uni->combining     = (field3[0] == '\0' ? "" : strdup (field3));
-  uni->bidi          = (field4[0] == '\0' ? "" : strdup (field4));
-  uni->decomposition = (field5[0] == '\0' ? "" : strdup (field5));
-  uni->decdigit      = (field6[0] == '\0' ? "" : strdup (field6));
-  uni->digit         = (field7[0] == '\0' ? "" : strdup (field7));
-  uni->numeric       = (field8[0] == '\0' ? "" : strdup (field8));
-  uni->mirrored      = (field9[0] == 'Y');
-  uni->oldname       = (field10[0] == '\0' ? "" : strdup (field10));
-  uni->comment       = (field11[0] == '\0' ? "" : strdup (field11));
-  uni->upper = (field12[0] =='\0' ? NONE : strtoul (field12, NULL, 16));
-  uni->lower = (field13[0] =='\0' ? NONE : strtoul (field13, NULL, 16));
-  uni->title = (field14[0] =='\0' ? NONE : strtoul (field14, NULL, 16));
-}
-
-/* Maximum length of a field in the UnicodeData.txt file.  */
-#define FIELDLEN 120
-
-/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
-   Reads up to (but excluding) DELIM.
-   Returns 1 when a field was successfully read, otherwise 0.  */
-static int
-getfield (FILE *stream, char *buffer, int delim)
-{
-  int count = 0;
-  int c;
-
-  for (; (c = getc (stream)), (c != EOF && c != delim); )
-    {
-      /* The original unicode.org UnicodeData.txt file happens to have
-        CR/LF line terminators.  Silently convert to LF.  */
-      if (c == '\r')
-       continue;
-
-      /* Put c into the buffer.  */
-      if (++count >= FIELDLEN - 1)
-       {
-         fprintf (stderr, "field too long\n");
-         exit (1);
-       }
-      *buffer++ = c;
-    }
-
-  if (c == EOF)
-    return 0;
-
-  *buffer = '\0';
-  return 1;
-}
-
-/* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
-   file.  */
-static void
-fill_attributes (const char *unicodedata_filename)
-{
-  unsigned int i, j;
-  FILE *stream;
-  char field0[FIELDLEN];
-  char field1[FIELDLEN];
-  char field2[FIELDLEN];
-  char field3[FIELDLEN];
-  char field4[FIELDLEN];
-  char field5[FIELDLEN];
-  char field6[FIELDLEN];
-  char field7[FIELDLEN];
-  char field8[FIELDLEN];
-  char field9[FIELDLEN];
-  char field10[FIELDLEN];
-  char field11[FIELDLEN];
-  char field12[FIELDLEN];
-  char field13[FIELDLEN];
-  char field14[FIELDLEN];
-  int lineno = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_attributes[i].name = NULL;
-
-  stream = fopen (unicodedata_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      int n;
-
-      lineno++;
-      n = getfield (stream, field0, ';');
-      n += getfield (stream, field1, ';');
-      n += getfield (stream, field2, ';');
-      n += getfield (stream, field3, ';');
-      n += getfield (stream, field4, ';');
-      n += getfield (stream, field5, ';');
-      n += getfield (stream, field6, ';');
-      n += getfield (stream, field7, ';');
-      n += getfield (stream, field8, ';');
-      n += getfield (stream, field9, ';');
-      n += getfield (stream, field10, ';');
-      n += getfield (stream, field11, ';');
-      n += getfield (stream, field12, ';');
-      n += getfield (stream, field13, ';');
-      n += getfield (stream, field14, '\n');
-      if (n == 0)
-       break;
-      if (n != 15)
-       {
-         fprintf (stderr, "short line in'%s':%d\n",
-                  unicodedata_filename, lineno);
-         exit (1);
-       }
-      i = strtoul (field0, NULL, 16);
-      if (field1[0] == '<'
-         && strlen (field1) >= 9
-         && !strcmp (field1 + strlen(field1) - 8, ", First>"))
-       {
-         /* Deal with a range. */
-         lineno++;
-         n = getfield (stream, field0, ';');
-         n += getfield (stream, field1, ';');
-         n += getfield (stream, field2, ';');
-         n += getfield (stream, field3, ';');
-         n += getfield (stream, field4, ';');
-         n += getfield (stream, field5, ';');
-         n += getfield (stream, field6, ';');
-         n += getfield (stream, field7, ';');
-         n += getfield (stream, field8, ';');
-         n += getfield (stream, field9, ';');
-         n += getfield (stream, field10, ';');
-         n += getfield (stream, field11, ';');
-         n += getfield (stream, field12, ';');
-         n += getfield (stream, field13, ';');
-         n += getfield (stream, field14, '\n');
-         if (n != 15)
-           {
-             fprintf (stderr, "missing end range in '%s':%d\n",
-                      unicodedata_filename, lineno);
-             exit (1);
-           }
-         if (!(field1[0] == '<'
-               && strlen (field1) >= 8
-               && !strcmp (field1 + strlen (field1) - 7, ", Last>")))
-           {
-             fprintf (stderr, "missing end range in '%s':%d\n",
-                      unicodedata_filename, lineno);
-             exit (1);
-           }
-         field1[strlen (field1) - 7] = '\0';
-         j = strtoul (field0, NULL, 16);
-         for (; i <= j; i++)
-           fill_attribute (i, field1+1, field2, field3, field4, field5,
-                              field6, field7, field8, field9, field10,
-                              field11, field12, field13, field14);
-       }
-      else
-       {
-         /* Single character line */
-         fill_attribute (i, field1, field2, field3, field4, field5,
-                            field6, field7, field8, field9, field10,
-                            field11, field12, field13, field14);
-       }
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
-      exit (1);
-    }
-}
-
-/* The width property from the EastAsianWidth.txt file.
-   Each is NULL (unassigned) or "N", "A", "H", "W", "F", "Na".  */
-const char * unicode_width[0x110000];
-
-/* Stores in unicode_width[] the width property from the EastAsianWidth.txt
-   file.  */
-static void
-fill_width (const char *width_filename)
-{
-  unsigned int i, j;
-  FILE *stream;
-  char field0[FIELDLEN];
-  char field1[FIELDLEN];
-  char field2[FIELDLEN];
-  int lineno = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_width[i] = (unicode_attributes[i].name != NULL ? "N" : NULL);
-
-  stream = fopen (width_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", width_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      int n;
-      int c;
-
-      lineno++;
-      c = getc (stream);
-      if (c == EOF)
-       break;
-      if (c == '#')
-       {
-         do c = getc (stream); while (c != EOF && c != '\n');
-         continue;
-       }
-      ungetc (c, stream);
-      n = getfield (stream, field0, ';');
-      n += getfield (stream, field1, ' ');
-      n += getfield (stream, field2, '\n');
-      if (n == 0)
-       break;
-      if (n != 3)
-       {
-         fprintf (stderr, "short line in '%s':%d\n", width_filename, lineno);
-         exit (1);
-       }
-      i = strtoul (field0, NULL, 16);
-      if (strstr (field0, "..") != NULL)
-       {
-         /* Deal with a range.  */
-         j = strtoul (strstr (field0, "..") + 2, NULL, 16);
-         for (; i <= j; i++)
-           unicode_width[i] = strdup (field1);
-       }
-      else
-       {
-         /* Single character line.  */
-         unicode_width[i] = strdup (field1);
-       }
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", width_filename);
-      exit (1);
-    }
-}
-
-/* Line breaking classification.  */
-
-enum
-{
-  /* Values >= 24 are resolved at run time. */
-  LBP_BK = 24, /* mandatory break */
-/*LBP_CR,         carriage return - not used here because it's a DOSism */
-/*LBP_LF,         line feed - not used here because it's a DOSism */
-  LBP_CM = 25, /* attached characters and combining marks */
-/*LBP_NL,         next line - not used here because it's equivalent to LBP_BK */
-/*LBP_SG,         surrogates - not used here because they are not characters */
-  LBP_WJ =  0, /* word joiner */
-  LBP_ZW = 26, /* zero width space */
-  LBP_GL =  1, /* non-breaking (glue) */
-  LBP_SP = 27, /* space */
-  LBP_B2 =  2, /* break opportunity before and after */
-  LBP_BA =  3, /* break opportunity after */
-  LBP_BB =  4, /* break opportunity before */
-  LBP_HY =  5, /* hyphen */
-  LBP_CB = 28, /* contingent break opportunity */
-  LBP_CL =  6, /* closing punctuation */
-  LBP_EX =  7, /* exclamation/interrogation */
-  LBP_IN =  8, /* inseparable */
-  LBP_NS =  9, /* non starter */
-  LBP_OP = 10, /* opening punctuation */
-  LBP_QU = 11, /* ambiguous quotation */
-  LBP_IS = 12, /* infix separator (numeric) */
-  LBP_NU = 13, /* numeric */
-  LBP_PO = 14, /* postfix (numeric) */
-  LBP_PR = 15, /* prefix (numeric) */
-  LBP_SY = 16, /* symbols allowing breaks */
-  LBP_AI = 29, /* ambiguous (alphabetic or ideograph) */
-  LBP_AL = 17, /* ordinary alphabetic and symbol characters */
-  LBP_H2 = 18, /* Hangul LV syllable */
-  LBP_H3 = 19, /* Hangul LVT syllable */
-  LBP_ID = 20, /* ideographic */
-  LBP_JL = 21, /* Hangul L Jamo */
-  LBP_JV = 22, /* Hangul V Jamo */
-  LBP_JT = 23, /* Hangul T Jamo */
-  LBP_SA = 30, /* complex context (South East Asian) */
-  LBP_XX = 31  /* unknown */
-};
-
-/* Returns the line breaking classification for ch, as a bit mask.  */
-static int
-get_lbp (unsigned int ch)
-{
-  int attr = 0;
-
-  if (unicode_attributes[ch].name != NULL)
-    {
-      /* mandatory break */
-      if (ch == 0x000A || ch == 0x000D || ch == 0x0085 /* newline */
-         || ch == 0x000C /* form feed */
-         || ch == 0x000B /* line tabulation */
-         || ch == 0x2028 /* LINE SEPARATOR */
-         || ch == 0x2029 /* PARAGRAPH SEPARATOR */)
-       attr |= 1 << LBP_BK;
-
-      if (ch == 0x2060 /* WORD JOINER */
-         || ch == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */)
-       attr |= 1 << LBP_WJ;
-
-      /* zero width space */
-      if (ch == 0x200B /* ZERO WIDTH SPACE */)
-       attr |= 1 << LBP_ZW;
-
-      /* non-breaking (glue) */
-      if (ch == 0x00A0 /* NO-BREAK SPACE */
-         || ch == 0x202F /* NARROW NO-BREAK SPACE */
-         || ch == 0x180E /* MONGOLIAN VOWEL SEPARATOR */
-         || ch == 0x034F /* COMBINING GRAPHEME JOINER */
-         || ch == 0x2007 /* FIGURE SPACE */
-         || ch == 0x2011 /* NON-BREAKING HYPHEN */
-         || ch == 0x0F08 /* TIBETAN MARK SBRUL SHAD */
-         || ch == 0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR */
-         || ch == 0x0F12 /* TIBETAN MARK RGYA GRAM SHAD */
-         || (ch >= 0x035C && ch <= 0x0362) /* COMBINING DOUBLE ... */)
-       attr |= 1 << LBP_GL;
-
-      /* space */
-      if (ch == 0x0020 /* SPACE */)
-       attr |= 1 << LBP_SP;
-
-      /* break opportunity before and after */
-      if (ch == 0x2014 /* EM DASH */)
-       attr |= 1 << LBP_B2;
-
-      /* break opportunity after */
-      if (ch == 0x1680 /* OGHAM SPACE MARK */
-         || ch == 0x2000 /* EN QUAD */
-         || ch == 0x2001 /* EM QUAD */
-         || ch == 0x2002 /* EN SPACE */
-         || ch == 0x2003 /* EM SPACE */
-         || ch == 0x2004 /* THREE-PER-EM SPACE */
-         || ch == 0x2005 /* FOUR-PER-EM SPACE */
-         || ch == 0x2006 /* SIX-PER-EM SPACE */
-         || ch == 0x2008 /* PUNCTUATION SPACE */
-         || ch == 0x2009 /* THIN SPACE */
-         || ch == 0x200A /* HAIR SPACE */
-         || ch == 0x205F /* MEDIUM MATHEMATICAL SPACE */
-         || ch == 0x0009 /* tab */
-         || ch == 0x00AD /* SOFT HYPHEN */
-         || ch == 0x058A /* ARMENIAN HYPHEN */
-         || ch == 0x2010 /* HYPHEN */
-         || ch == 0x2012 /* FIGURE DASH */
-         || ch == 0x2013 /* EN DASH */
-         || ch == 0x05BE /* HEBREW PUNCTUATION MAQAF */
-         || ch == 0x0F0B /* TIBETAN MARK INTERSYLLABIC TSHEG */
-         || ch == 0x1361 /* ETHIOPIC WORDSPACE */
-         || ch == 0x17D8 /* KHMER SIGN BEYYAL */
-         || ch == 0x17DA /* KHMER SIGN KOOMUUT */
-         || ch == 0x2027 /* HYPHENATION POINT */
-         || ch == 0x007C /* VERTICAL LINE */
-         || ch == 0x16EB /* RUNIC SINGLE PUNCTUATION */
-         || ch == 0x16EC /* RUNIC MULTIPLE PUNCTUATION */
-         || ch == 0x16ED /* RUNIC CROSS PUNCTUATION */
-         || ch == 0x2056 /* THREE DOT PUNCTUATION */
-         || ch == 0x2058 /* FOUR DOT PUNCTUATION */
-         || ch == 0x2059 /* FIVE DOT PUNCTUATION */
-         || ch == 0x205A /* TWO DOT PUNCTUATION */
-         || ch == 0x205B /* FOUR DOT MARK */
-         || ch == 0x205D /* TRICOLON */
-         || ch == 0x205E /* VERTICAL FOUR DOTS */
-         || ch == 0x2E19 /* PALM BRANCH */
-         || ch == 0x2E2A /* TWO DOTS OVER ONE DOT PUNCTUATION */
-         || ch == 0x2E2B /* ONE DOT OVER TWO DOTS PUNCTUATION */
-         || ch == 0x2E2C /* SQUARED FOUR DOT PUNCTUATION */
-         || ch == 0x2E2D /* FIVE DOT PUNCTUATION */
-         || ch == 0x2E30 /* RING POINT */
-         || ch == 0x10100 /* AEGEAN WORD SEPARATOR LINE */
-         || ch == 0x10101 /* AEGEAN WORD SEPARATOR DOT */
-         || ch == 0x10102 /* AEGEAN CHECK MARK */
-         || ch == 0x1039F /* UGARITIC WORD DIVIDER */
-         || ch == 0x103D0 /* OLD PERSIAN WORD DIVIDER */
-         || ch == 0x1091F /* PHOENICIAN WORD SEPARATOR */
-         || ch == 0x12470 /* CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER */
-         || ch == 0x0964 /* DEVANAGARI DANDA */
-         || ch == 0x0965 /* DEVANAGARI DOUBLE DANDA */
-         || ch == 0x0E5A /* THAI CHARACTER ANGKHANKHU */
-         || ch == 0x0E5B /* THAI CHARACTER KHOMUT */
-         || ch == 0x104A /* MYANMAR SIGN LITTLE SECTION */
-         || ch == 0x104B /* MYANMAR SIGN SECTION */
-         || ch == 0x1735 /* PHILIPPINE SINGLE PUNCTUATION */
-         || ch == 0x1736 /* PHILIPPINE DOUBLE PUNCTUATION */
-         || ch == 0x17D4 /* KHMER SIGN KHAN */
-         || ch == 0x17D5 /* KHMER SIGN BARIYOOSAN */
-         || ch == 0x1B5E /* BALINESE CARIK SIKI */
-         || ch == 0x1B5F /* BALINESE CARIK PAREREN */
-         || ch == 0xA8CE /* SAURASHTRA DANDA */
-         || ch == 0xA8CF /* SAURASHTRA DOUBLE DANDA */
-         || ch == 0xAA5D /* CHAM PUNCTUATION DANDA */
-         || ch == 0xAA5E /* CHAM PUNCTUATION DOUBLE DANDA */
-         || ch == 0xAA5F /* CHAM PUNCTUATION TRIPLE DANDA */
-         || ch == 0x10A56 /* KHAROSHTHI PUNCTUATION DANDA */
-         || ch == 0x10A57 /* KHAROSHTHI PUNCTUATION DOUBLE DANDA */
-         || ch == 0x0F34 /* TIBETAN MARK BSDUS RTAGS */
-         || ch == 0x0F7F /* TIBETAN SIGN RNAM BCAD */
-         || ch == 0x0F85 /* TIBETAN MARK PALUTA */
-         || ch == 0x0FBE /* TIBETAN KU RU KHA */
-         || ch == 0x0FBF /* TIBETAN KU RU KHA BZHI MIG CAN */
-         || ch == 0x0FD2 /* TIBETAN MARK NYIS TSHEG */
-#if !REVISION_22
-         || ch == 0x1802 /* MONGOLIAN COMMA */
-         || ch == 0x1803 /* MONGOLIAN FULL STOP */
-#endif
-         || ch == 0x1804 /* MONGOLIAN COLON */
-         || ch == 0x1805 /* MONGOLIAN FOUR DOTS */
-#if !REVISION_22
-         || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
-         || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
-#endif
-         || ch == 0x1B5A /* BALINESE PANTI */
-         || ch == 0x1B5B /* BALINESE PAMADA */
-         || ch == 0x1B5C /* BALINESE WINDU */
-         || ch == 0x1B5D /* BALINESE CARIK PAMUNGKAH */
-         || ch == 0x1B60 /* BALINESE PAMENENG */
-         || ch == 0x1C3B /* LEPCHA PUNCTUATION TA-ROL */
-         || ch == 0x1C3C /* LEPCHA PUNCTUATION NYET THYOOM TA-ROL */
-         || ch == 0x1C3D /* LEPCHA PUNCTUATION CER-WA */
-         || ch == 0x1C3E /* LEPCHA PUNCTUATION TSHOOK CER-WA */
-         || ch == 0x1C3F /* LEPCHA PUNCTUATION TSHOOK */
-         || ch == 0x1C7E /* OL CHIKI PUNCTUATION MUCAAD */
-         || ch == 0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD */
-#if !REVISION_22
-         || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
-#endif
-         || ch == 0x2CFA /* COPTIC OLD NUBIAN DIRECT QUESTION MARK */
-         || ch == 0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK */
-         || ch == 0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER */
-#if !REVISION_22
-         || ch == 0x2CFE /* COPTIC FULL STOP */
-#endif
-         || ch == 0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER */
-         || (ch >= 0x2E0E && ch <= 0x2E15) /* EDITORIAL CORONIS .. UPWARDS ANCORA */
-         || ch == 0x2E17 /* DOUBLE OBLIQUE HYPHEN */
-         || ch == 0xA60D /* VAI COMMA */
-         || ch == 0xA60F /* VAI QUESTION MARK */
-         || ch == 0xA92E /* KAYAH LI SIGN CWI */
-         || ch == 0xA92F /* KAYAH LI SIGN SHYA */
-         || ch == 0x10A50 /* KHAROSHTHI PUNCTUATION DOT */
-         || ch == 0x10A51 /* KHAROSHTHI PUNCTUATION SMALL CIRCLE */
-         || ch == 0x10A52 /* KHAROSHTHI PUNCTUATION CIRCLE */
-         || ch == 0x10A53 /* KHAROSHTHI PUNCTUATION CRESCENT BAR */
-         || ch == 0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM */
-         || ch == 0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS */
-         /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-#if !REVISION_22
-         || ch == 0x1A1E /* BUGINESE PALLAWA */
-#endif
-         || ch == 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON */
-         || ch == 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON */
-         || ch == 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON */)
-       attr |= 1 << LBP_BA;
-
-      /* break opportunity before */
-      if (ch == 0x00B4 /* ACUTE ACCENT */
-#if REVISION_22
-         || ch == 0x1FFD /* GREEK OXIA */
-         || ch == 0x02DF /* MODIFIER LETTER CROSS ACCENT */
-#endif
-         || ch == 0x02C8 /* MODIFIER LETTER VERTICAL LINE */
-         || ch == 0x02CC /* MODIFIER LETTER LOW VERTICAL LINE */
-         || ch == 0x0F01 /* TIBETAN MARK GTER YIG MGO TRUNCATED A */
-         || ch == 0x0F02 /* TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA */
-         || ch == 0x0F03 /* TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA */
-         || ch == 0x0F04 /* TIBETAN MARK INITIAL YIG MGO MDUN MA */
-         || ch == 0x0F06 /* TIBETAN MARK CARET YIG MGO PHUR SHAD MA */
-         || ch == 0x0F07 /* TIBETAN MARK YIG MGO TSHEG SHAD MA */
-         || ch == 0x0F09 /* TIBETAN MARK BSKUR YIG MGO */
-         || ch == 0x0F0A /* TIBETAN MARK BKA- SHOG YIG MGO */
-         || ch == 0x0FD0 /* TIBETAN MARK BSKA- SHOG GI MGO RGYAN */
-         || ch == 0x0FD1 /* TIBETAN MARK MNYAM YIG GI MGO RGYAN */
-         || ch == 0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA */
-         || ch == 0xA874 /* PHAGS-PA SINGLE HEAD MARK */
-         || ch == 0xA875 /* PHAGS-PA DOUBLE HEAD MARK */
-         || ch == 0x1806 /* MONGOLIAN TODO SOFT HYPHEN */)
-       attr |= 1 << LBP_BB;
-
-      /* hyphen */
-      if (ch == 0x002D /* HYPHEN-MINUS */)
-       attr |= 1 << LBP_HY;
-
-      /* contingent break opportunity */
-      if (ch == 0xFFFC /* OBJECT REPLACEMENT CHARACTER */)
-       attr |= 1 << LBP_CB;
-
-      /* closing punctuation */
-      if ((unicode_attributes[ch].category[0] == 'P'
-          && unicode_attributes[ch].category[1] == 'e')
-         || ch == 0x3001 /* IDEOGRAPHIC COMMA */
-         || ch == 0x3002 /* IDEOGRAPHIC FULL STOP */
-         || ch == 0xFE11 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA */
-         || ch == 0xFE12 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP */
-         || ch == 0xFE50 /* SMALL COMMA */
-         || ch == 0xFE52 /* SMALL FULL STOP */
-         || ch == 0xFF0C /* FULLWIDTH COMMA */
-         || ch == 0xFF0E /* FULLWIDTH FULL STOP */
-         || ch == 0xFF61 /* HALFWIDTH IDEOGRAPHIC FULL STOP */
-         || ch == 0xFF64 /* HALFWIDTH IDEOGRAPHIC COMMA */)
-       attr |= 1 << LBP_CL;
-
-      /* exclamation/interrogation */
-      if (ch == 0x0021 /* EXCLAMATION MARK */
-         || ch == 0x003F /* QUESTION MARK */
-         || ch == 0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA */
-#if !REVISION_22
-         || ch == 0x060C /* ARABIC COMMA */
-#endif
-         || ch == 0x061B /* ARABIC SEMICOLON */
-         || ch == 0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK */
-         || ch == 0x061F /* ARABIC QUESTION MARK */
-#if !REVISION_22
-         || ch == 0x066A /* ARABIC PERCENT SIGN */
-#endif
-         || ch == 0x06D4 /* ARABIC FULL STOP */
-         || ch == 0x07F9 /* NKO EXCLAMATION MARK */
-         || ch == 0x0F0D /* TIBETAN MARK SHAD */
-         || ch == 0x0F0E /* TIBETAN MARK NYIS SHAD */
-         || ch == 0x0F0F /* TIBETAN MARK TSHEG SHAD */
-         || ch == 0x0F10 /* TIBETAN MARK NYIS TSHEG SHAD */
-         || ch == 0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD */
-         || ch == 0x0F14 /* TIBETAN MARK GTER TSHEG */
-#if REVISION_22
-         || ch == 0x1802 /* MONGOLIAN COMMA */
-         || ch == 0x1803 /* MONGOLIAN FULL STOP */
-         || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
-         || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
-#endif
-         || ch == 0x1944 /* LIMBU EXCLAMATION MARK */
-         || ch == 0x1945 /* LIMBU QUESTION MARK */
-         || ch == 0x2762 /* HEAVY EXCLAMATION MARK ORNAMENT */
-         || ch == 0x2763 /* HEAVY HEART EXCLAMATION MARK ORNAMENT */
-#if REVISION_22
-         || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
-         || ch == 0x2CFE /* COPTIC FULL STOP */
-#endif
-         || ch == 0x2E2E /* REVERSED QUESTION MARK */
-         || ch == 0xA60C /* VAI SYLLABLE LENGTHENER */
-         || ch == 0xA60E /* VAI FULL STOP */
-         || ch == 0xA876 /* PHAGS-PA MARK SHAD */
-         || ch == 0xA877 /* PHAGS-PA MARK DOUBLE SHAD */
-         || ch == 0xFE15 /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
-         || ch == 0xFE16 /* PRESENTATION FORM FOR VERTICAL QUESTION MARK */
-         || ch == 0xFE56 /* SMALL QUESTION MARK */
-         || ch == 0xFE57 /* SMALL EXCLAMATION MARK */
-         || ch == 0xFF01 /* FULLWIDTH EXCLAMATION MARK */
-         || ch == 0xFF1F /* FULLWIDTH QUESTION MARK */)
-       attr |= 1 << LBP_EX;
-
-      /* inseparable */
-      if (ch == 0x2024 /* ONE DOT LEADER */
-         || ch == 0x2025 /* TWO DOT LEADER */
-         || ch == 0x2026 /* HORIZONTAL ELLIPSIS */
-         || ch == 0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */)
-       attr |= 1 << LBP_IN;
-
-      /* non starter */
-      if (ch == 0x17D6 /* KHMER SIGN CAMNUC PII KUUH */
-         || ch == 0x203C /* DOUBLE EXCLAMATION MARK */
-         || ch == 0x203D /* INTERROBANG */
-         || ch == 0x2047 /* DOUBLE QUESTION MARK */
-         || ch == 0x2048 /* QUESTION EXCLAMATION MARK */
-         || ch == 0x2049 /* EXCLAMATION QUESTION MARK */
-         || ch == 0x3005 /* IDEOGRAPHIC ITERATION MARK */
-         || ch == 0x301C /* WAVE DASH */
-         || ch == 0x303C /* MASU MARK */
-         || ch == 0x303B /* VERTICAL IDEOGRAPHIC ITERATION MARK */
-         || ch == 0x309B /* KATAKANA-HIRAGANA VOICED SOUND MARK */
-         || ch == 0x309C /* KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
-         || ch == 0x309D /* HIRAGANA ITERATION MARK */
-         || ch == 0x309E /* HIRAGANA VOICED ITERATION MARK */
-         || ch == 0x30A0 /* KATAKANA-HIRAGANA DOUBLE HYPHEN */
-         || ch == 0x30FB /* KATAKANA MIDDLE DOT */
-         || ch == 0x30FC /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
-         || ch == 0x30FD /* KATAKANA ITERATION MARK */
-         || ch == 0x30FE /* KATAKANA VOICED ITERATION MARK */
-         || ch == 0xA015 /* YI SYLLABLE WU */
-         || ch == 0xFE54 /* SMALL SEMICOLON */
-         || ch == 0xFE55 /* SMALL COLON */
-         || ch == 0xFF1A /* FULLWIDTH COLON */
-         || ch == 0xFF1B /* FULLWIDTH SEMICOLON */
-         || ch == 0xFF65 /* HALFWIDTH KATAKANA MIDDLE DOT */
-         || ch == 0xFF70 /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
-         || ch == 0xFF9E /* HALFWIDTH KATAKANA VOICED SOUND MARK */
-         || ch == 0xFF9F /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
-         || strstr (unicode_attributes[ch].name, "HIRAGANA LETTER SMALL ") != NULL
-         || strstr (unicode_attributes[ch].name, "KATAKANA LETTER SMALL ") != NULL)
-       attr |= 1 << LBP_NS;
-
-      /* opening punctuation */
-      if ((unicode_attributes[ch].category[0] == 'P'
-          && unicode_attributes[ch].category[1] == 's')
-#if REVISION_22
-         || ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
-         || ch == 0x00BF /* INVERTED QUESTION MARK */
-#endif
-         || ch == 0x2E18 /* INVERTED INTERROBANG */)
-       attr |= 1 << LBP_OP;
-
-      /* ambiguous quotation */
-      if ((unicode_attributes[ch].category[0] == 'P'
-          && (unicode_attributes[ch].category[1] == 'f'
-              || unicode_attributes[ch].category[1] == 'i'))
-         || ch == 0x0022 /* QUOTATION MARK */
-         || ch == 0x0027 /* APOSTROPHE */
-         || ch == 0x275B /* HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT */
-         || ch == 0x275C /* HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT */
-         || ch == 0x275D /* HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT */
-         || ch == 0x275E /* HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT */
-         || ch == 0x2E00 /* RIGHT ANGLE SUBSTITUTION MARKER */
-         || ch == 0x2E01 /* RIGHT ANGLE DOTTED SUBSTITUTION MARKER */
-         || ch == 0x2E06 /* RAISED INTERPOLATION MARKER */
-         || ch == 0x2E07 /* RAISED DOTTED INTERPOLATION MARKER */
-         || ch == 0x2E08 /* DOTTED TRANSPOSITION MARKER */
-         || ch == 0x2E0B /* RAISED SQUARE */)
-       attr |= 1 << LBP_QU;
-
-      /* infix separator (numeric) */
-      if (ch == 0x002C /* COMMA */
-         || ch == 0x002E /* FULL STOP */
-         || ch == 0x003A /* COLON */
-         || ch == 0x003B /* SEMICOLON */
-         || ch == 0x037E /* GREEK QUESTION MARK */
-         || ch == 0x0589 /* ARMENIAN FULL STOP */
-#if REVISION_22
-         || ch == 0x060C /* ARABIC COMMA */
-#endif
-         || ch == 0x060D /* ARABIC DATE SEPARATOR */
-         || ch == 0x07F8 /* NKO COMMA */
-         || ch == 0x2044 /* FRACTION SLASH */
-         || ch == 0xFE10 /* PRESENTATION FORM FOR VERTICAL COMMA */
-         || ch == 0xFE13 /* PRESENTATION FORM FOR VERTICAL COLON */
-         || ch == 0xFE14 /* PRESENTATION FORM FOR VERTICAL SEMICOLON */)
-       attr |= 1 << LBP_IS;
-
-      /* numeric */
-      if ((unicode_attributes[ch].category[0] == 'N'
-          && unicode_attributes[ch].category[1] == 'd'
-          && strstr (unicode_attributes[ch].name, "FULLWIDTH") == NULL)
-         || ch == 0x066B /* ARABIC DECIMAL SEPARATOR */
-         || ch == 0x066C /* ARABIC THOUSANDS SEPARATOR */)
-       attr |= 1 << LBP_NU;
-
-      /* postfix (numeric) */
-      if (ch == 0x0025 /* PERCENT SIGN */
-         || ch == 0x00A2 /* CENT SIGN */
-         || ch == 0x00B0 /* DEGREE SIGN */
-         || ch == 0x060B /* AFGHANI SIGN */
-#if REVISION_22
-         || ch == 0x066A /* ARABIC PERCENT SIGN */
-#endif
-         || ch == 0x2030 /* PER MILLE SIGN */
-         || ch == 0x2031 /* PER TEN THOUSAND SIGN */
-         || ch == 0x2032 /* PRIME */
-         || ch == 0x2033 /* DOUBLE PRIME */
-         || ch == 0x2034 /* TRIPLE PRIME */
-         || ch == 0x2035 /* REVERSED PRIME */
-         || ch == 0x2036 /* REVERSED DOUBLE PRIME */
-         || ch == 0x2037 /* REVERSED TRIPLE PRIME */
-         || ch == 0x20A7 /* PESETA SIGN */
-         || ch == 0x2103 /* DEGREE CELSIUS */
-         || ch == 0x2109 /* DEGREE FAHRENHEIT */
-         || ch == 0xFDFC /* RIAL SIGN */
-         || ch == 0xFE6A /* SMALL PERCENT SIGN */
-         || ch == 0xFF05 /* FULLWIDTH PERCENT SIGN */
-         || ch == 0xFFE0 /* FULLWIDTH DIGIT ZERO */)
-       attr |= 1 << LBP_PO;
-
-      /* prefix (numeric) */
-      if ((unicode_attributes[ch].category[0] == 'S'
-          && unicode_attributes[ch].category[1] == 'c')
-         || ch == 0x002B /* PLUS SIGN */
-         || ch == 0x005C /* REVERSE SOLIDUS */
-         || ch == 0x00B1 /* PLUS-MINUS SIGN */
-         || ch == 0x2116 /* NUMERO SIGN */
-         || ch == 0x2212 /* MINUS SIGN */
-         || ch == 0x2213 /* MINUS-OR-PLUS SIGN */)
-       if (!(attr & (1 << LBP_PO)))
-         attr |= 1 << LBP_PR;
-
-      /* symbols allowing breaks */
-      if (ch == 0x002F /* SOLIDUS */)
-       attr |= 1 << LBP_SY;
-
-      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) == 0)
-       attr |= 1 << LBP_H2;
-
-      if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) != 0)
-       attr |= 1 << LBP_H3;
-
-      if ((ch >= 0x1100 && ch <= 0x1159) || ch == 0x115F)
-       attr |= 1 << LBP_JL;
-
-      if (ch >= 0x1160 && ch <= 0x11A2)
-       attr |= 1 << LBP_JV;
-
-      if (ch >= 0x11A8 && ch <= 0x11F9)
-       attr |= 1 << LBP_JT;
-
-      /* complex context (South East Asian) */
-      if (((unicode_attributes[ch].category[0] == 'C'
-           && unicode_attributes[ch].category[1] == 'f')
-          || (unicode_attributes[ch].category[0] == 'L'
-              && (unicode_attributes[ch].category[1] == 'm'
-                  || unicode_attributes[ch].category[1] == 'o'))
-          || (unicode_attributes[ch].category[0] == 'M'
-              && (unicode_attributes[ch].category[1] == 'c'
-                  || unicode_attributes[ch].category[1] == 'n'))
-          /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-          || ch == 0x19DE /* NEW TAI LUE SIGN LAE */
-          || ch == 0x19DF /* NEW TAI LUE SIGN LAEV */)
-         && ((ch >= 0x0E00 && ch <= 0x0EFF)
-             || (ch >= 0x1000 && ch <= 0x109F)
-             || (ch >= 0x1780 && ch <= 0x17FF)
-             || (ch >= 0x1950 && ch <= 0x19DF)))
-       attr |= 1 << LBP_SA;
-
-      /* attached characters and combining marks */
-      if ((unicode_attributes[ch].category[0] == 'M'
-          && (unicode_attributes[ch].category[1] == 'c'
-              || unicode_attributes[ch].category[1] == 'e'
-              || unicode_attributes[ch].category[1] == 'n'))
-         || (unicode_attributes[ch].category[0] == 'C'
-             && (unicode_attributes[ch].category[1] == 'c'
-                 || unicode_attributes[ch].category[1] == 'f')))
-       if (!(attr & ((1 << LBP_BK) | (1 << LBP_BA) | (1 << LBP_GL) | (1 << LBP_SA) | (1 << LBP_WJ) | (1 << LBP_ZW))))
-         attr |= 1 << LBP_CM;
-
-      /* ideographic */
-      if ((ch >= 0x2E80 && ch <= 0x2FFF) /* CJK RADICAL, KANGXI RADICAL, IDEOGRAPHIC DESCRIPTION */
-         || ch == 0x3000 /* IDEOGRAPHIC SPACE */
-         || (ch >= 0x3040 && ch <= 0x309F) /* HIRAGANA */
-         || (ch >= 0x30A0 && ch <= 0x30FF) /* KATAKANA */
-         || (ch >= 0x3400 && ch <= 0x4DB5) /* CJK Ideograph Extension A */
-         || (ch >= 0x4E00 && ch <= 0x9FBB) /* CJK Ideograph */
-         || (ch >= 0xF900 && ch <= 0xFAD9) /* CJK COMPATIBILITY IDEOGRAPH */
-         || (ch >= 0xA000 && ch <= 0xA48F) /* YI SYLLABLE */
-         || (ch >= 0xA490 && ch <= 0xA4CF) /* YI RADICAL */
-         || ch == 0xFE62 /* SMALL PLUS SIGN */
-         || ch == 0xFE63 /* SMALL HYPHEN-MINUS */
-         || ch == 0xFE64 /* SMALL LESS-THAN SIGN */
-         || ch == 0xFE65 /* SMALL GREATER-THAN SIGN */
-         || ch == 0xFE66 /* SMALL EQUALS SIGN */
-         || (ch >= 0xFF10 && ch <= 0xFF19) /* FULLWIDTH DIGIT */
-         || (ch >= 0x20000 && ch <= 0x2A6D6) /* CJK Ideograph Extension B */
-         || (ch >= 0x2F800 && ch <= 0x2FA1D) /* CJK COMPATIBILITY IDEOGRAPH */
-         || strstr (unicode_attributes[ch].name, "FULLWIDTH LATIN ") != NULL
-         || (ch >= 0x3000 && ch <= 0x33FF
-             && !(attr & ((1 << LBP_CM) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_CL))))
-         /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-         || ch == 0xFE30 /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER */
-         || ch == 0xFE31 /* PRESENTATION FORM FOR VERTICAL EM DASH */
-         || ch == 0xFE32 /* PRESENTATION FORM FOR VERTICAL EN DASH */
-         || ch == 0xFE33 /* PRESENTATION FORM FOR VERTICAL LOW LINE */
-         || ch == 0xFE34 /* PRESENTATION FORM FOR VERTICAL WAVY LOW LINE */
-         || ch == 0xFE45 /* SESAME DOT */
-         || ch == 0xFE46 /* WHITE SESAME DOT */
-         || ch == 0xFE49 /* DASHED OVERLINE */
-         || ch == 0xFE4A /* CENTRELINE OVERLINE */
-         || ch == 0xFE4B /* WAVY OVERLINE */
-         || ch == 0xFE4C /* DOUBLE WAVY OVERLINE */
-         || ch == 0xFE4D /* DASHED LOW LINE */
-         || ch == 0xFE4E /* CENTRELINE LOW LINE */
-         || ch == 0xFE4F /* WAVY LOW LINE */
-         || ch == 0xFE51 /* SMALL IDEOGRAPHIC COMMA */
-         || ch == 0xFE58 /* SMALL EM DASH */
-         || ch == 0xFE5F /* SMALL NUMBER SIGN */
-         || ch == 0xFE60 /* SMALL AMPERSAND */
-         || ch == 0xFE61 /* SMALL ASTERISK */
-         || ch == 0xFE68 /* SMALL REVERSE SOLIDUS */
-         || ch == 0xFE6B /* SMALL COMMERCIAL AT */
-         || ch == 0xFF02 /* FULLWIDTH QUOTATION MARK */
-         || ch == 0xFF03 /* FULLWIDTH NUMBER SIGN */
-         || ch == 0xFF06 /* FULLWIDTH AMPERSAND */
-         || ch == 0xFF07 /* FULLWIDTH APOSTROPHE */
-         || ch == 0xFF0A /* FULLWIDTH ASTERISK */
-         || ch == 0xFF0B /* FULLWIDTH PLUS SIGN */
-         || ch == 0xFF0D /* FULLWIDTH HYPHEN-MINUS */
-         || ch == 0xFF0F /* FULLWIDTH SOLIDUS */
-         || ch == 0xFF1C /* FULLWIDTH LESS-THAN SIGN */
-         || ch == 0xFF1D /* FULLWIDTH EQUALS SIGN */
-         || ch == 0xFF1E /* FULLWIDTH GREATER-THAN SIGN */
-         || ch == 0xFF20 /* FULLWIDTH COMMERCIAL AT */
-         || ch == 0xFF3C /* FULLWIDTH REVERSE SOLIDUS */
-         || ch == 0xFF3E /* FULLWIDTH CIRCUMFLEX ACCENT */
-         || ch == 0xFF3F /* FULLWIDTH LOW LINE */
-         || ch == 0xFF40 /* FULLWIDTH GRAVE ACCENT */
-         || ch == 0xFF5C /* FULLWIDTH VERTICAL LINE */
-         || ch == 0xFF5E /* FULLWIDTH TILDE */
-         || ch == 0xFFE2 /* FULLWIDTH NOT SIGN */
-         || ch == 0xFFE3 /* FULLWIDTH MACRON */
-         || ch == 0xFFE4 /* FULLWIDTH BROKEN BAR */)
-       if (!(attr & ((1 << LBP_NS) | (1 << LBP_CM))))
-         {
-           /* ambiguous (ideograph) ? */
-           if ((unicode_width[ch] != NULL
-                && unicode_width[ch][0] == 'A'
-                && ch >= 0x2000)
-               || ch == 0x24EA /* CIRCLED DIGIT ZERO */
-               || (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */)
-             attr |= 1 << LBP_AI;
-           else
-             attr |= 1 << LBP_ID;
-         }
-
-      /* ordinary alphabetic and symbol characters */
-      if ((unicode_attributes[ch].category[0] == 'L'
-          && (unicode_attributes[ch].category[1] == 'u'
-              || unicode_attributes[ch].category[1] == 'l'
-              || unicode_attributes[ch].category[1] == 't'
-              || unicode_attributes[ch].category[1] == 'm'
-              || unicode_attributes[ch].category[1] == 'o'))
-         || (unicode_attributes[ch].category[0] == 'S'
-             && (unicode_attributes[ch].category[1] == 'm'
-                 || unicode_attributes[ch].category[1] == 'k'
-                 || unicode_attributes[ch].category[1] == 'o'))
-         || (unicode_attributes[ch].category[0] == 'N'
-             && (unicode_attributes[ch].category[1] == 'l'
-                 || unicode_attributes[ch].category[1] == 'o'))
-         || (unicode_attributes[ch].category[0] == 'P'
-             && (unicode_attributes[ch].category[1] == 'c'
-                 || unicode_attributes[ch].category[1] == 'd'
-                 || unicode_attributes[ch].category[1] == 'o'))
-         || ch == 0x0600 /* ARABIC NUMBER SIGN */
-         || ch == 0x0601 /* ARABIC SIGN SANAH */
-         || ch == 0x0602 /* ARABIC FOOTNOTE MARKER */
-         || ch == 0x0603 /* ARABIC SIGN SAFHA */
-         || ch == 0x06DD /* ARABIC END OF AYAH */
-         || ch == 0x070F /* SYRIAC ABBREVIATION MARK */
-         || ch == 0x2061 /* FUNCTION APPLICATION */
-         || ch == 0x2062 /* INVISIBLE TIMES */
-         || ch == 0x2063 /* INVISIBLE SEPARATOR */
-         || ch == 0x2064 /* INVISIBLE PLUS */)
-       if (!(attr & ((1 << LBP_GL) | (1 << LBP_B2) | (1 << LBP_BA) | (1 << LBP_BB) | (1 << LBP_HY) | (1 << LBP_CB) | (1 << LBP_CL) | (1 << LBP_EX) | (1 << LBP_IN) | (1 << LBP_NS) | (1 << LBP_OP) | (1 << LBP_QU) | (1 << LBP_IS) | (1 << LBP_NU) | (1 << LBP_PO) | (1 << LBP_PR) | (1 << LBP_SY) | (1 << LBP_H2) | (1 << LBP_H3) | (1 << LBP_JL) | (1 << LBP_JV) | (1 << LBP_JT) | (1 << LBP_SA) | (1 << LBP_ID))))
-         {
-           /* ambiguous (alphabetic) ? */
-           if ((unicode_width[ch] != NULL
-                && unicode_width[ch][0] == 'A'
-                && ch >= 0x2000
-                /* Extra exceptions for compatibility with Unicode LineBreak.txt.  */
-                && ch != 0x2022 /* BULLET */
-                && ch != 0x203E /* OVERLINE */
-                && ch != 0x2126 /* OHM SIGN */
-                && ch != 0x2153 /* VULGAR FRACTION ONE THIRD */
-                && ch != 0x215C /* VULGAR FRACTION THREE EIGHTHS */
-                && ch != 0x215D /* VULGAR FRACTION FIVE EIGHTHS */
-                && ch != 0x21B8 /* NORTH WEST ARROW TO LONG BAR */
-                && ch != 0x21B9 /* LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR */
-                && ch != 0x21E7 /* UPWARDS WHITE ARROW */
-                && ch != 0x24FF /* NEGATIVE CIRCLED DIGIT ZERO */
-                && ch != 0x273D /* HEAVY TEARDROP-SPOKED ASTERISK */)
-#if !REVISION_22
-               || ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
-               || ch == 0x00A7 /* SECTION SIGN */
-               || ch == 0x00A8 /* DIAERESIS */
-               || ch == 0x00AA /* FEMININE ORDINAL INDICATOR */
-               || ch == 0x00B2 /* SUPERSCRIPT TWO */
-               || ch == 0x00B3 /* SUPERSCRIPT THREE */
-               || ch == 0x00B6 /* PILCROW SIGN */
-               || ch == 0x00B7 /* MIDDLE DOT */
-               || ch == 0x00B8 /* CEDILLA */
-               || ch == 0x00B9 /* SUPERSCRIPT ONE */
-               || ch == 0x00BA /* MASCULINE ORDINAL INDICATOR */
-               || ch == 0x00BC /* VULGAR FRACTION ONE QUARTER */
-               || ch == 0x00BD /* VULGAR FRACTION ONE HALF */
-               || ch == 0x00BE /* VULGAR FRACTION THREE QUARTERS */
-               || ch == 0x00BF /* INVERTED QUESTION MARK */
-               || ch == 0x00D7 /* MULTIPLICATION SIGN */
-               || ch == 0x00F7 /* DIVISION SIGN */
-               || ch == 0x02C7 /* CARON */
-               || ch == 0x02C9 /* MODIFIER LETTER MACRON */
-               || ch == 0x02CA /* MODIFIER LETTER ACUTE ACCENT */
-               || ch == 0x02CB /* MODIFIER LETTER GRAVE ACCENT */
-               || ch == 0x02CD /* MODIFIER LETTER LOW MACRON */
-               || ch == 0x02D0 /* MODIFIER LETTER TRIANGULAR COLON */
-               || ch == 0x02D8 /* BREVE */
-               || ch == 0x02D9 /* DOT ABOVE */
-               || ch == 0x02DA /* RING ABOVE */
-               || ch == 0x02DB /* OGONEK */
-               || ch == 0x02DD /* DOUBLE ACUTE ACCENT */
-#endif
-               || ch == 0x24EA /* CIRCLED DIGIT ZERO */
-               || (ch >= 0x2780 && ch <= 0x2793) /* DINGBAT ... CIRCLED DIGIT ... */
-               /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-               || ch == 0x2155 /* VULGAR FRACTION ONE FIFTH */
-               || ch == 0x2574 /* BOX DRAWINGS LIGHT LEFT */
-               || ch == 0x2616 /* WHITE SHOGI PIECE */
-               || ch == 0x2617 /* BLACK SHOGI PIECE */)
-             attr |= 1 << LBP_AI;
-           else
-             attr |= 1 << LBP_AL;
-           attr &= ~(1 << LBP_CM);
-         }
-    }
-
-  if (attr == 0)
-    /* unknown */
-    attr |= 1 << LBP_XX;
-
-  return attr;
-}
-
-/* Output the line breaking properties in a human readable format.  */
-static void
-debug_output_lbp (FILE *stream)
-{
-  unsigned int i;
-
-  for (i = 0; i < 0x110000; i++)
-    {
-      int attr = get_lbp (i);
-      if (attr != 1 << LBP_XX)
-       {
-         fprintf (stream, "0x%04X", i);
-#define PRINT_BIT(attr,bit) \
-  if (attr & (1 << bit)) fprintf (stream, " " #bit);
-         PRINT_BIT(attr,LBP_BK);
-         PRINT_BIT(attr,LBP_CM);
-         PRINT_BIT(attr,LBP_WJ);
-         PRINT_BIT(attr,LBP_ZW);
-         PRINT_BIT(attr,LBP_GL);
-         PRINT_BIT(attr,LBP_SP);
-         PRINT_BIT(attr,LBP_B2);
-         PRINT_BIT(attr,LBP_BA);
-         PRINT_BIT(attr,LBP_BB);
-         PRINT_BIT(attr,LBP_HY);
-         PRINT_BIT(attr,LBP_CB);
-         PRINT_BIT(attr,LBP_CL);
-         PRINT_BIT(attr,LBP_EX);
-         PRINT_BIT(attr,LBP_IN);
-         PRINT_BIT(attr,LBP_NS);
-         PRINT_BIT(attr,LBP_OP);
-         PRINT_BIT(attr,LBP_QU);
-         PRINT_BIT(attr,LBP_IS);
-         PRINT_BIT(attr,LBP_NU);
-         PRINT_BIT(attr,LBP_PO);
-         PRINT_BIT(attr,LBP_PR);
-         PRINT_BIT(attr,LBP_SY);
-         PRINT_BIT(attr,LBP_AI);
-         PRINT_BIT(attr,LBP_AL);
-         PRINT_BIT(attr,LBP_H2);
-         PRINT_BIT(attr,LBP_H3);
-         PRINT_BIT(attr,LBP_ID);
-         PRINT_BIT(attr,LBP_JL);
-         PRINT_BIT(attr,LBP_JV);
-         PRINT_BIT(attr,LBP_JT);
-         PRINT_BIT(attr,LBP_SA);
-         PRINT_BIT(attr,LBP_XX);
-#undef PRINT_BIT
-         fprintf (stream, "\n");
-       }
-    }
-}
-
-static void
-debug_output_tables (const char *filename)
-{
-  FILE *stream;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  debug_output_lbp (stream);
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* The line breaking property from the LineBreak.txt file.  */
-int unicode_org_lbp[0x110000];
-
-/* Stores in unicode_org_lbp[] the line breaking property from the
-   LineBreak.txt file.  */
-static void
-fill_org_lbp (const char *linebreak_filename)
-{
-  unsigned int i, j;
-  FILE *stream;
-  char field0[FIELDLEN];
-  char field1[FIELDLEN];
-  char field2[FIELDLEN];
-  int lineno = 0;
-
-  for (i = 0; i < 0x110000; i++)
-    unicode_org_lbp[i] = LBP_XX;
-
-  stream = fopen (linebreak_filename, "r");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "error during fopen of '%s'\n", linebreak_filename);
-      exit (1);
-    }
-
-  for (;;)
-    {
-      int n;
-      int c;
-      int value;
-
-      lineno++;
-      c = getc (stream);
-      if (c == EOF)
-       break;
-      if (c == '#')
-       {
-         do c = getc (stream); while (c != EOF && c != '\n');
-         continue;
-       }
-      ungetc (c, stream);
-      n = getfield (stream, field0, ';');
-      n += getfield (stream, field1, ' ');
-      n += getfield (stream, field2, '\n');
-      if (n == 0)
-       break;
-      if (n != 3)
-       {
-         fprintf (stderr, "short line in '%s':%d\n", linebreak_filename,
-                  lineno);
-         exit (1);
-       }
-#define TRY(bit) else if (strcmp (field1, #bit + 4) == 0) value = bit;
-      if (false) {}
-      TRY(LBP_BK)
-      TRY(LBP_CM)
-      TRY(LBP_WJ)
-      TRY(LBP_ZW)
-      TRY(LBP_GL)
-      TRY(LBP_SP)
-      TRY(LBP_B2)
-      TRY(LBP_BA)
-      TRY(LBP_BB)
-      TRY(LBP_HY)
-      TRY(LBP_CB)
-      TRY(LBP_CL)
-      TRY(LBP_EX)
-      TRY(LBP_IN)
-      TRY(LBP_NS)
-      TRY(LBP_OP)
-      TRY(LBP_QU)
-      TRY(LBP_IS)
-      TRY(LBP_NU)
-      TRY(LBP_PO)
-      TRY(LBP_PR)
-      TRY(LBP_SY)
-      TRY(LBP_AI)
-      TRY(LBP_AL)
-      TRY(LBP_H2)
-      TRY(LBP_H3)
-      TRY(LBP_ID)
-      TRY(LBP_JL)
-      TRY(LBP_JV)
-      TRY(LBP_JT)
-      TRY(LBP_SA)
-      TRY(LBP_XX)
-#undef TRY
-      else if (strcmp (field1, "LF") == 0) value = LBP_BK;
-      else if (strcmp (field1, "CR") == 0) value = LBP_BK;
-      else if (strcmp (field1, "NL") == 0) value = LBP_BK;
-      else if (strcmp (field1, "SG") == 0) value = LBP_XX;
-      else
-       {
-         fprintf (stderr, "unknown property value \"%s\" in '%s':%d\n",
-                  field1, linebreak_filename, lineno);
-         exit (1);
-       }
-      i = strtoul (field0, NULL, 16);
-      if (strstr (field0, "..") != NULL)
-       {
-         /* Deal with a range.  */
-         j = strtoul (strstr (field0, "..") + 2, NULL, 16);
-         for (; i <= j; i++)
-           unicode_org_lbp[i] = value;
-       }
-      else
-       {
-         /* Single character line.  */
-         unicode_org_lbp[i] = value;
-       }
-    }
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error reading from '%s'\n", linebreak_filename);
-      exit (1);
-    }
-}
-
-/* Output the line breaking properties in a human readable format.  */
-static void
-debug_output_org_lbp (FILE *stream)
-{
-  unsigned int i;
-
-  for (i = 0; i < 0x110000; i++)
-    {
-      int attr = unicode_org_lbp[i];
-      if (attr != LBP_XX)
-       {
-         fprintf (stream, "0x%04X", i);
-#define PRINT_BIT(attr,bit) \
-  if (attr == bit) fprintf (stream, " " #bit);
-         PRINT_BIT(attr,LBP_BK);
-         PRINT_BIT(attr,LBP_CM);
-         PRINT_BIT(attr,LBP_WJ);
-         PRINT_BIT(attr,LBP_ZW);
-         PRINT_BIT(attr,LBP_GL);
-         PRINT_BIT(attr,LBP_SP);
-         PRINT_BIT(attr,LBP_B2);
-         PRINT_BIT(attr,LBP_BA);
-         PRINT_BIT(attr,LBP_BB);
-         PRINT_BIT(attr,LBP_HY);
-         PRINT_BIT(attr,LBP_CB);
-         PRINT_BIT(attr,LBP_CL);
-         PRINT_BIT(attr,LBP_EX);
-         PRINT_BIT(attr,LBP_IN);
-         PRINT_BIT(attr,LBP_NS);
-         PRINT_BIT(attr,LBP_OP);
-         PRINT_BIT(attr,LBP_QU);
-         PRINT_BIT(attr,LBP_IS);
-         PRINT_BIT(attr,LBP_NU);
-         PRINT_BIT(attr,LBP_PO);
-         PRINT_BIT(attr,LBP_PR);
-         PRINT_BIT(attr,LBP_SY);
-         PRINT_BIT(attr,LBP_AI);
-         PRINT_BIT(attr,LBP_AL);
-         PRINT_BIT(attr,LBP_H2);
-         PRINT_BIT(attr,LBP_H3);
-         PRINT_BIT(attr,LBP_ID);
-         PRINT_BIT(attr,LBP_JL);
-         PRINT_BIT(attr,LBP_JV);
-         PRINT_BIT(attr,LBP_JT);
-         PRINT_BIT(attr,LBP_SA);
-         PRINT_BIT(attr,LBP_XX);
-#undef PRINT_BIT
-         fprintf (stream, "\n");
-       }
-    }
-}
-
-static void
-debug_output_org_tables (const char *filename)
-{
-  FILE *stream;
-
-  stream = fopen (filename, "w");
-  if (stream == NULL)
-    {
-      fprintf (stderr, "cannot open '%s' for writing\n", filename);
-      exit (1);
-    }
-
-  debug_output_org_lbp (stream);
-
-  if (ferror (stream) || fclose (stream))
-    {
-      fprintf (stderr, "error writing to '%s'\n", filename);
-      exit (1);
-    }
-}
-
-/* Construction of sparse 3-level tables.  */
-#define TABLE lbp_table
-#define ELEMENT unsigned char
-#define DEFAULT LBP_XX
-#define xmalloc malloc
-#define xrealloc realloc
-#include "3level.h"
-
-static void
-output_lbp (FILE *stream1, FILE *stream2)
-{
-  unsigned int i;
-  struct lbp_table t;
-  unsigned int level1_offset, level2_offset, level3_offset;
-
-  t.p = 7;
-  t.q = 9;
-  lbp_table_init (&t);
-
-  for (i = 0; i < 0x110000; i++)
-    {
-      int attr = get_lbp (i);
-
-      /* Now attr should contain exactly one bit.  */
-      if (attr == 0 || ((attr & (attr - 1)) != 0))
-       abort ();
-
-      if (attr != 1 << LBP_XX)
-       {
-         unsigned int log2_attr;
-         for (log2_attr = 0; attr > 1; attr >>= 1, log2_attr++);
-
-         lbp_table_add (&t, i, log2_attr);
-       }
-    }
-
-  lbp_table_finalize (&t);
-
-  level1_offset =
-    5 * sizeof (uint32_t);
-  level2_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t);
-  level3_offset =
-    5 * sizeof (uint32_t)
-    + t.level1_size * sizeof (uint32_t)
-    + (t.level2_size << t.q) * sizeof (uint32_t);
-
-  for (i = 0; i < 5; i++)
-    fprintf (stream1, "#define lbrkprop_header_%d %d\n", i,
-            ((uint32_t *) t.result)[i]);
-  fprintf (stream1, "\n");
-  fprintf (stream1, "typedef struct\n");
-  fprintf (stream1, "  {\n");
-  fprintf (stream1, "    int level1[%d];\n", t.level1_size);
-  fprintf (stream1, "    int level2[%d << %d];\n", t.level2_size, t.q);
-  fprintf (stream1, "    unsigned char level3[%d << %d];\n", t.level3_size, t.p);
-  fprintf (stream1, "  }\n");
-  fprintf (stream1, "lbrkprop_t;\n");
-  fprintf (stream1, "extern const lbrkprop_t unilbrkprop;\n");
-
-  fprintf (stream2, "const lbrkprop_t unilbrkprop =\n");
-  fprintf (stream2, "{\n");
-  fprintf (stream2, "  {");
-  if (t.level1_size > 8)
-    fprintf (stream2, "\n   ");
-  for (i = 0; i < t.level1_size; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream2, "\n   ");
-      offset = ((uint32_t *) (t.result + level1_offset))[i];
-      fprintf (stream2, " %5d%s",
-              offset == 0 ? -1 : (offset - level2_offset) / sizeof (uint32_t),
-              (i+1 < t.level1_size ? "," : ""));
-    }
-  if (t.level1_size > 8)
-    fprintf (stream2, "\n ");
-  fprintf (stream2, " },\n");
-  fprintf (stream2, "  {");
-  if (t.level2_size << t.q > 8)
-    fprintf (stream2, "\n   ");
-  for (i = 0; i < t.level2_size << t.q; i++)
-    {
-      uint32_t offset;
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream2, "\n   ");
-      offset = ((uint32_t *) (t.result + level2_offset))[i];
-      fprintf (stream2, " %5d%s",
-              offset == 0 ? -1 : (offset - level3_offset) / sizeof (uint8_t),
-              (i+1 < t.level2_size << t.q ? "," : ""));
-    }
-  if (t.level2_size << t.q > 8)
-    fprintf (stream2, "\n ");
-  fprintf (stream2, " },\n");
-  fprintf (stream2, "  {");
-  if (t.level3_size << t.p > 8)
-    fprintf (stream2, "\n   ");
-  for (i = 0; i < t.level3_size << t.p; i++)
-    {
-      unsigned char value = ((unsigned char *) (t.result + level3_offset))[i];
-      const char *value_string;
-      switch (value)
-       {
-#define CASE(x) case x: value_string = #x; break;
-         CASE(LBP_BK);
-         CASE(LBP_CM);
-         CASE(LBP_WJ);
-         CASE(LBP_ZW);
-         CASE(LBP_GL);
-         CASE(LBP_SP);
-         CASE(LBP_B2);
-         CASE(LBP_BA);
-         CASE(LBP_BB);
-         CASE(LBP_HY);
-         CASE(LBP_CB);
-         CASE(LBP_CL);
-         CASE(LBP_EX);
-         CASE(LBP_IN);
-         CASE(LBP_NS);
-         CASE(LBP_OP);
-         CASE(LBP_QU);
-         CASE(LBP_IS);
-         CASE(LBP_NU);
-         CASE(LBP_PO);
-         CASE(LBP_PR);
-         CASE(LBP_SY);
-         CASE(LBP_AI);
-         CASE(LBP_AL);
-         CASE(LBP_H2);
-         CASE(LBP_H3);
-         CASE(LBP_ID);
-         CASE(LBP_JL);
-         CASE(LBP_JV);
-         CASE(LBP_JT);
-         CASE(LBP_SA);
-         CASE(LBP_XX);
-#undef CASE
-         default:
-           abort ();
-       }
-      if (i > 0 && (i % 8) == 0)
-       fprintf (stream2, "\n   ");
-      fprintf (stream2, " %s%s", value_string,
-              (i+1 < t.level3_size << t.p ? "," : ""));
-    }
-  if (t.level3_size << t.p > 8)
-    fprintf (stream2, "\n ");
-  fprintf (stream2, " }\n");
-  fprintf (stream2, "};\n");
-}
-
-static void
-output_tables (const char *filename1, const char *filename2, const char *version)
-{
-  const char *filenames[2];
-  FILE *streams[2];
-  size_t i;
-
-  filenames[0] = filename1;
-  filenames[1] = filename2;
-
-  for (i = 0; i < 2; i++)
-    {
-      streams[i] = fopen (filenames[i], "w");
-      if (streams[i] == NULL)
-       {
-         fprintf (stderr, "cannot open '%s' for writing\n", filenames[i]);
-         exit (1);
-       }
-    }
-
-  for (i = 0; i < 2; i++)
-    {
-      FILE *stream = streams[i];
-
-      fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
-      fprintf (stream, "/* Line breaking properties of Unicode characters.  */\n");
-      fprintf (stream, "/* Generated automatically by gen-lbrk for Unicode %s.  */\n",
-              version);
-      fprintf (stream, "\n");
-
-      /* Put a GPL header on it.  The gnulib module is under LGPL (although it
-        still carries the GPL header), and it's gnulib-tool which replaces the
-        GPL header with an LGPL header.  */
-      fprintf (stream, "/* Copyright (C) 2000-2002, 2004, 2008 Free Software Foundation, Inc.\n");
-      fprintf (stream, "\n");
-      fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
-      fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
-      fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
-      fprintf (stream, "   (at your option) any later version.\n");
-      fprintf (stream, "\n");
-      fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
-      fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
-      fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
-      fprintf (stream, "   GNU General Public License for more details.\n");
-      fprintf (stream, "\n");
-      fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
-      fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
-      fprintf (stream, "\n");
-    }
-
-  output_lbp (streams[0], streams[1]);
-
-  for (i = 0; i < 2; i++)
-    {
-      if (ferror (streams[i]) || fclose (streams[i]))
-       {
-         fprintf (stderr, "error writing to '%s'\n", filenames[i]);
-         exit (1);
-       }
-    }
-}
-
-int
-main (int argc, char * argv[])
-{
-  if (argc != 5)
-    {
-      fprintf (stderr, "Usage: %s UnicodeData.txt EastAsianWidth.txt LineBreak.txt version\n",
-              argv[0]);
-      exit (1);
-    }
-
-  fill_attributes (argv[1]);
-  fill_width (argv[2]);
-  fill_org_lbp (argv[3]);
-
-  debug_output_tables ("lbrkprop.txt");
-  debug_output_org_tables ("lbrkprop_org.txt");
-
-  output_tables ("lbrkprop1.h", "lbrkprop2.h", argv[4]);
-
-  return 0;
-}
-
-/*
- * For Emacs M-x compile
- * Local Variables:
- * compile-command: "
-   gcc -O -Wall -I../unictype gen-lbrk.c -o gen-lbrk && \
-   ./gen-lbrk \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/UnicodeData.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/EastAsianWidth.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/LineBreak.txt \
-        5.0.0
-   "
- * End:
- */
diff --git a/modules/gen-uni-tables b/modules/gen-uni-tables

new file mode 100644 (file)

index 0000000..11f4c94
--- /dev/null
+++ b/modules/gen-uni-tables
@@ -0,0 +1,24 @@
+Description:
+Generates the tables in lib/unictype/* and lib/unilbrk/*.
+
+Files:
+lib/gen-uni-tables.c
+lib/unictype/3level.h
+lib/unictype/3levelbit.h
+
+Depends-on:
+memcmp
+strdup
+
+configure.ac:
+
+Makefile.am:
+
+Include:
+
+License:
+GPLed build tool
+
+Maintainer:
+Bruno Haible
+
diff --git a/modules/unictype/gen-ctype b/modules/unictype/gen-ctype

deleted file mode 100644 (file)

index b13c204..0000000
--- a/modules/unictype/gen-ctype
+++ /dev/null
@@ -1,24 +0,0 @@
-Description:
-Generates the tables in lib/unictype/*.
-
-Files:
-lib/unictype/gen-ctype.c
-lib/unictype/3level.h
-lib/unictype/3levelbit.h
-
-Depends-on:
-memcmp
-strdup
-
-configure.ac:
-
-Makefile.am:
-
-Include:
-
-License:
-GPLed build tool
-
-Maintainer:
-Bruno Haible
-
diff --git a/modules/unilbrk/gen-lbrk b/modules/unilbrk/gen-lbrk

deleted file mode 100644 (file)

index caa4086..0000000
--- a/modules/unilbrk/gen-lbrk
+++ /dev/null
@@ -1,23 +0,0 @@
-Description:
-Generates the tables in lib/unilbrk/*.
-
-Files:
-lib/unilbrk/gen-lbrk.c
-lib/unictype/3level.h
-
-Depends-on:
-memcmp
-strdup
-
-configure.ac:
-
-Makefile.am:
-
-Include:
-
-License:
-GPLed build tool
-
-Maintainer:
-Bruno Haible
-
author	Bruno Haible <bruno@clisp.org>
	Sun, 8 Feb 2009 02:02:06 +0000 (03:02 +0100)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 8 Feb 2009 02:02:06 +0000 (03:02 +0100)
ChangeLog		patch \| blob \| history
lib/gen-uni-tables.c	[new file with mode: 0644]	patch \| blob
lib/unictype/gen-ctype.c	[deleted file]	patch \| blob \| history
lib/unilbrk/gen-lbrk.c	[deleted file]	patch \| blob \| history
modules/gen-uni-tables	[new file with mode: 0644]	patch \| blob
modules/unictype/gen-ctype	[deleted file]	patch \| blob \| history
modules/unilbrk/gen-lbrk	[deleted file]	patch \| blob \| history