2011-03-26 Bruno Haible <bruno@clisp.org>
+ unictype/bidiclass-byname: Recognize long names as well.
+ * lib/unictype.in.h (uc_bidi_class_byname): Allow argument to be a long
+ name.
+ * lib/unictype/bidi_byname.c: Include <string.h>,
+ unictype/bidi_byname.h.
+ (uc_bidi_class_byname): Use uc_bidi_class_lookup.
+ * lib/unictype/bidi_byname.gperf: New file.
+ * modules/unictype/bidiclass-byname (Files): Add
+ lib/unictype/bidi_byname.gperf.
+ (Depends-on): Add gperf.
+ (Makefile.am): Add rule for generating unictype/bidi_byname.h.
+ * tests/unictype/test-bidi_byname.c (main): Test the recognition of
+ long names.
+
Tests for module 'unictype/bidiclass-longname'.
* modules/unictype/bidiclass-longname-tests: New file.
* tests/unictype/test-bidi_longname.c: New file.
extern const char *
uc_bidi_class_long_name (int bidi_class);
-/* Return the bidi class given by name, e.g. "LRE". */
+/* Return the bidi class given by name, e.g. "LRE", or by long name, e.g.
+ "Left-to-Right Embedding". Return -1 if the name is not recognized. */
extern int
uc_bidi_class_byname (const char *bidi_class_name);
/* Same; obsolete function name. */
/* Specification. */
#include "unictype.h"
+#include <string.h>
+
+#include "unictype/bidi_byname.h"
+
int
uc_bidi_class_byname (const char *bidi_class_name)
{
- switch (bidi_class_name[0])
+ size_t len;
+
+ len = strlen (bidi_class_name);
+ if (len <= MAX_WORD_LENGTH)
{
- case 'A':
- switch (bidi_class_name[1])
- {
- case 'L':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_AL;
- break;
- case 'N':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_AN;
- break;
- }
- break;
- case 'B':
- switch (bidi_class_name[1])
- {
- case '\0':
- return UC_BIDI_B;
- case 'N':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_BN;
- break;
- }
- break;
- case 'C':
- switch (bidi_class_name[1])
- {
- case 'S':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_CS;
- break;
- }
- break;
- case 'E':
- switch (bidi_class_name[1])
- {
- case 'N':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_EN;
- break;
- case 'S':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_ES;
- break;
- case 'T':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_ET;
- break;
- }
- break;
- case 'L':
- switch (bidi_class_name[1])
- {
- case '\0':
- return UC_BIDI_L;
- case 'R':
- switch (bidi_class_name[2])
- {
- case 'E':
- if (bidi_class_name[3] == '\0')
- return UC_BIDI_LRE;
- break;
- case 'O':
- if (bidi_class_name[3] == '\0')
- return UC_BIDI_LRO;
- break;
- }
- break;
- }
- break;
- case 'N':
- switch (bidi_class_name[1])
- {
- case 'S':
- switch (bidi_class_name[2])
- {
- case 'M':
- if (bidi_class_name[3] == '\0')
- return UC_BIDI_NSM;
- break;
- }
- break;
- }
- break;
- case 'O':
- switch (bidi_class_name[1])
- {
- case 'N':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_ON;
- break;
- }
- break;
- case 'P':
- switch (bidi_class_name[1])
- {
- case 'D':
- switch (bidi_class_name[2])
- {
- case 'F':
- if (bidi_class_name[3] == '\0')
- return UC_BIDI_PDF;
- break;
- }
- break;
- }
- break;
- case 'R':
- switch (bidi_class_name[1])
- {
- case '\0':
- return UC_BIDI_R;
- case 'L':
- switch (bidi_class_name[2])
- {
- case 'E':
- if (bidi_class_name[3] == '\0')
- return UC_BIDI_RLE;
- break;
- case 'O':
- if (bidi_class_name[3] == '\0')
- return UC_BIDI_RLO;
+ char buf[MAX_WORD_LENGTH + 1];
+ const struct named_bidi_class *found;
+
+ /* Copy bidi_class_name into buf, converting '_' and '-' to ' '. */
+ {
+ const char *p = bidi_class_name;
+ char *q = buf;
+
+ for (;; p++, q++)
+ {
+ char c = *p;
+
+ if (c == '_' || c == '-')
+ c = ' ';
+ *q = c;
+ if (c == '\0')
break;
- }
- break;
- }
- break;
- case 'S':
- if (bidi_class_name[1] == '\0')
- return UC_BIDI_S;
- break;
- case 'W':
- switch (bidi_class_name[1])
- {
- case 'S':
- if (bidi_class_name[2] == '\0')
- return UC_BIDI_WS;
- break;
- }
- break;
+ }
+ }
+ /* Here buf holds the normalized copy of the name; its length is len. */
+
+ /* Do a hash table lookup, with case-insensitive comparison. */
+ found = uc_bidi_class_lookup (buf, len);
+ if (found != NULL)
+ return found->bidi_class;
}
/* Invalid bidi class name. */
return -1;
--- /dev/null
+/* Names (short and long) of the bidi classes of Unicode characters. */
+struct named_bidi_class { int name; int bidi_class; };
+%struct-type
+%ignore-case
+%language=ANSI-C
+%define hash-function-name bidi_class_hash
+%define lookup-function-name uc_bidi_class_lookup
+%readonly-tables
+%global-table
+%define word-array-name bidi_class_names
+%pic
+%define string-pool-name bidi_class_stringpool
+%%
+AL, UC_BIDI_AL
+AN, UC_BIDI_AN
+B, UC_BIDI_B
+BN, UC_BIDI_BN
+CS, UC_BIDI_CS
+EN, UC_BIDI_EN
+ES, UC_BIDI_ES
+ET, UC_BIDI_ET
+L, UC_BIDI_L
+LRE, UC_BIDI_LRE
+LRO, UC_BIDI_LRO
+NSM, UC_BIDI_NSM
+ON, UC_BIDI_ON
+PDF, UC_BIDI_PDF
+R, UC_BIDI_R
+RLE, UC_BIDI_RLE
+RLO, UC_BIDI_RLO
+S, UC_BIDI_S
+WS, UC_BIDI_WS
+Arabic Letter, UC_BIDI_AL
+ArabicLetter, UC_BIDI_AL
+Arabic Number, UC_BIDI_AN
+ArabicNumber, UC_BIDI_AN
+Paragraph Separator, UC_BIDI_B
+ParagraphSeparator, UC_BIDI_B
+Boundary Neutral, UC_BIDI_BN
+BoundaryNeutral, UC_BIDI_BN
+Common Separator, UC_BIDI_CS
+CommonSeparator, UC_BIDI_CS
+European Number, UC_BIDI_EN
+EuropeanNumber, UC_BIDI_EN
+European Separator, UC_BIDI_ES
+EuropeanSeparator, UC_BIDI_ES
+European Terminator, UC_BIDI_ET
+EuropeanTerminator, UC_BIDI_ET
+Left To Right, UC_BIDI_L
+LeftToRight, UC_BIDI_L
+Left To Right Embedding, UC_BIDI_LRE
+LeftToRightEmbedding, UC_BIDI_LRE
+Left To Right Override, UC_BIDI_LRO
+LeftToRightOverride, UC_BIDI_LRO
+Nonspacing Mark, UC_BIDI_NSM
+NonspacingMark, UC_BIDI_NSM
+Other Neutral, UC_BIDI_ON
+OtherNeutral, UC_BIDI_ON
+Pop Directional Format, UC_BIDI_PDF
+PopDirectionalFormat, UC_BIDI_PDF
+Right To Left, UC_BIDI_R
+RightToLeft, UC_BIDI_R
+Right To Left Embedding, UC_BIDI_RLE
+RightToLeftEmbedding, UC_BIDI_RLE
+Right To Left Override, UC_BIDI_RLO
+RightToLeftOverride, UC_BIDI_RLO
+Segment Separator, UC_BIDI_S
+SegmentSeparator, UC_BIDI_S
+White Space, UC_BIDI_WS
+WhiteSpace, UC_BIDI_WS
Files:
lib/unictype/bidi_byname.c
+lib/unictype/bidi_byname.gperf
Depends-on:
unictype/base
+gperf
configure.ac:
gl_LIBUNISTRING_MODULE([0.9.4], [unictype/bidiclass-byname])
lib_SOURCES += unictype/bidi_byname.c
endif
+unictype/bidi_byname.h: unictype/bidi_byname.gperf
+ $(GPERF) -m 10 $(srcdir)/unictype/bidi_byname.gperf > $(srcdir)/unictype/bidi_byname.h-t
+ mv $(srcdir)/unictype/bidi_byname.h-t $(srcdir)/unictype/bidi_byname.h
+BUILT_SOURCES += unictype/bidi_byname.h
+MOSTLYCLEANFILES += unictype/bidi_byname.h-t
+MAINTAINERCLEANFILES += unictype/bidi_byname.h
+EXTRA_DIST += unictype/bidi_byname.h
+
Include:
"unictype.h"
ASSERT (uc_bidi_class_byname ("S") == UC_BIDI_S);
ASSERT (uc_bidi_class_byname ("WS") == UC_BIDI_WS);
ASSERT (uc_bidi_class_byname ("ON") == UC_BIDI_ON);
+
+ ASSERT (uc_bidi_class_byname ("ARABIC LETTER") == UC_BIDI_AL);
+ ASSERT (uc_bidi_class_byname ("Arabic Letter") == UC_BIDI_AL);
+ ASSERT (uc_bidi_class_byname ("Arabic_Letter") == UC_BIDI_AL);
+ ASSERT (uc_bidi_class_byname ("ArabicLetter") == UC_BIDI_AL);
+ ASSERT (uc_bidi_class_byname ("ARABIC NUMBER") == UC_BIDI_AN);
+ ASSERT (uc_bidi_class_byname ("Arabic Number") == UC_BIDI_AN);
+ ASSERT (uc_bidi_class_byname ("Arabic_Number") == UC_BIDI_AN);
+ ASSERT (uc_bidi_class_byname ("ArabicNumber") == UC_BIDI_AN);
+ ASSERT (uc_bidi_class_byname ("PARAGRAPH SEPARATOR") == UC_BIDI_B);
+ ASSERT (uc_bidi_class_byname ("Paragraph Separator") == UC_BIDI_B);
+ ASSERT (uc_bidi_class_byname ("Paragraph_Separator") == UC_BIDI_B);
+ ASSERT (uc_bidi_class_byname ("ParagraphSeparator") == UC_BIDI_B);
+ ASSERT (uc_bidi_class_byname ("BOUNDARY NEUTRAL") == UC_BIDI_BN);
+ ASSERT (uc_bidi_class_byname ("Boundary Neutral") == UC_BIDI_BN);
+ ASSERT (uc_bidi_class_byname ("Boundary_Neutral") == UC_BIDI_BN);
+ ASSERT (uc_bidi_class_byname ("BoundaryNeutral") == UC_BIDI_BN);
+ ASSERT (uc_bidi_class_byname ("COMMON SEPARATOR") == UC_BIDI_CS);
+ ASSERT (uc_bidi_class_byname ("Common Separator") == UC_BIDI_CS);
+ ASSERT (uc_bidi_class_byname ("Common_Separator") == UC_BIDI_CS);
+ ASSERT (uc_bidi_class_byname ("CommonSeparator") == UC_BIDI_CS);
+ ASSERT (uc_bidi_class_byname ("EUROPEAN NUMBER") == UC_BIDI_EN);
+ ASSERT (uc_bidi_class_byname ("European Number") == UC_BIDI_EN);
+ ASSERT (uc_bidi_class_byname ("European_Number") == UC_BIDI_EN);
+ ASSERT (uc_bidi_class_byname ("EuropeanNumber") == UC_BIDI_EN);
+ ASSERT (uc_bidi_class_byname ("EUROPEAN SEPARATOR") == UC_BIDI_ES);
+ ASSERT (uc_bidi_class_byname ("European Separator") == UC_BIDI_ES);
+ ASSERT (uc_bidi_class_byname ("European_Separator") == UC_BIDI_ES);
+ ASSERT (uc_bidi_class_byname ("EuropeanSeparator") == UC_BIDI_ES);
+ ASSERT (uc_bidi_class_byname ("EUROPEAN TERMINATOR") == UC_BIDI_ET);
+ ASSERT (uc_bidi_class_byname ("European Terminator") == UC_BIDI_ET);
+ ASSERT (uc_bidi_class_byname ("European_Terminator") == UC_BIDI_ET);
+ ASSERT (uc_bidi_class_byname ("EuropeanTerminator") == UC_BIDI_ET);
+ ASSERT (uc_bidi_class_byname ("LEFT TO RIGHT") == UC_BIDI_L);
+ ASSERT (uc_bidi_class_byname ("Left To Right") == UC_BIDI_L);
+ ASSERT (uc_bidi_class_byname ("Left_To_Right") == UC_BIDI_L);
+ ASSERT (uc_bidi_class_byname ("LeftToRight") == UC_BIDI_L);
+ ASSERT (uc_bidi_class_byname ("LEFT TO RIGHT EMBEDDING") == UC_BIDI_LRE);
+ ASSERT (uc_bidi_class_byname ("Left To Right Embedding") == UC_BIDI_LRE);
+ ASSERT (uc_bidi_class_byname ("Left_To_Right_Embedding") == UC_BIDI_LRE);
+ ASSERT (uc_bidi_class_byname ("LeftToRightEmbedding") == UC_BIDI_LRE);
+ ASSERT (uc_bidi_class_byname ("LEFT TO RIGHT OVERRIDE") == UC_BIDI_LRO);
+ ASSERT (uc_bidi_class_byname ("Left To Right Override") == UC_BIDI_LRO);
+ ASSERT (uc_bidi_class_byname ("Left_To_Right_Override") == UC_BIDI_LRO);
+ ASSERT (uc_bidi_class_byname ("LeftToRightOverride") == UC_BIDI_LRO);
+ ASSERT (uc_bidi_class_byname ("NONSPACING MARK") == UC_BIDI_NSM);
+ ASSERT (uc_bidi_class_byname ("Nonspacing Mark") == UC_BIDI_NSM);
+ ASSERT (uc_bidi_class_byname ("Nonspacing_Mark") == UC_BIDI_NSM);
+ ASSERT (uc_bidi_class_byname ("NonspacingMark") == UC_BIDI_NSM);
+ ASSERT (uc_bidi_class_byname ("OTHER NEUTRAL") == UC_BIDI_ON);
+ ASSERT (uc_bidi_class_byname ("Other Neutral") == UC_BIDI_ON);
+ ASSERT (uc_bidi_class_byname ("Other_Neutral") == UC_BIDI_ON);
+ ASSERT (uc_bidi_class_byname ("OtherNeutral") == UC_BIDI_ON);
+ ASSERT (uc_bidi_class_byname ("POP DIRECTIONAL FORMAT") == UC_BIDI_PDF);
+ ASSERT (uc_bidi_class_byname ("Pop Directional Format") == UC_BIDI_PDF);
+ ASSERT (uc_bidi_class_byname ("Pop_Directional_Format") == UC_BIDI_PDF);
+ ASSERT (uc_bidi_class_byname ("PopDirectionalFormat") == UC_BIDI_PDF);
+ ASSERT (uc_bidi_class_byname ("RIGHT TO LEFT") == UC_BIDI_R);
+ ASSERT (uc_bidi_class_byname ("Right To Left") == UC_BIDI_R);
+ ASSERT (uc_bidi_class_byname ("Right_To_Left") == UC_BIDI_R);
+ ASSERT (uc_bidi_class_byname ("RightToLeft") == UC_BIDI_R);
+ ASSERT (uc_bidi_class_byname ("RIGHT TO LEFT EMBEDDING") == UC_BIDI_RLE);
+ ASSERT (uc_bidi_class_byname ("Right To Left Embedding") == UC_BIDI_RLE);
+ ASSERT (uc_bidi_class_byname ("Right_To_Left_Embedding") == UC_BIDI_RLE);
+ ASSERT (uc_bidi_class_byname ("RightToLeftEmbedding") == UC_BIDI_RLE);
+ ASSERT (uc_bidi_class_byname ("RIGHT TO LEFT OVERRIDE") == UC_BIDI_RLO);
+ ASSERT (uc_bidi_class_byname ("Right To Left Override") == UC_BIDI_RLO);
+ ASSERT (uc_bidi_class_byname ("Right_To_Left_Override") == UC_BIDI_RLO);
+ ASSERT (uc_bidi_class_byname ("RightToLeftOverride") == UC_BIDI_RLO);
+ ASSERT (uc_bidi_class_byname ("SEGMENT SEPARATOR") == UC_BIDI_S);
+ ASSERT (uc_bidi_class_byname ("Segment Separator") == UC_BIDI_S);
+ ASSERT (uc_bidi_class_byname ("Segment_Separator") == UC_BIDI_S);
+ ASSERT (uc_bidi_class_byname ("SegmentSeparator") == UC_BIDI_S);
+ ASSERT (uc_bidi_class_byname ("WHITE SPACE") == UC_BIDI_WS);
+ ASSERT (uc_bidi_class_byname ("White Space") == UC_BIDI_WS);
+ ASSERT (uc_bidi_class_byname ("White_Space") == UC_BIDI_WS);
+ ASSERT (uc_bidi_class_byname ("WhiteSpace") == UC_BIDI_WS);
+
ASSERT (uc_bidi_class_byname ("X") < 0);
ASSERT (uc_bidi_class_byname ("") < 0);