X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=0819299d37abaae54e4f666f046e798818c53a7a;hb=refs%2Fbuilds%2F20120518030503%2Fpspp;hp=a3dc08a1ff26982171e8b9cf9c15543e74a2d506;hpb=d4f47df26ff4087c7a75567a9843720d1aede5b0;p=pspp

Note: relative to the blob named above, this copy of the patch (1) checks the
bound of encodings[] in add_category() before each store instead of after the
loop, (2) spells "MacDevanagari" correctly, and (3) adds comments to the new
definitions.  Hunk line counts are adjusted to match.  The first context line
of the second hunk, whose header name was lost, is omitted.  Indentation was
restored by hand, so apply with whitespace matching relaxed (e.g. "patch -l").

diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c
index a3dc08a1ff..0819299d37 100644
--- a/src/libpspp/i18n.c
+++ b/src/libpspp/i18n.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -31,5 +31,6 @@
 
 #include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
 #include "libpspp/hmapx.h"
 #include "libpspp/hash-functions.h"
 #include "libpspp/pool.h"
@@ -42,6 +43,9 @@
 #include "gl/relocatable.h"
 #include "gl/xstrndup.h"
 
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
 struct converter
   {
     char *tocode;
@@ -542,6 +546,7 @@ void
 i18n_init (void)
 {
   setlocale (LC_CTYPE, "");
+  setlocale (LC_COLLATE, "");
   setlocale (LC_MESSAGES, "");
 #if HAVE_LC_PAPER
   setlocale (LC_PAPER, "");
@@ -694,34 +699,52 @@ get_encoding_info (struct encoding_info *e, const char *name)
     "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
     "abcdefghijklmnopqrstuvwxyz{|}~");
 
-  struct substring out, cr, lf;
+  struct substring out, cr, lf, space;
   bool ok;
 
   memset (e, 0, sizeof *e);
 
   cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
   lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
-  ok = cr.length >= 1 && cr.length <= MAX_UNIT && cr.length == lf.length;
+  space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
+  ok = (cr.length >= 1
+        && cr.length <= MAX_UNIT
+        && cr.length == lf.length
+        && cr.length == space.length);
   if (!ok)
     {
       fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
       ss_dealloc (&cr);
       ss_dealloc (&lf);
+      ss_dealloc (&space);
       ss_alloc_substring (&cr, ss_cstr ("\r"));
       ss_alloc_substring (&lf, ss_cstr ("\n"));
+      ss_alloc_substring (&space, ss_cstr (" "));
     }
 
   e->unit = cr.length;
   memcpy (e->cr, cr.string, e->unit);
   memcpy (e->lf, lf.string, e->unit);
+  memcpy (e->space, space.string, e->unit);
 
   ss_dealloc (&cr);
   ss_dealloc (&lf);
+  ss_dealloc (&space);
 
   out = recode_substring_pool ("UTF-8", name, in, NULL);
   e->is_ascii_compatible = ss_equals (in, out);
   ss_dealloc (&out);
 
+  if (!e->is_ascii_compatible && e->unit == 1)
+    {
+      out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
+      e->is_ebcdic_compatible = (out.length == 1
+                                 && (uint8_t) out.string[0] == 0xc1);
+      ss_dealloc (&out);
+    }
+  else
+    e->is_ebcdic_compatible = false;
+
   return ok;
 }
 
@@ -734,6 +757,18 @@ is_encoding_ascii_compatible (const char *encoding)
   return e.is_ascii_compatible;
 }
 
+/* Returns true if get_encoding_info() finds that ENCODING is not
+   ASCII-compatible, has a one-byte unit, and encodes "A" as the single byte
+   0xc1 (the EBCDIC code for "A"); otherwise false. */
+bool
+is_encoding_ebcdic_compatible (const char *encoding)
+{
+  struct encoding_info e;
+
+  get_encoding_info (&e, encoding);
+  return e.is_ebcdic_compatible;
+}
+
 /* Returns true if iconv can convert ENCODING to and from UTF-8,
    otherwise false. */
 bool
@@ -742,3 +777,146 @@ is_encoding_supported (const char *encoding)
   return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
           && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
 }
+
+/* Returns true if E is the name of a UTF-8 encoding, that is, if E is "UTF8"
+   or "UTF-8" in any mix of upper and lower case, otherwise false.
+
+   XXX Possibly we should test not E as a string but its properties via
+   iconv. */
+bool
+is_encoding_utf8 (const char *e)
+{
+  return ((e[0] == 'u' || e[0] == 'U')
+          && (e[1] == 't' || e[1] == 'T')
+          && (e[2] == 'f' || e[2] == 'F')
+          && ((e[3] == '8' && e[4] == '\0')
+              || (e[3] == '-' && e[4] == '8' && e[5] == '\0')));
+}
+
+/* Encoding categories that the system supports, filled in on first use by
+   init_encoding_categories(). */
+static struct encoding_category *categories;
+static int n_categories;
+
+/* Appends to 'categories' a category named CATEGORY that holds each of the
+   encoding names in the NULL-terminated argument list after CATEGORY that is
+   "Auto" or that is_encoding_supported() accepts.  Does nothing if no name
+   qualifies.
+
+   *ALLOCATED_CATEGORIES is the capacity of 'categories' as maintained by
+   x2nrealloc().  CATEGORY and the encoding names are stored by pointer, not
+   copied. */
+static void SENTINEL (0)
+add_category (size_t *allocated_categories, const char *category, ...)
+{
+  struct encoding_category *c;
+  const char *encodings[16];
+  const char *encoding;
+  va_list args;
+  int i, n;
+
+  /* Collect the usable encoding arguments.  The bound is checked before each
+     store, so an overlong argument list cannot write past the end of
+     'encodings' before the assertion fires. */
+  va_start (args, category);
+  n = 0;
+  while ((encoding = va_arg (args, const char *)) != NULL)
+    if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding))
+      {
+        assert (n < sizeof encodings / sizeof *encodings);
+        encodings[n++] = encoding;
+      }
+  va_end (args);
+
+  if (n == 0)
+    return;
+
+  if (n_categories >= *allocated_categories)
+    categories = x2nrealloc (categories,
+                             allocated_categories, sizeof *categories);
+
+  c = &categories[n_categories++];
+  c->category = category;
+  c->encodings = xmalloc (n * sizeof *c->encodings);
+  for (i = 0; i < n; i++)
+    c->encodings[i] = encodings[i];
+  c->n_encodings = n;
+}
+
+/* Fills in 'categories' and 'n_categories' on the first call.  Later calls
+   return immediately. */
+static void
+init_encoding_categories (void)
+{
+  static bool inited;
+  size_t alloc;
+
+  if (inited)
+    return;
+  inited = true;
+
+  alloc = 0;
+  add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
+                "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL);
+  add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256",
+                NULL_SENTINEL);
+  add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL);
+  add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4",
+                "Windows-1257", NULL_SENTINEL);
+  add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL);
+  add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2",
+                "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL);
+  add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK",
+                "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL);
+  add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS",
+                "EUC-TW", NULL_SENTINEL);
+  add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL);
+  add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111",
+                "KOI8-R", "MacCyrillic", NULL_SENTINEL);
+  add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL);
+  add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian",
+                NULL_SENTINEL);
+  add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL);
+  add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL);
+  add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL);
+  add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL);
+  add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255",
+                NULL_SENTINEL);
+  add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL);
+  add_category (&alloc, _("Hindi"), "MacDevanagari", NULL_SENTINEL);
+  add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL);
+  add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS",
+                NULL_SENTINEL);
+  add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC",
+                NULL_SENTINEL);
+  add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL);
+  add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian",
+                NULL_SENTINEL);
+  add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL);
+  add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874",
+                NULL_SENTINEL);
+  add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254",
+                NULL_SENTINEL);
+  add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS",
+                "Windows-1258", NULL_SENTINEL);
+  add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15",
+                "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL);
+}
+
+/* Returns an array of "struct encoding_category" that contains only the
+   categories and encodings that the system supports. */
+struct encoding_category *
+get_encoding_categories (void)
+{
+  init_encoding_categories ();
+  return categories;
+}
+
+/* Returns the number of elements in the array returned by
+   get_encoding_categories(). */
+size_t
+get_n_encoding_categories (void)
+{
+  init_encoding_categories ();
+  return n_categories;
+}