X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flibpspp%2Fi18n.c;h=0819299d37abaae54e4f666f046e798818c53a7a;hb=8d1e072157ee3c8273e328c491be3c8bf57452da;hp=688a0c878d3299d1f9a9d729097b5024fff667ca;hpb=5ebdc1b90de2d205fa18e220581454bd0a687481;p=pspp diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 688a0c878d..0819299d37 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include #include "libpspp/assertion.h" +#include "libpspp/compiler.h" #include "libpspp/hmapx.h" #include "libpspp/hash-functions.h" #include "libpspp/pool.h" @@ -42,19 +43,23 @@ #include "gl/relocatable.h" #include "gl/xstrndup.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) + struct converter { char *tocode; char *fromcode; iconv_t conv; + int error; }; static char *default_encoding; static struct hmapx map; /* A wrapper around iconv_open */ -static iconv_t -create_iconv (const char* tocode, const char* fromcode) +static struct converter * +create_iconv__ (const char* tocode, const char* fromcode) { size_t hash; struct hmapx_node *node; @@ -65,23 +70,34 @@ create_iconv (const char* tocode, const char* fromcode) HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map) if (!strcmp (tocode, converter->tocode) && !strcmp (fromcode, converter->fromcode)) - return converter->conv; + return converter; converter = xmalloc (sizeof *converter); converter->tocode = xstrdup (tocode); converter->fromcode = xstrdup (fromcode); converter->conv = iconv_open (tocode, fromcode); + converter->error = converter->conv == (iconv_t) -1 ? errno : 0; hmapx_insert (&map, converter, hash); + return converter; +} + +static iconv_t +create_iconv (const char* tocode, const char* fromcode) +{ + struct converter *converter; + + converter = create_iconv__ (tocode, fromcode); + /* I don't think it's safe to translate this string or to use messaging as the converters have not yet been set up */ - if ( (iconv_t) -1 == converter->conv && 0 != strcmp (tocode, fromcode)) + if (converter->error && strcmp (tocode, fromcode)) { - const int err = errno; fprintf (stderr, "Warning: " "cannot create a converter for `%s' to `%s': %s\n", - fromcode, tocode, strerror (err)); + fromcode, tocode, strerror (converter->error)); + converter->error = 0; } return converter->conv; @@ -530,6 +546,7 @@ void i18n_init (void) { setlocale (LC_CTYPE, ""); + setlocale (LC_COLLATE, ""); setlocale (LC_MESSAGES, ""); #if HAVE_LC_PAPER setlocale (LC_PAPER, ""); @@ -682,34 +699,52 @@ get_encoding_info (struct encoding_info *e, const char *name) "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`" "abcdefghijklmnopqrstuvwxyz{|}~"); - struct substring out, cr, lf; + struct substring out, cr, lf, space; bool ok; memset (e, 0, sizeof *e); cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL); lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL); - ok = cr.length >= 1 && cr.length <= MAX_UNIT && cr.length == lf.length; + space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL); + ok = (cr.length >= 1 + && cr.length <= MAX_UNIT + && cr.length == lf.length + && cr.length == space.length); if (!ok) { fprintf (stderr, "warning: encoding `%s' is not supported.\n", name); ss_dealloc (&cr); ss_dealloc (&lf); + ss_dealloc (&space); ss_alloc_substring (&cr, ss_cstr ("\r")); ss_alloc_substring (&lf, ss_cstr ("\n")); + ss_alloc_substring (&space, ss_cstr (" ")); } e->unit = cr.length; memcpy (e->cr, cr.string, e->unit); memcpy (e->lf, lf.string, e->unit); + memcpy (e->space, space.string, e->unit); ss_dealloc (&cr); ss_dealloc (&lf); + ss_dealloc (&space); out = recode_substring_pool ("UTF-8", name, in, NULL); e->is_ascii_compatible = ss_equals (in, out); ss_dealloc (&out); + if (!e->is_ascii_compatible && e->unit == 1) + { + out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL); + e->is_ebcdic_compatible = (out.length == 1 + && (uint8_t) out.string[0] == 0xc1); + ss_dealloc (&out); + } + else + e->is_ebcdic_compatible = false; + return ok; } @@ -721,3 +756,149 @@ is_encoding_ascii_compatible (const char *encoding) get_encoding_info (&e, encoding); return e.is_ascii_compatible; } + +bool +is_encoding_ebcdic_compatible (const char *encoding) +{ + struct encoding_info e; + + get_encoding_info (&e, encoding); + return e.is_ebcdic_compatible; +} + +/* Returns true if iconv can convert ENCODING to and from UTF-8, + otherwise false. */ +bool +is_encoding_supported (const char *encoding) +{ + return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1 + && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1); +} + +/* Returns true if E is the name of a UTF-8 encoding. + + XXX Possibly we should test not E as a string but its properties via + iconv. */ +bool +is_encoding_utf8 (const char *e) +{ + return ((e[0] == 'u' || e[0] == 'U') + && (e[1] == 't' || e[1] == 'T') + && (e[2] == 'f' || e[2] == 'F') + && ((e[3] == '8' && e[4] == '\0') + || (e[3] == '-' && e[4] == '8' && e[5] == '\0'))); +} + +static struct encoding_category *categories; +static int n_categories; + +static void SENTINEL (0) +add_category (size_t *allocated_categories, const char *category, ...) +{ + struct encoding_category *c; + const char *encodings[16]; + va_list args; + int i, n; + + /* Count encoding arguments. */ + va_start (args, category); + n = 0; + while ((encodings[n] = va_arg (args, const char *)) != NULL) + { + const char *encoding = encodings[n]; + if (!strcmp (encoding, "Auto") || is_encoding_supported (encoding)) + n++; + } + assert (n < sizeof encodings / sizeof *encodings); + va_end (args); + + if (n == 0) + return; + + if (n_categories >= *allocated_categories) + categories = x2nrealloc (categories, + allocated_categories, sizeof *categories); + + c = &categories[n_categories++]; + c->category = category; + c->encodings = xmalloc (n * sizeof *c->encodings); + for (i = 0; i < n; i++) + c->encodings[i] = encodings[i]; + c->n_encodings = n; +} + +static void +init_encoding_categories (void) +{ + static bool inited; + size_t alloc; + + if (inited) + return; + inited = true; + + alloc = 0; + add_category (&alloc, "Unicode", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE", + "UTF-32", "UTF-32BE", "UTF-32LE", NULL_SENTINEL); + add_category (&alloc, _("Arabic"), "IBM864", "ISO-8859-6", "Windows-1256", + NULL_SENTINEL); + add_category (&alloc, _("Armenian"), "ARMSCII-8", NULL_SENTINEL); + add_category (&alloc, _("Baltic"), "ISO-8859-13", "ISO-8859-4", + "Windows-1257", NULL_SENTINEL); + add_category (&alloc, _("Celtic"), "ISO-8859-14", NULL_SENTINEL); + add_category (&alloc, _("Central European"), "IBM852", "ISO-8859-2", + "Mac-CentralEurope", "Windows-1250", NULL_SENTINEL); + add_category (&alloc, _("Chinese Simplified"), "GB18030", "GB2312", "GBK", + "HZ-GB-2312", "ISO-2022-CN", NULL_SENTINEL); + add_category (&alloc, _("Chinese Traditional"), "Big5", "Big5-HKSCS", + "EUC-TW", NULL_SENTINEL); + add_category (&alloc, _("Croatian"), "MacCroatian", NULL_SENTINEL); + add_category (&alloc, _("Cyrillic"), "IBM855", "ISO-8859-5", "ISO-IR-111", + "KOI8-R", "MacCyrillic", NULL_SENTINEL); + add_category (&alloc, _("Cyrillic/Russian"), "IBM866", NULL_SENTINEL); + add_category (&alloc, _("Cyrillic/Ukrainian"), "KOI8-U", "MacUkrainian", + NULL_SENTINEL); + add_category (&alloc, _("Georgian"), "GEOSTD8", NULL_SENTINEL); + add_category (&alloc, _("Greek"), "ISO-8859-7", "MacGreek", NULL_SENTINEL); + add_category (&alloc, _("Gujarati"), "MacGujarati", NULL_SENTINEL); + add_category (&alloc, _("Gurmukhi"), "MacGurmukhi", NULL_SENTINEL); + add_category (&alloc, _("Hebrew"), "IBM862", "ISO-8859-8-I", "Windows-1255", + NULL_SENTINEL); + add_category (&alloc, _("Hebrew Visual"), "ISO-8859-8", NULL_SENTINEL); + add_category (&alloc, _("Hindi"), "MacDevangari", NULL_SENTINEL); + add_category (&alloc, _("Icelandic"), "MacIcelandic", NULL_SENTINEL); + add_category (&alloc, _("Japanese"), "EUC-JP", "ISO-2022-JP", "Shift_JIS", + NULL_SENTINEL); + add_category (&alloc, _("Korean"), "EUC-KR", "ISO-2022-KR", "JOHAB", "UHC", + NULL_SENTINEL); + add_category (&alloc, _("Nordic"), "ISO-8859-10", NULL_SENTINEL); + add_category (&alloc, _("Romanian"), "ISO-8859-16", "MacRomanian", + NULL_SENTINEL); + add_category (&alloc, _("South European"), "ISO-8859-3", NULL_SENTINEL); + add_category (&alloc, _("Thai"), "ISO-8859-11", "TIS-620", "Windows-874", + NULL_SENTINEL); + add_category (&alloc, _("Turkish"), "IBM857", "ISO-8859-9", "Windows-1254", + NULL_SENTINEL); + add_category (&alloc, _("Vietnamese"), "TVCN", "VISCII", "VPS", + "Windows-1258", NULL_SENTINEL); + add_category (&alloc, _("Western European"), "ISO-8859-1", "ISO-8859-15", + "Windows-1252", "IBM850", "MacRoman", NULL_SENTINEL); +} + +/* Returns an array of "struct encoding_category" that contains only the + categories and encodings that the system supports. */ +struct encoding_category * +get_encoding_categories (void) +{ + init_encoding_categories (); + return categories; +} + +/* Returns the number of elements in the array returned by + get_encoding_categories(). */ +size_t +get_n_encoding_categories (void) +{ + init_encoding_categories (); + return n_categories; +}