/* PSPP - a program for statistical analysis.
- Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
char *tocode;
char *fromcode;
iconv_t conv;
+ int error;
};
static char *default_encoding;
static struct hmapx map;
/* A wrapper around iconv_open */
-static iconv_t
-create_iconv (const char* tocode, const char* fromcode)
+static struct converter * /* The cache entry itself, so callers can read both `conv' and `error'. */
+create_iconv__ (const char* tocode, const char* fromcode)
{
size_t hash;
struct hmapx_node *node;
HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
if (!strcmp (tocode, converter->tocode)
&& !strcmp (fromcode, converter->fromcode))
- return converter->conv;
+ return converter; /* Cache hit: reuse the entry already stored for this (tocode, fromcode) pair. */
converter = xmalloc (sizeof *converter);
converter->tocode = xstrdup (tocode);
converter->fromcode = xstrdup (fromcode);
converter->conv = iconv_open (tocode, fromcode);
+ converter->error = converter->conv == (iconv_t) -1 ? errno : 0; /* Save errno at the point of failure; 0 means iconv_open succeeded. */
hmapx_insert (&map, converter, hash);
+ return converter; /* The new entry is inserted into the map whether or not iconv_open succeeded. */
+}
+
+static iconv_t
+create_iconv (const char* tocode, const char* fromcode)
+{
+ struct converter *converter;
+
+ converter = create_iconv__ (tocode, fromcode);
+
/* I don't think it's safe to translate this string or to use messaging
as the converters have not yet been set up */
- if ( (iconv_t) -1 == converter->conv && 0 != strcmp (tocode, fromcode))
+ if (converter->error && strcmp (tocode, fromcode))
{
- const int err = errno;
fprintf (stderr,
"Warning: "
"cannot create a converter for `%s' to `%s': %s\n",
- fromcode, tocode, strerror (err));
+ fromcode, tocode, strerror (converter->error));
+ converter->error = 0;
}
return converter->conv;
else
{
size_t copy_len;
- size_t prev;
+ ucs4_t prev;
size_t ofs;
int mblen;
{
bool correct_result = false;
size_t copy_len;
- size_t prev;
+ ucs4_t prev;
size_t ofs;
int mblen;
{
free (cvtr->tocode);
free (cvtr->fromcode);
- iconv_close (cvtr->conv);
+ if (cvtr->conv != (iconv_t) -1)
+ iconv_close (cvtr->conv);
free (cvtr);
}
"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
"abcdefghijklmnopqrstuvwxyz{|}~");
- struct substring out, cr, lf;
+ struct substring out, cr, lf, space;
bool ok;
memset (e, 0, sizeof *e);
cr = recode_substring_pool (name, "UTF-8", ss_cstr ("\r"), NULL);
lf = recode_substring_pool (name, "UTF-8", ss_cstr ("\n"), NULL);
- ok = cr.length >= 1 && cr.length <= MAX_UNIT && cr.length == lf.length;
+ space = recode_substring_pool (name, "UTF-8", ss_cstr (" "), NULL);
+ ok = (cr.length >= 1
+ && cr.length <= MAX_UNIT
+ && cr.length == lf.length
+ && cr.length == space.length);
if (!ok)
{
fprintf (stderr, "warning: encoding `%s' is not supported.\n", name);
ss_dealloc (&cr);
ss_dealloc (&lf);
+ ss_dealloc (&space);
ss_alloc_substring (&cr, ss_cstr ("\r"));
ss_alloc_substring (&lf, ss_cstr ("\n"));
+ ss_alloc_substring (&space, ss_cstr (" "));
}
e->unit = cr.length;
memcpy (e->cr, cr.string, e->unit);
memcpy (e->lf, lf.string, e->unit);
+ memcpy (e->space, space.string, e->unit);
ss_dealloc (&cr);
ss_dealloc (&lf);
+ ss_dealloc (&space);
out = recode_substring_pool ("UTF-8", name, in, NULL);
e->is_ascii_compatible = ss_equals (in, out);
ss_dealloc (&out);
+ if (!e->is_ascii_compatible && e->unit == 1)
+ {
+ out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
+ e->is_ebcdic_compatible = (out.length == 1
+ && (uint8_t) out.string[0] == 0xc1);
+ ss_dealloc (&out);
+ }
+ else
+ e->is_ebcdic_compatible = false;
+
return ok;
}
get_encoding_info (&e, encoding);
return e.is_ascii_compatible;
}
+/* Returns the is_ebcdic_compatible flag that get_encoding_info computes
+   for ENCODING.  The value returned by get_encoding_info itself is not
+   examined here. */
+bool
+is_encoding_ebcdic_compatible (const char *encoding)
+{
+ struct encoding_info e;
+
+ get_encoding_info (&e, encoding);
+ return e.is_ebcdic_compatible;
+}
+
+/* Returns true if iconv can convert ENCODING to and from UTF-8,
+   otherwise false.
+
+   Both directions are probed with create_iconv__, which records an
+   iconv_open failure in the converter but prints nothing itself.  The
+   second direction (UTF-8 to ENCODING) is only probed when the first
+   (ENCODING to UTF-8) succeeded, because of the short-circuit `&&'. */
+bool
+is_encoding_supported (const char *encoding)
+{
+ return (create_iconv__ ("UTF-8", encoding)->conv != (iconv_t) -1
+ && create_iconv__ (encoding, "UTF-8")->conv != (iconv_t) -1);
+}
+
+/* Returns true if E is the name of a UTF-8 encoding, otherwise false.
+
+   Exactly two spellings are accepted: "UTF8" and "UTF-8", with the
+   letters U, T and F matched in either case.  E is dereferenced without
+   a null-pointer check.  The tests short-circuit, so no byte beyond E's
+   null terminator is read.
+
+   XXX Possibly we should test E's properties via iconv instead of
+   comparing its name as a string. */
+bool
+is_encoding_utf8 (const char *e)
+{
+ return ((e[0] == 'u' || e[0] == 'U')
+ && (e[1] == 't' || e[1] == 'T')
+ && (e[2] == 'f' || e[2] == 'F')
+ && ((e[3] == '8' && e[4] == '\0')
+ || (e[3] == '-' && e[4] == '8' && e[5] == '\0')));
+}