X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=fed6113503328e7e2c232947d45dc6d67081f92a;hb=6c8b13da57b074620495a0543bd7944bca574a42;hp=6bdff2a4785734985c2e50e6f5e76b3902c57110;hpb=7841b7dc7a33947552866d1cb10916d0eecdeeb6;p=pspp diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 6bdff2a478..fed6113503 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,8 +37,10 @@ #include "libpspp/str.h" #include "libpspp/version.h" +#include "gl/c-ctype.h" #include "gl/c-strcase.h" #include "gl/localcharset.h" +#include <gl/localename.h> #include "gl/minmax.h" #include "gl/xalloc.h" #include "gl/relocatable.h" @@ -82,7 +84,7 @@ create_iconv (const char* tocode, const char* fromcode) converter->tocode = xstrdup (tocode); converter->fromcode = xstrdup (fromcode); converter->conv = iconv_open (tocode, fromcode); - int error = converter->conv == (iconv_t) -1 ? errno : 0; + int error = converter->conv == (iconv_t) ~0 ? 
errno : 0; /* I don't think it's safe to translate this string or to use messaging as the converters have not yet been set up */ if (error && strcmp (tocode, fromcode)) @@ -92,6 +94,10 @@ create_iconv (const char* tocode, const char* fromcode) "cannot create a converter for `%s' to `%s': %s\n", fromcode, tocode, strerror (error)); + free (converter->tocode); + free (converter->fromcode); + free (converter); + hmapx_insert (&map, NULL, hash); return NULL; } @@ -101,21 +107,18 @@ create_iconv (const char* tocode, const char* fromcode) iconv_t bconv = iconv_open (tocode, "ASCII"); if (bconv != (iconv_t) -1) { - ICONV_CONST char *nullstr = strdup (""); - ICONV_CONST char *outbuf = strdup ("XXXXXXXX"); - ICONV_CONST char *snullstr = nullstr; - ICONV_CONST char *soutbuf = outbuf; - - size_t inbytes = 1; - const size_t bytes = 8; - size_t outbytes = bytes; - if (-1 != iconv (bconv, &nullstr, &inbytes, &outbuf, &outbytes)) - converter->null_char_width = bytes - outbytes; - free (snullstr); - free (soutbuf); + ICONV_CONST char inbuf[1] = ""; + ICONV_CONST char *inptr = inbuf; + size_t inbytes = sizeof inbuf; + + char outbuf[8]; + char *outptr = outbuf; + size_t outbytes = sizeof outbuf; + if (-1 != iconv (bconv, &inptr, &inbytes, &outptr, &outbytes)) + converter->null_char_width = outptr - outbuf; iconv_close (bconv); } - + hmapx_insert (&map, converter, hash); return converter; @@ -237,7 +240,7 @@ try_recode (struct converter *cvtr, char fallbackchar, for (i = 0 ; i < null_bytes ; ++i) *out++ = '\0'; - + return out - 1 - out_; } @@ -259,10 +262,10 @@ recode_string_pool (const char *to, const char *from, { struct substring out; - if ( text == NULL ) + if (text == NULL) return NULL; - if ( length == -1 ) + if (length == -1) length = strlen (text); out = recode_substring_pool (to, from, ss_buffer (text, length), pool); @@ -558,7 +561,7 @@ recode_substring_pool__ (const char *to, const char *from, conv = create_iconv (to, from); - if ( NULL == conv ) + if (NULL == conv) { if 
(fallbackchar) { @@ -660,9 +663,24 @@ set_default_encoding (const char *enc) default_encoding = xstrdup (enc); } +/* Return the ISO two letter code for the current LC_MESSAGES + locale category. */ +char * +get_language (void) +{ + const char *localename = gl_locale_name (LC_MESSAGES, "LC_MESSAGES"); + if (0 == strcmp (localename, "C")) + return NULL; + char *ln = xstrdup (localename); + char *end = strchr (ln, '_'); + if (end) + *end = '\0'; + return ln; +} + /* Attempts to set the encoding from a locale name - returns true if successfull. + returns true if successful. This function does not (should not!) alter the current locale. */ bool @@ -680,7 +698,7 @@ set_encoding_from_locale (const char *loc) loc_encoding = xstrdup (locale_charset ()); - if ( 0 == strcmp (loc_encoding, c_encoding)) + if (0 == strcmp (loc_encoding, c_encoding)) { ok = false; } @@ -732,7 +750,7 @@ valid_encoding (const char *enc) { iconv_t conv = iconv_open (UTF8, enc); - if ( conv == (iconv_t) -1) + if (conv == (iconv_t) -1) return false; iconv_close (conv); @@ -742,7 +760,7 @@ valid_encoding (const char *enc) /* Return the system local's idea of the - decimal seperator character */ + decimal separator character */ char get_system_decimal (void) { @@ -841,6 +859,80 @@ utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn) return result; } +static bool +is_all_digits (const uint8_t *s, size_t len) +{ + for (size_t i = 0; i < len; i++) + if (!c_isdigit (s[i])) + return false; + return true; +} + +/* Compares UTF-8 strings A and B case-insensitively. If the strings end in a + number, then they are compared numerically. Returns a negative value if A < + B, zero if A == B, positive if A > B. */ +int +utf8_strverscasecmp (const char *a, const char *b) +{ + /* Normalize A. */ + uint8_t a_stub[64]; + size_t a_len = sizeof a_stub; + uint8_t *a_norm = u8_casefold (CHAR_CAST (uint8_t *, a), strlen (a), NULL, + UNINORM_NFKD, a_stub, &a_len); + + /* Normalize B. 
*/ + uint8_t b_stub[64]; + size_t b_len = sizeof b_stub; + uint8_t *b_norm = u8_casefold (CHAR_CAST (uint8_t *, b), strlen (b), NULL, + UNINORM_NFKD, b_stub, &b_len); + + int result; + if (!a_norm || !b_norm) + { + result = strcmp (a, b); + goto exit; + } + + size_t len = MIN (a_len, b_len); + for (size_t i = 0; i < len; i++) + if (a_norm[i] != b_norm[i]) + { + /* If both strings end in digits, compare them numerically. */ + if (is_all_digits (&a_norm[i], a_len - i) + && is_all_digits (&b_norm[i], b_len - i)) + { + /* Start by stripping leading zeros, since those don't matter for + numerical comparison. */ + size_t ap, bp; + for (ap = i; ap < a_len; ap++) + if (a_norm[ap] != '0') + break; + for (bp = i; bp < b_len; bp++) + if (b_norm[bp] != '0') + break; + + /* The number with more digits, if there is one, is larger. */ + size_t a_digits = a_len - ap; + size_t b_digits = b_len - bp; + if (a_digits != b_digits) + result = a_digits > b_digits ? 1 : -1; + else + result = memcmp (&a_norm[ap], &b_norm[bp], a_digits); + } + else + result = a_norm[i] > b_norm[i] ? 1 : -1; + goto exit; + } + result = a_len < b_len ? -1 : a_len > b_len; + +exit: + if (a_norm != a_stub) + free (a_norm); + if (b_norm != b_stub) + free (b_norm); + return result; +} + static char * utf8_casemap (const char *s, uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t, @@ -873,6 +965,12 @@ utf8_to_lower (const char *s) { return utf8_casemap (s, u8_tolower); } + +char * +utf8_to_title (const char *s) +{ + return utf8_casemap (s, u8_totitle); +} bool get_encoding_info (struct encoding_info *e, const char *name)