X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=bc0db0b896426c094b305382141a6a3e69959cdb;hb=77ccca8a4264f354b6f3b4e859fd3b82bba2ce84;hp=6bdff2a4785734985c2e50e6f5e76b3902c57110;hpb=7841b7dc7a33947552866d1cb10916d0eecdeeb6;p=pspp diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 6bdff2a478..bc0db0b896 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,6 +37,7 @@ #include "libpspp/str.h" #include "libpspp/version.h" +#include "gl/c-ctype.h" #include "gl/c-strcase.h" #include "gl/localcharset.h" #include "gl/minmax.h" @@ -82,7 +83,7 @@ create_iconv (const char* tocode, const char* fromcode) converter->tocode = xstrdup (tocode); converter->fromcode = xstrdup (fromcode); converter->conv = iconv_open (tocode, fromcode); - int error = converter->conv == (iconv_t) -1 ? errno : 0; + int error = converter->conv == (iconv_t) ~0 ? errno : 0; /* I don't think it's safe to translate this string or to use messaging as the converters have not yet been set up */ if (error && strcmp (tocode, fromcode)) @@ -92,6 +93,10 @@ create_iconv (const char* tocode, const char* fromcode) "cannot create a converter for `%s' to `%s': %s\n", fromcode, tocode, strerror (error)); + free (converter->tocode); + free (converter->fromcode); + free (converter); + hmapx_insert (&map, NULL, hash); return NULL; } @@ -115,7 +120,7 @@ create_iconv (const char* tocode, const char* fromcode) free (soutbuf); iconv_close (bconv); } - + hmapx_insert (&map, converter, hash); return converter; @@ -237,7 +242,7 @@ try_recode (struct converter *cvtr, char fallbackchar, for (i = 0 ; i < null_bytes ; ++i) *out++ = '\0'; - + return out - 1 - out_; } @@ -662,7 +667,7 @@ set_default_encoding (const char *enc) /* Attempts to set the encoding from a locale name - returns true if successfull. + returns true if successful. This function does not (should not!) alter the current locale. */ bool @@ -742,7 +747,7 @@ valid_encoding (const char *enc) /* Return the system local's idea of the - decimal seperator character */ + decimal separator character */ char get_system_decimal (void) { @@ -841,6 +846,80 @@ utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn) return result; } +static bool +is_all_digits (const uint8_t *s, size_t len) +{ + for (size_t i = 0; i < len; i++) + if (!c_isdigit (s[i])) + return false; + return true; +} + +/* Compares UTF-8 strings A and B case-insensitively. If the strings end in a + number, then they are compared numerically. Returns a negative value if A < + B, zero if A == B, positive if A > B. */ +int +utf8_strverscasecmp (const char *a, const char *b) +{ + /* Normalize A. */ + uint8_t a_stub[64]; + size_t a_len = sizeof a_stub; + uint8_t *a_norm = u8_casefold (CHAR_CAST (uint8_t *, a), strlen (a), NULL, + UNINORM_NFKD, a_stub, &a_len); + + /* Normalize B. */ + uint8_t b_stub[64]; + size_t b_len = sizeof b_stub; + uint8_t *b_norm = u8_casefold (CHAR_CAST (uint8_t *, b), strlen (b), NULL, + UNINORM_NFKD, b_stub, &b_len); + + int result; + if (!a_norm || !b_norm) + { + result = strcmp (a, b); + goto exit; + } + + size_t len = MIN (a_len, b_len); + for (size_t i = 0; i < len; i++) + if (a_norm[i] != b_norm[i]) + { + /* If both strings end in digits, compare them numerically. */ + if (is_all_digits (&a_norm[i], a_len - i) + && is_all_digits (&b_norm[i], b_len - i)) + { + /* Start by stripping leading zeros, since those don't matter for + numerical comparison. */ + size_t ap, bp; + for (ap = i; ap < a_len; ap++) + if (a_norm[ap] != '0') + break; + for (bp = i; bp < b_len; bp++) + if (b_norm[bp] != '0') + break; + + /* The number with more digits, if there is one, is larger. */ + size_t a_digits = a_len - ap; + size_t b_digits = b_len - bp; + if (a_digits != b_digits) + result = a_digits > b_digits ? 1 : -1; + else + result = memcmp (&a_norm[ap], &b_norm[bp], a_digits); + } + else + result = a_norm[i] > b_norm[i] ? 1 : -1; + goto exit; + } + result = a_len < b_len ? -1 : a_len > b_len; + +exit: + if (a_norm != a_stub) + free (a_norm); + if (b_norm != b_stub) + free (b_norm); + return result; +} + static char * utf8_casemap (const char *s, uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,