X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=fed6113503328e7e2c232947d45dc6d67081f92a;hb=6c8b13da57b074620495a0543bd7944bca574a42;hp=6bdff2a4785734985c2e50e6f5e76b3902c57110;hpb=7841b7dc7a33947552866d1cb10916d0eecdeeb6;p=pspp

diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c
index 6bdff2a478..fed6113503 100644
--- a/src/libpspp/i18n.c
+++ b/src/libpspp/i18n.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -37,8 +37,10 @@
 #include "libpspp/str.h"
 #include "libpspp/version.h"
 
+#include "gl/c-ctype.h"
 #include "gl/c-strcase.h"
 #include "gl/localcharset.h"
+#include <gl/localename.h>
 #include "gl/minmax.h"
 #include "gl/xalloc.h"
 #include "gl/relocatable.h"
@@ -82,7 +84,7 @@ create_iconv (const char* tocode, const char* fromcode)
   converter->tocode = xstrdup (tocode);
   converter->fromcode = xstrdup (fromcode);
   converter->conv = iconv_open (tocode, fromcode);
-  int error = converter->conv == (iconv_t) -1 ? errno : 0;
+  int error = converter->conv == (iconv_t) ~0 ? errno : 0;
   /* I don't think it's safe to translate this string or to use messaging
      as the converters have not yet been set up */
   if (error && strcmp (tocode, fromcode))
@@ -92,6 +94,10 @@ create_iconv (const char* tocode, const char* fromcode)
                "cannot create a converter for `%s' to `%s': %s\n",
                fromcode, tocode, strerror (error));
 
+      free (converter->tocode);
+      free (converter->fromcode);
+      free (converter);
+
       hmapx_insert (&map, NULL, hash);
       return NULL;
     }
@@ -101,21 +107,18 @@ create_iconv (const char* tocode, const char* fromcode)
   iconv_t bconv = iconv_open (tocode, "ASCII");
   if (bconv != (iconv_t) -1)
     {
-      ICONV_CONST  char *nullstr = strdup ("");
-      ICONV_CONST  char *outbuf = strdup ("XXXXXXXX");
-      ICONV_CONST  char *snullstr = nullstr;
-      ICONV_CONST  char *soutbuf = outbuf;
-
-      size_t inbytes = 1;
-      const size_t bytes = 8;
-      size_t outbytes = bytes;
-      if (-1 != iconv (bconv, &nullstr, &inbytes, &outbuf, &outbytes))
-	converter->null_char_width = bytes - outbytes;
-      free (snullstr);
-      free (soutbuf);
+      ICONV_CONST char inbuf[1] = "";
+      ICONV_CONST char *inptr = inbuf;
+      size_t inbytes = sizeof inbuf;
+
+      char outbuf[8];
+      char *outptr = outbuf;
+      size_t outbytes = sizeof outbuf;
+      if (-1 != iconv (bconv, &inptr, &inbytes, &outptr, &outbytes))
+	converter->null_char_width = outptr - outbuf;
       iconv_close (bconv);
     }
-  
+
   hmapx_insert (&map, converter, hash);
 
   return converter;
@@ -237,7 +240,7 @@ try_recode (struct converter *cvtr, char fallbackchar,
 
   for (i = 0 ; i < null_bytes ; ++i)
     *out++ = '\0';
-  
+
   return out - 1 - out_;
 }
 
@@ -259,10 +262,10 @@ recode_string_pool (const char *to, const char *from,
 {
   struct substring out;
 
-  if ( text == NULL )
+  if (text == NULL)
     return NULL;
 
-  if ( length == -1 )
+  if (length == -1)
     length = strlen (text);
 
   out = recode_substring_pool (to, from, ss_buffer (text, length), pool);
@@ -558,7 +561,7 @@ recode_substring_pool__ (const char *to, const char *from,
 
   conv = create_iconv (to, from);
 
-  if ( NULL == conv )
+  if (NULL == conv)
     {
       if (fallbackchar)
         {
@@ -660,9 +663,24 @@ set_default_encoding (const char *enc)
   default_encoding = xstrdup (enc);
 }
 
+/* Returns the language part (e.g. "en") of the current LC_MESSAGES locale
+   name as a malloc'd string that the caller must free, or NULL if that
+   locale is "C".  */
+char *
+get_language (void)
+{
+  char *ln = xstrdup (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"));
+  /* Names look like language[_territory][.codeset][@modifier]. */
+  ln[strcspn (ln, "_.@")] = '\0';
+  if (0 != strcmp (ln, "C"))
+    return ln;
+  free (ln);
+  return NULL;
+}
+
 
 /* Attempts to set the encoding from a locale name
-   returns true if successfull.
+   returns true if successful.
    This function does not (should not!) alter the current locale.
 */
 bool
@@ -680,7 +698,7 @@ set_encoding_from_locale (const char *loc)
   loc_encoding = xstrdup (locale_charset ());
 
 
-  if ( 0 == strcmp (loc_encoding, c_encoding))
+  if (0 == strcmp (loc_encoding, c_encoding))
     {
       ok = false;
     }
@@ -732,7 +750,7 @@ valid_encoding (const char *enc)
 {
   iconv_t conv = iconv_open (UTF8, enc);
 
-  if ( conv == (iconv_t) -1)
+  if (conv == (iconv_t) -1)
     return false;
 
   iconv_close (conv);
@@ -742,7 +760,7 @@ valid_encoding (const char *enc)
 
 
 /* Return the system local's idea of the
-   decimal seperator character */
+   decimal separator character */
 char
 get_system_decimal (void)
 {
@@ -841,6 +859,80 @@ utf8_strncasecmp (const char *a, size_t an, const char *b, size_t bn)
   return result;
 }
 
+/* Returns true if all LEN bytes at S are ASCII digits (also if LEN is 0). */
+static bool
+is_all_digits (const uint8_t *s, size_t len)
+{
+  while (len > 0 && c_isdigit (*s))
+    s++, len--;
+  return len == 0;
+}
+
+/* Compares UTF-8 strings A and B case-insensitively.  If, from where they
+   first differ, both consist only of digits, the numbers they end in are
+   compared by value.  Returns <0 if A < B, 0 if A == B, >0 if A > B. */
+int
+utf8_strverscasecmp (const char *a, const char *b)
+{
+  /* Normalize A. */
+  uint8_t a_stub[64];
+  size_t a_len = sizeof a_stub;
+  uint8_t *a_norm = u8_casefold (CHAR_CAST (uint8_t *, a), strlen (a), NULL,
+                                 UNINORM_NFKD, a_stub, &a_len);
+
+  /* Normalize B. */
+  uint8_t b_stub[64];
+  size_t b_len = sizeof b_stub;
+  uint8_t *b_norm = u8_casefold (CHAR_CAST (uint8_t *, b), strlen (b), NULL,
+                                 UNINORM_NFKD, b_stub, &b_len);
+
+  int result;
+  if (!a_norm || !b_norm)
+    {
+      result = strcmp (a, b);   /* Folding failed; compare raw bytes. */
+      goto exit;
+    }
+
+  size_t len = MIN (a_len, b_len);
+  for (size_t i = 0; i < len; i++)
+    if (a_norm[i] != b_norm[i])
+      {
+        /* If both strings end in digits, compare them numerically. */
+        if (is_all_digits (&a_norm[i], a_len - i)
+            && is_all_digits (&b_norm[i], b_len - i))
+          {
+            /* Back up over digits in the common prefix, which belong to the
+               numbers too ("x100" vs. "x11"), then skip leading zeros. */
+            while (i > 0 && c_isdigit (a_norm[i - 1]))
+              i--;
+            size_t ap = i, bp = i;
+            while (ap < a_len && a_norm[ap] == '0')
+              ap++;
+            while (bp < b_len && b_norm[bp] == '0')
+              bp++;
+
+            /* The number with more digits, if there is one, is larger. */
+            size_t a_digits = a_len - ap;
+            size_t b_digits = b_len - bp;
+            if (a_digits != b_digits)
+              result = a_digits > b_digits ? 1 : -1;
+            else
+              result = memcmp (&a_norm[ap], &b_norm[bp], a_digits);
+          }
+        else
+          result = a_norm[i] > b_norm[i] ? 1 : -1;
+        goto exit;
+      }
+  result = a_len < b_len ? -1 : a_len > b_len;  /* One is a prefix. */
+
+exit:
+  if (a_norm != a_stub)         /* Heap result if the stub was too small. */
+    free (a_norm);
+  if (b_norm != b_stub)
+    free (b_norm);
+  return result;
+}
+
 static char *
 utf8_casemap (const char *s,
               uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
@@ -873,6 +965,12 @@ utf8_to_lower (const char *s)
 {
   return utf8_casemap (s, u8_tolower);
 }
+/* Returns S in title case: the result of utf8_casemap with u8_totitle. */
+char *
+utf8_to_title (const char *s)
+{
+  return utf8_casemap (s, u8_totitle);
+}
 
 bool
 get_encoding_info (struct encoding_info *e, const char *name)