X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=3faadcbb87c770395f5ce0c86fdef509df50c612;hb=06ac0c2d623e9ae54c3e22a2d6740219ab357b21;hp=bc0db0b896426c094b305382141a6a3e69959cdb;hpb=8180c5dd1591446174c0753ee960921786113403;p=pspp diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index bc0db0b896..3faadcbb87 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, + 2016, 2021 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,11 +29,13 @@ #include #include #include +#include #include "libpspp/assertion.h" #include "libpspp/compiler.h" #include "libpspp/hmapx.h" #include "libpspp/hash-functions.h" +#include "libpspp/misc.h" #include "libpspp/pool.h" #include "libpspp/str.h" #include "libpspp/version.h" @@ -40,6 +43,7 @@ #include "gl/c-ctype.h" #include "gl/c-strcase.h" #include "gl/localcharset.h" +#include #include "gl/minmax.h" #include "gl/xalloc.h" #include "gl/relocatable.h" @@ -61,7 +65,7 @@ static struct hmapx map; /* A wrapper around iconv_open */ static struct converter * -create_iconv (const char* tocode, const char* fromcode) +create_iconv (const char* tocode, const char* fromcode, bool warn) { size_t hash; struct hmapx_node *node; @@ -88,10 +92,11 @@ create_iconv (const char* tocode, const char* fromcode) as the converters have not yet been set up */ if (error && strcmp (tocode, fromcode)) { - fprintf (stderr, - "Warning: " - "cannot create a converter for `%s' to `%s': %s\n", - fromcode, tocode, strerror (error)); + if (warn) + fprintf (stderr, + "Warning: " + "cannot create a converter for `%s' to `%s': %s\n", + fromcode, tocode, strerror (error)); free 
(converter->tocode); free (converter->fromcode); @@ -106,18 +111,15 @@ create_iconv (const char* tocode, const char* fromcode) iconv_t bconv = iconv_open (tocode, "ASCII"); if (bconv != (iconv_t) -1) { - ICONV_CONST char *nullstr = strdup (""); - ICONV_CONST char *outbuf = strdup ("XXXXXXXX"); - ICONV_CONST char *snullstr = nullstr; - ICONV_CONST char *soutbuf = outbuf; - - size_t inbytes = 1; - const size_t bytes = 8; - size_t outbytes = bytes; - if (-1 != iconv (bconv, &nullstr, &inbytes, &outbuf, &outbytes)) - converter->null_char_width = bytes - outbytes; - free (snullstr); - free (soutbuf); + ICONV_CONST char inbuf[1] = ""; + ICONV_CONST char *inptr = inbuf; + size_t inbytes = sizeof inbuf; + + char outbuf[8]; + char *outptr = outbuf; + size_t outbytes = sizeof outbuf; + if (-1 != iconv (bconv, &inptr, &inbytes, &outptr, &outbytes)) + converter->null_char_width = outptr - outbuf; iconv_close (bconv); } @@ -264,10 +266,10 @@ recode_string_pool (const char *to, const char *from, { struct substring out; - if ( text == NULL ) + if (text == NULL) return NULL; - if ( length == -1 ) + if (length == -1) length = strlen (text); out = recode_substring_pool (to, from, ss_buffer (text, length), pool); @@ -501,6 +503,55 @@ utf8_encoding_concat_len (const char *head, const char *tail, return prefix_len + tail_len; } +/* Returns the number of display columns that would be occupied by the LENGTH + bytes of UTF-8 starting at S. */ +size_t +utf8_count_columns (const char *s_, size_t length) +{ + const uint8_t *s = CHAR_CAST (const uint8_t *, s_); + + size_t columns = 0; + for (int ofs = 0; ofs < length; ) + { + ucs4_t uc; + ofs += u8_mbtouc (&uc, s + ofs, length - ofs); + if (uc != '\t') + { + int width = uc_width (uc, "UTF-8"); + if (width > 0) + columns += width; + } + else + columns = ROUND_UP (columns + 1, 8); + } + return columns; +} + +/* Returns the byte offset in LENGTH-byte UTF-8 string S that is N_COLUMNS + display columns into the string. 
/* Returns the language part of the name of the current LC_MESSAGES locale
   (for example "de" for "de_DE.UTF-8"), or NULL if that locale is "C".

   The language is whatever precedes the first '_', '.' or '@' in the name
   reported by gl_locale_name(), so that names without a territory, such as
   "de.UTF-8" or "ca@valencia", are also reduced to the bare language code.

   The returned string is allocated with xstrdup(); the caller owns it and
   must free it. */
char *
get_language (void)
{
  const char *localename = gl_locale_name (LC_MESSAGES, "LC_MESSAGES");
  if (0 == strcmp (localename, "C"))
    return NULL;

  char *ln = xstrdup (localename);
  /* strcspn() yields the full length when no separator is present, in which
     case this just rewrites the existing terminator. */
  ln[strcspn (ln, "_.@")] = '\0';
  return ln;
}
@@ -685,7 +753,7 @@ set_encoding_from_locale (const char *loc) loc_encoding = xstrdup (locale_charset ()); - if ( 0 == strcmp (loc_encoding, c_encoding)) + if (0 == strcmp (loc_encoding, c_encoding)) { ok = false; } @@ -737,7 +805,7 @@ valid_encoding (const char *enc) { iconv_t conv = iconv_open (UTF8, enc); - if ( conv == (iconv_t) -1) + if (conv == (iconv_t) -1) return false; iconv_close (conv); @@ -812,7 +880,15 @@ utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis) unsigned int utf8_hash_case_string (const char *s, unsigned int basis) { - return utf8_hash_case_bytes (s, strlen (s), basis); + return utf8_hash_case_substring (ss_cstr (s), basis); +} + +/* Returns a hash value for UTF-8 string S, with lowercase and uppercase + letters treated as equal, starting from BASIS. */ +unsigned int +utf8_hash_case_substring (struct substring s, unsigned int basis) +{ + return utf8_hash_case_bytes (s.string, s.length, basis); } /* Compares UTF-8 strings A and B case-insensitively. 
@@ -820,7 +896,13 @@ utf8_hash_case_string (const char *s, unsigned int basis) int utf8_strcasecmp (const char *a, const char *b) { - return utf8_strncasecmp (a, strlen (a), b, strlen (b)); + return utf8_sscasecmp (ss_cstr (a), ss_cstr (b)); +} + +int +utf8_sscasecmp (struct substring a, struct substring b) +{ + return utf8_strncasecmp (a.string, a.length, b.string, b.length); } /* Compares UTF-8 strings A (with length AN) and B (with length BN) @@ -952,6 +1034,12 @@ utf8_to_lower (const char *s) { return utf8_casemap (s, u8_tolower); } + +char * +utf8_to_title (const char *s) +{ + return utf8_casemap (s, u8_totitle); +} bool get_encoding_info (struct encoding_info *e, const char *name) @@ -1034,8 +1122,8 @@ is_encoding_ebcdic_compatible (const char *encoding) bool is_encoding_supported (const char *encoding) { - return (create_iconv ("UTF-8", encoding) - && create_iconv (encoding, "UTF-8")); + return (create_iconv ("UTF-8", encoding, false) + && create_iconv (encoding, "UTF-8", false)); } /* Returns true if E is the name of a UTF-8 encoding.