X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=3faadcbb87c770395f5ce0c86fdef509df50c612;hb=588d9107cd4b6eee3a0c3ece3cf53868e22c52f4;hp=31f07ed8da0234d24bf8e53b5b77f377098ebaeb;hpb=bea279dad74479d86671f3acdc8c945d3ce5f2be;p=pspp diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 31f07ed8da..3faadcbb87 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, + 2016, 2021 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,11 +29,13 @@ #include #include #include +#include #include "libpspp/assertion.h" #include "libpspp/compiler.h" #include "libpspp/hmapx.h" #include "libpspp/hash-functions.h" +#include "libpspp/misc.h" #include "libpspp/pool.h" #include "libpspp/str.h" #include "libpspp/version.h" @@ -62,7 +65,7 @@ static struct hmapx map; /* A wrapper around iconv_open */ static struct converter * -create_iconv (const char* tocode, const char* fromcode) +create_iconv (const char* tocode, const char* fromcode, bool warn) { size_t hash; struct hmapx_node *node; @@ -89,10 +92,11 @@ create_iconv (const char* tocode, const char* fromcode) as the converters have not yet been set up */ if (error && strcmp (tocode, fromcode)) { - fprintf (stderr, - "Warning: " - "cannot create a converter for `%s' to `%s': %s\n", - fromcode, tocode, strerror (error)); + if (warn) + fprintf (stderr, + "Warning: " + "cannot create a converter for `%s' to `%s': %s\n", + fromcode, tocode, strerror (error)); free (converter->tocode); free (converter->fromcode); @@ -107,18 +111,15 @@ create_iconv (const char* tocode, const char* fromcode) iconv_t bconv = iconv_open (tocode, "ASCII"); if (bconv != (iconv_t) -1) { - ICONV_CONST char *nullstr = strdup (""); - ICONV_CONST char *outbuf = strdup ("XXXXXXXX"); - ICONV_CONST char *snullstr = nullstr; - ICONV_CONST char *soutbuf = outbuf; - - size_t inbytes = 1; - const size_t bytes = 8; - size_t outbytes = bytes; - if (-1 != iconv (bconv, &nullstr, &inbytes, &outbuf, &outbytes)) - converter->null_char_width = bytes - outbytes; - free (snullstr); - free (soutbuf); + ICONV_CONST char inbuf[1] = ""; + ICONV_CONST char *inptr = inbuf; + size_t inbytes = sizeof inbuf; + + char outbuf[8]; + char *outptr = outbuf; + size_t outbytes = sizeof outbuf; + if (-1 != iconv (bconv, &inptr, &inbytes, &outptr, &outbytes)) + converter->null_char_width = outptr - outbuf; iconv_close (bconv); } @@ -502,6 +503,55 @@ utf8_encoding_concat_len (const char *head, const char *tail, return prefix_len + tail_len; } +/* Returns the number of display columns that would be occupied by the LENGTH + bytes of UTF-8 starting at S. */ +size_t +utf8_count_columns (const char *s_, size_t length) +{ + const uint8_t *s = CHAR_CAST (const uint8_t *, s_); + + size_t columns = 0; + for (int ofs = 0; ofs < length; ) + { + ucs4_t uc; + ofs += u8_mbtouc (&uc, s + ofs, length - ofs); + if (uc != '\t') + { + int width = uc_width (uc, "UTF-8"); + if (width > 0) + columns += width; + } + else + columns = ROUND_UP (columns + 1, 8); + } + return columns; +} + +/* Returns the byte offset in LENGTH-byte UTF-8 string S that is N_COLUMNS + display columns into the string. */ +size_t +utf8_columns_to_bytes (const char *s_, size_t length, size_t n_columns) +{ + const uint8_t *s = CHAR_CAST (const uint8_t *, s_); + + size_t columns = 0; + int ofs; + for (ofs = 0; ofs < length && columns < n_columns; ) + { + ucs4_t uc; + ofs += u8_mbtouc (&uc, s + ofs, length - ofs); + if (uc != '\t') + { + int width = uc_width (uc, "UTF-8"); + if (width > 0) + columns += width; + } + else + columns = ROUND_UP (columns + 1, 8); + } + return ofs; +} + /* Returns an allocated, null-terminated string, owned by the caller, containing as many characters[*] from the beginning of S that would fit within MAX_LEN bytes if the returned string were to be re-encoded in @@ -562,7 +612,7 @@ recode_substring_pool__ (const char *to, const char *from, if (from == NULL) from = default_encoding; - conv = create_iconv (to, from); + conv = create_iconv (to, from, true); if (NULL == conv) { @@ -644,7 +694,9 @@ void i18n_init (void) { setlocale (LC_ALL, ""); - bindtextdomain (PACKAGE, relocate(locale_dir)); + char *allocated; + bindtextdomain (PACKAGE, relocate2 (locale_dir, &allocated)); + free (allocated); textdomain (PACKAGE); assert (default_encoding == NULL); @@ -828,7 +880,15 @@ utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis) unsigned int utf8_hash_case_string (const char *s, unsigned int basis) { - return utf8_hash_case_bytes (s, strlen (s), basis); + return utf8_hash_case_substring (ss_cstr (s), basis); +} + +/* Returns a hash value for UTF-8 string S, with lowercase and uppercase + letters treated as equal, starting from BASIS. */ +unsigned int +utf8_hash_case_substring (struct substring s, unsigned int basis) +{ + return utf8_hash_case_bytes (s.string, s.length, basis); } /* Compares UTF-8 strings A and B case-insensitively. @@ -836,7 +896,13 @@ utf8_hash_case_string (const char *s, unsigned int basis) int utf8_strcasecmp (const char *a, const char *b) { - return utf8_strncasecmp (a, strlen (a), b, strlen (b)); + return utf8_sscasecmp (ss_cstr (a), ss_cstr (b)); +} + +int +utf8_sscasecmp (struct substring a, struct substring b) +{ + return utf8_strncasecmp (a.string, a.length, b.string, b.length); } /* Compares UTF-8 strings A (with length AN) and B (with length BN) @@ -1056,8 +1122,8 @@ is_encoding_ebcdic_compatible (const char *encoding) bool is_encoding_supported (const char *encoding) { - return (create_iconv ("UTF-8", encoding) - && create_iconv (encoding, "UTF-8")); + return (create_iconv ("UTF-8", encoding, false) + && create_iconv (encoding, "UTF-8", false)); } /* Returns true if E is the name of a UTF-8 encoding.