X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=3faadcbb87c770395f5ce0c86fdef509df50c612;hb=9a938bf1aae1fb5e4e4ea98dbf2322cf8382c20c;hp=69162f14f07c72b35e55537f2bd0623f2bb20d5c;hpb=2dc4871bd8834c0b8420d374c7973cdf85435730;p=pspp

diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c
index 69162f14f0..3faadcbb87 100644
--- a/src/libpspp/i18n.c
+++ b/src/libpspp/i18n.c
@@ -29,11 +29,13 @@
 #include
 #include
 #include
+#include
 
 #include "libpspp/assertion.h"
 #include "libpspp/compiler.h"
 #include "libpspp/hmapx.h"
 #include "libpspp/hash-functions.h"
+#include "libpspp/misc.h"
 #include "libpspp/pool.h"
 #include "libpspp/str.h"
 #include "libpspp/version.h"
@@ -501,6 +503,55 @@ utf8_encoding_concat_len (const char *head, const char *tail,
   return prefix_len + tail_len;
 }
 
+/* Returns the number of display columns occupied by the LENGTH bytes of UTF-8
+   starting at S.  A tab advances to the next multiple of 8 columns. */
+size_t
+utf8_count_columns (const char *s_, size_t length)
+{
+  const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
+
+  size_t columns = 0;
+  for (size_t ofs = 0; ofs < length; )  /* size_t, to match LENGTH. */
+    {
+      ucs4_t uc;
+      ofs += u8_mbtouc (&uc, s + ofs, length - ofs);
+      if (uc != '\t')
+        {
+          int width = uc_width (uc, "UTF-8");
+          if (width > 0)                /* Nonpositive widths add nothing. */
+            columns += width;
+        }
+      else
+        columns = ROUND_UP (columns + 1, 8);    /* Next tab stop. */
+    }
+  return columns;
+}
+
+/* Returns the byte offset in LENGTH-byte UTF-8 string S that is N_COLUMNS
+   display columns into the string.  Stops at the end of S if S is narrower. */
+size_t
+utf8_columns_to_bytes (const char *s_, size_t length, size_t n_columns)
+{
+  const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
+
+  size_t columns = 0;
+  size_t ofs;                           /* size_t, to match LENGTH and result. */
+  for (ofs = 0; ofs < length && columns < n_columns; )
+    {
+      ucs4_t uc;
+      ofs += u8_mbtouc (&uc, s + ofs, length - ofs);
+      if (uc != '\t')
+        {
+          int width = uc_width (uc, "UTF-8");
+          if (width > 0)                /* Nonpositive widths add nothing. */
+            columns += width;
+        }
+      else
+        columns = ROUND_UP (columns + 1, 8);    /* Next tab stop. */
+    }
+  return ofs;
+}
+
 /* Returns an allocated, null-terminated string, owned by the caller,
    containing as many characters[*] from the beginning of S that would fit
    within MAX_LEN bytes if the returned string were to be re-encoded in
@@ -829,7 +880,15 @@ utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis)
 unsigned int
 utf8_hash_case_string (const char *s, unsigned int basis)
 {
-  return utf8_hash_case_bytes (s, strlen (s), basis);
+  return utf8_hash_case_substring (ss_cstr (s), basis);
+}
+
+/* Returns a hash value for UTF-8 string S, with lowercase and uppercase
+   letters treated as equal, starting from BASIS. */
+unsigned int
+utf8_hash_case_substring (struct substring s, unsigned int basis)
+{
+  return utf8_hash_case_bytes (s.string, s.length, basis);
 }
 
 /* Compares UTF-8 strings A and B case-insensitively.
@@ -837,7 +896,15 @@ utf8_hash_case_string (const char *s, unsigned int basis)
 int
 utf8_strcasecmp (const char *a, const char *b)
 {
-  return utf8_strncasecmp (a, strlen (a), b, strlen (b));
+  return utf8_sscasecmp (ss_cstr (a), ss_cstr (b));
+}
+
+/* Compares UTF-8 substrings A and B case-insensitively, by passing their
+   contents and lengths to utf8_strncasecmp(). */
+int
+utf8_sscasecmp (struct substring a, struct substring b)
+{
+  return utf8_strncasecmp (a.string, a.length, b.string, b.length);
 }
 
 /* Compares UTF-8 strings A (with length AN) and B (with length BN)