X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fi18n.c;h=4e04d32c25f39564af7fa7acc451389acaf4eebe;hb=61cb03a73ff9f5d38e9728d4bf5a449212d3acdc;hp=69162f14f07c72b35e55537f2bd0623f2bb20d5c;hpb=51acdebd6747816b6f955634e1bfcc9c8071b56d;p=pspp diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 69162f14f0..4e04d32c25 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -29,11 +29,13 @@ #include #include #include +#include #include "libpspp/assertion.h" #include "libpspp/compiler.h" #include "libpspp/hmapx.h" #include "libpspp/hash-functions.h" +#include "libpspp/misc.h" #include "libpspp/pool.h" #include "libpspp/str.h" #include "libpspp/version.h" @@ -501,6 +503,55 @@ utf8_encoding_concat_len (const char *head, const char *tail, return prefix_len + tail_len; } +/* Returns the number of display columns that would be occupied by the LENGTH + bytes of UTF-8 starting at S. */ +size_t +utf8_count_columns (const char *s_, size_t length) +{ + const uint8_t *s = CHAR_CAST (const uint8_t *, s_); + + size_t columns = 0; + for (int ofs = 0; ofs < length; ) + { + ucs4_t uc; + ofs += u8_mbtouc (&uc, s + ofs, length - ofs); + if (uc != '\t') + { + int width = uc_width (uc, "UTF-8"); + if (width > 0) + columns += width; + } + else + columns = ROUND_UP (columns + 1, 8); + } + return columns; +} + +/* Returns the byte offset in LENGTH-byte UTF-8 string S that is N_COLUMNS + display columns into the string. */ +size_t +utf8_columns_to_bytes (const char *s_, size_t length, size_t n_columns) +{ + const uint8_t *s = CHAR_CAST (const uint8_t *, s_); + + size_t columns = 0; + int ofs; + for (ofs = 0; ofs < length && columns < n_columns; ) + { + ucs4_t uc; + ofs += u8_mbtouc (&uc, s + ofs, length - ofs); + if (uc != '\t') + { + int width = uc_width (uc, "UTF-8"); + if (width > 0) + columns += width; + } + else + columns = ROUND_UP (columns + 1, 8); + } + return ofs; +} + /* Returns an allocated, null-terminated string, owned by the caller, containing as many characters[*] from the beginning of S that would fit within MAX_LEN bytes if the returned string were to be re-encoded in