These will have additional upcoming users.
#include <unictype.h>
#include <unistd.h>
#include <unistr.h>
-#include <uniwidth.h>
#include "language/command.h"
#include "language/lexer/macro.h"
}
}
-static int
-count_columns (const char *s_, size_t length)
-{
- const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
- int columns;
- size_t ofs;
- int mblen;
-
- columns = 0;
- for (ofs = 0; ofs < length; ofs += mblen)
- {
- ucs4_t uc;
-
- mblen = u8_mbtouc (&uc, s + ofs, length - ofs);
- if (uc != '\t')
- {
- int width = uc_width (uc, "UTF-8");
- if (width > 0)
- columns += width;
- }
- else
- columns = ROUND_UP (columns + 1, 8);
- }
-
- return columns + 1;
-}
-
static int
lex_token_get_first_column (const struct lex_source *src,
const struct lex_token *token)
{
- return count_columns (&src->buffer[token->line_pos - src->tail],
- token->token_pos - token->line_pos);
+  /* Measure the text between the start of TOKEN's line and the start of
+     TOKEN itself.  utf8_count_columns() yields the number of columns that
+     text occupies, so adding 1 gives TOKEN's 1-based first column. */
+  const char *line_start = &src->buffer[token->line_pos - src->tail];
+  size_t n_bytes = token->token_pos - token->line_pos;
+  return utf8_count_columns (line_start, n_bytes) + 1;
}
static int
newline = memrchr (start, '\n', end - start);
if (newline != NULL)
start = newline + 1;
- return count_columns (start, end - start);
+ return utf8_count_columns (start, end - start) + 1;
}
static struct msg_location
#include <string.h>
#include <unicase.h>
#include <unigbrk.h>
+#include <uniwidth.h>
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
#include "libpspp/hmapx.h"
#include "libpspp/hash-functions.h"
+#include "libpspp/misc.h"
#include "libpspp/pool.h"
#include "libpspp/str.h"
#include "libpspp/version.h"
return prefix_len + tail_len;
}
+/* Returns the number of display columns that would be occupied by the LENGTH
+   bytes of UTF-8 starting at S.
+
+   Columns are counted from 0 at S.  A tab moves the column count to
+   ROUND_UP (columns + 1, 8), that is, to the next tab stop.  A character for
+   which uc_width() reports a zero or negative width adds no columns. */
+size_t
+utf8_count_columns (const char *s_, size_t length)
+{
+  const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
+
+  size_t columns = 0;
+
+  /* OFS has the same type as LENGTH so that the loop condition is not a
+     signed/unsigned comparison and the offset cannot overflow on input
+     longer than INT_MAX bytes. */
+  for (size_t ofs = 0; ofs < length; )
+    {
+      ucs4_t uc;
+      ofs += u8_mbtouc (&uc, s + ofs, length - ofs);
+      if (uc != '\t')
+        {
+          int width = uc_width (uc, "UTF-8");
+          if (width > 0)
+            columns += width;
+        }
+      else
+        columns = ROUND_UP (columns + 1, 8);
+    }
+  return columns;
+}
+
+/* Returns the byte offset in LENGTH-byte UTF-8 string S that is N_COLUMNS
+   display columns into the string.
+
+   Scanning stops after the first character that brings the running column
+   count to N_COLUMNS or beyond, so the result always falls on a character
+   boundary and may lie past N_COLUMNS when a wide character or a tab
+   straddles it.  If S occupies fewer than N_COLUMNS columns, returns LENGTH.
+   If N_COLUMNS is 0, returns 0.
+
+   Tabs and character widths are treated as in utf8_count_columns(). */
+size_t
+utf8_columns_to_bytes (const char *s_, size_t length, size_t n_columns)
+{
+  const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
+
+  size_t columns = 0;
+
+  /* OFS has the same type as LENGTH and as the return value, so that the
+     loop condition is not a signed/unsigned comparison and the offset cannot
+     overflow on input longer than INT_MAX bytes. */
+  size_t ofs = 0;
+  while (ofs < length && columns < n_columns)
+    {
+      ucs4_t uc;
+      ofs += u8_mbtouc (&uc, s + ofs, length - ofs);
+      if (uc != '\t')
+        {
+          int width = uc_width (uc, "UTF-8");
+          if (width > 0)
+            columns += width;
+        }
+      else
+        columns = ROUND_UP (columns + 1, 8);
+    }
+  return ofs;
+}
+
/* Returns an allocated, null-terminated string, owned by the caller,
containing as many characters[*] from the beginning of S that would fit
within MAX_LEN bytes if the returned string were to be re-encoded in
size_t utf8_encoding_concat_len (const char *head, const char *tail,
const char *encoding, size_t max_len);
+size_t utf8_count_columns (const char *s, size_t length);
+size_t utf8_columns_to_bytes (const char *s, size_t length, size_t n_columns);
+
char *utf8_to_filename (const char *filename);
char *filename_to_utf8 (const char *filename);
#include <unistr.h>
#include "libpspp/cast.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/pool.h"
else
return 0;
}
+
+/* Returns the number of display columns that would be occupied by the UTF-8
+   text in S, as computed by utf8_count_columns(). */
+size_t
+ss_utf8_count_columns (struct substring s)
+{
+  const char *text = s.string;
+  size_t n_bytes = s.length;
+
+  return utf8_count_columns (text, n_bytes);
+}
+
+/* Extracts from S the text that begins at 0-based display column START and
+   continues for N display columns, and returns it as a substring.
+
+   NOTE(review): the N columns are measured by a fresh call to
+   utf8_columns_to_bytes() on the already-advanced substring, so a tab inside
+   the result is presumably expanded as if the result began at column 0
+   rather than at START -- confirm that this is intended. */
+struct substring
+ss_utf8_columns (struct substring s, size_t start, size_t n)
+{
+  /* Drop the bytes that make up the first START columns. */
+  size_t n_skip = utf8_columns_to_bytes (s.string, s.length, start);
+  ss_advance (&s, n_skip);
+
+  /* Keep only the bytes that make up the next N columns. */
+  size_t n_keep = utf8_columns_to_bytes (s.string, s.length, n);
+  s.length = n_keep;
+
+  return s;
+}
\f
/* Initializes ST as an empty string. */
void
ucs4_t ss_get_mb (struct substring *);
ucs4_t ss_at_mb (struct substring, size_t ofs);
int ss_at_mblen (struct substring, size_t ofs);
+size_t ss_utf8_count_columns (struct substring);
+struct substring ss_utf8_columns (struct substring, size_t start, size_t n);
\f
/* Variable length strings. */