From 64a3eba8f71d4131653eda2de3196364e126052d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 27 Jun 2021 11:19:07 -0700 Subject: [PATCH] lexer: New function lex_next_representation(). --- src/language/lexer/lexer.c | 28 +++++++++++++++++++++++++--- src/language/lexer/lexer.h | 4 ++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index cde5f58ac4..c14bc6acb8 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -109,6 +109,8 @@ struct lexer }; static struct lex_source *lex_source__ (const struct lexer *); +static struct substring lex_source_get_syntax__ (const struct lex_source *, + int n0, int n1); static const struct lex_token *lex_next__ (const struct lexer *, int n); static void lex_source_push_endcmd__ (struct lex_source *); @@ -935,6 +937,18 @@ lex_next_tokss (const struct lexer *lexer, int n) return lex_next (lexer, n)->string; } +/* Returns the text of the syntax in tokens N0 ahead of the current one, + through N1 ahead of the current one, inclusive (N1 < N0 acts as N1 == N0). + For example, if N0 and N1 are both zero, this requests the syntax for the + current token. The caller must not modify or free the returned string. The + syntax is in UTF-8 and in the original form supplied to the lexer, so it may + include comments, spaces, and new-lines if it spans multiple tokens. 
*/ +struct substring +lex_next_representation (const struct lexer *lexer, int n0, int n1) +{ + return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1); +} + static bool lex_tokens_match (const struct token *actual, const struct token *expected) { @@ -1310,16 +1324,24 @@ lex_source__ (const struct lexer *lexer) } static struct substring -lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1) +lex_tokens_get_syntax__ (const struct lex_source *src, + const struct lex_token *token0, + const struct lex_token *token1) { - const struct lex_token *token0 = lex_source_next__ (src, n0); - const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1)); size_t start = token0->token_pos; size_t end = token1->token_pos + token1->token_len; return ss_buffer (&src->buffer[start - src->tail], end - start); } +static struct substring +lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1) +{ + return lex_tokens_get_syntax__ (src, + lex_source_next__ (src, n0), + lex_source_next__ (src, MAX (n0, n1))); +} + static void lex_ellipsize__ (struct substring in, char *out, size_t out_size) { diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index caf5750331..f57ee822ed 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -142,6 +142,10 @@ const char *lex_next_tokcstr (const struct lexer *, int n); double lex_next_tokval (const struct lexer *, int n); struct substring lex_next_tokss (const struct lexer *, int n); +/* Token representation. */ +struct substring lex_next_representation (const struct lexer *, + int n0, int n1); + /* Current position. */ int lex_get_first_line_number (const struct lexer *, int n); int lex_get_last_line_number (const struct lexer *, int n); -- 2.30.2