From 64a3eba8f71d4131653eda2de3196364e126052d Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@cs.stanford.edu>
Date: Sun, 27 Jun 2021 11:19:07 -0700
Subject: [PATCH] lexer: New function lex_next_representation().

---
 src/language/lexer/lexer.c | 28 +++++++++++++++++++++++++---
 src/language/lexer/lexer.h |  4 ++++
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index cde5f58ac4..c14bc6acb8 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -109,6 +109,8 @@ struct lexer
   };
 
 static struct lex_source *lex_source__ (const struct lexer *);
+static struct substring lex_source_get_syntax__ (const struct lex_source *,
+                                                 int n0, int n1);
 static const struct lex_token *lex_next__ (const struct lexer *, int n);
 static void lex_source_push_endcmd__ (struct lex_source *);
 
@@ -935,6 +937,18 @@ lex_next_tokss (const struct lexer *lexer, int n)
   return lex_next (lexer, n)->string;
 }
 
+/* Returns the syntax for tokens N0 through N1 ahead of the current one,
+   inclusive.  (If N0 and N1 are both zero, this is the syntax for the current
+   token; an N1 less than N0 is treated as N0.)  The caller must not modify or
+   free the returned string.  The syntax is encoded in UTF-8 and in the
+   original form supplied to the lexer so that, for example, it may include
+   comments, spaces, and new-lines if it spans multiple tokens. */
+struct substring
+lex_next_representation (const struct lexer *lexer, int n0, int n1)
+{
+  return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1);
+}
+
 static bool
 lex_tokens_match (const struct token *actual, const struct token *expected)
 {
@@ -1310,16 +1324,24 @@ lex_source__ (const struct lexer *lexer)
 }
 
 static struct substring
-lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+lex_tokens_get_syntax__ (const struct lex_source *src,
+                         const struct lex_token *token0,
+                         const struct lex_token *token1)
 {
-  const struct lex_token *token0 = lex_source_next__ (src, n0);
-  const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1));
   size_t start = token0->token_pos;
   size_t end = token1->token_pos + token1->token_len;
 
   return ss_buffer (&src->buffer[start - src->tail], end - start);
 }
 
+static struct substring
+lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+{
+  return lex_tokens_get_syntax__ (src,
+                                  lex_source_next__ (src, n0),
+                                  lex_source_next__ (src, MAX (n0, n1)));
+}
+
 static void
 lex_ellipsize__ (struct substring in, char *out, size_t out_size)
 {
diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h
index caf5750331..f57ee822ed 100644
--- a/src/language/lexer/lexer.h
+++ b/src/language/lexer/lexer.h
@@ -142,6 +142,10 @@ const char *lex_next_tokcstr (const struct lexer *, int n);
 double lex_next_tokval (const struct lexer *, int n);
 struct substring lex_next_tokss (const struct lexer *, int n);
 
+/* Token representation. */
+struct substring lex_next_representation (const struct lexer *,
+                                          int n0, int n1);
+
 /* Current position. */
 int lex_get_first_line_number (const struct lexer *, int n);
 int lex_get_last_line_number (const struct lexer *, int n);
-- 
2.30.2