lexer: New function lex_next_representation().

author Ben Pfaff <blp@cs.stanford.edu>

Sun, 27 Jun 2021 18:19:07 +0000 (11:19 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sun, 27 Jun 2021 18:23:22 +0000 (11:23 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 27 Jun 2021 18:19:07 +0000 (11:19 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 27 Jun 2021 18:23:22 +0000 (11:23 -0700)
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c

index cde5f58ac46d95a7c727a3e639ddf2cd2ce8f95a..c14bc6acb84d0fdbf95d7965263748374029f213 100644 (file)
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -109,6 +109,8 @@ struct lexer
    };
  
  static struct lex_source *lex_source__ (const struct lexer *);
+static struct substring lex_source_get_syntax__ (const struct lex_source *,
+                                                 int n0, int n1);
  static const struct lex_token *lex_next__ (const struct lexer *, int n);
  static void lex_source_push_endcmd__ (struct lex_source *);
  
@@ -935,6 +937,18 @@ lex_next_tokss (const struct lexer *lexer, int n)
    return lex_next (lexer, n)->string;
  }
  
+/* Returns the original syntax spanning the token N0 ahead of the current one
+   through the token N1 ahead of it, inclusive; N0 = N1 = 0 yields the current
+   token alone, and N1 < N0 is treated as N1 = N0.  The result is UTF-8 in the
+   form given to the lexer, so a multi-token span may include comments, spaces,
+   and new-lines.  It is built over the lexer source's buffer, so the caller
+   must not modify or free it. */
+struct substring
+lex_next_representation (const struct lexer *lexer, int n0, int n1)
+{
+  return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1);
+}
+
  static bool
  lex_tokens_match (const struct token *actual, const struct token *expected)
  {
@@ -1310,16 +1324,24 @@ lex_source__ (const struct lexer *lexer)
  }
  
  static struct substring
-lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+lex_tokens_get_syntax__ (const struct lex_source *src,
+                         const struct lex_token *token0,
+                         const struct lex_token *token1)
  {
-  const struct lex_token *token0 = lex_source_next__ (src, n0);
-  const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1));
    size_t start = token0->token_pos;
    size_t end = token1->token_pos + token1->token_len;
  
    return ss_buffer (&src->buffer[start - src->tail], end - start);
  }
  
+static struct substring
+lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+{
+  return lex_tokens_get_syntax__ (src,
+                                  lex_source_next__ (src, n0),
+                                  lex_source_next__ (src, MAX (n0, n1)));
+}
+
  static void
  lex_ellipsize__ (struct substring in, char *out, size_t out_size)
  {
diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h

index caf57503317973788122afd42367d0a4eeaf1b1f..f57ee822ed0abb9040f19f337f5714c05016e441 100644 (file)
--- a/src/language/lexer/lexer.h
+++ b/src/language/lexer/lexer.h
@@ -142,6 +142,10 @@ const char *lex_next_tokcstr (const struct lexer *, int n);
  double lex_next_tokval (const struct lexer *, int n);
  struct substring lex_next_tokss (const struct lexer *, int n);
  
+/* Token representation. */
+struct substring lex_next_representation (const struct lexer *,
+                                          int n0, int n1);
+
  /* Current position. */
  int lex_get_first_line_number (const struct lexer *, int n);
  int lex_get_last_line_number (const struct lexer *, int n);
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 27 Jun 2021 18:19:07 +0000 (11:19 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 27 Jun 2021 18:23:22 +0000 (11:23 -0700)
src/language/lexer/lexer.c		patch | blob | history
src/language/lexer/lexer.h		patch | blob | history