From 51acdebd6747816b6f955634e1bfcc9c8071b56d Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@cs.stanford.edu>
Date: Sun, 5 Dec 2021 20:01:01 -0800
Subject: [PATCH] lexer: New lex_at_phrase(), lex_get_n() functions.

These will have their first users in upcoming commits.
---
 src/language/lexer/lexer.c | 56 ++++++++++++++++++++++++++++----------
 src/language/lexer/lexer.h |  2 ++
 2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index dd06eeee86..27d5dedb54 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -379,6 +379,14 @@ lex_get (struct lexer *lexer)
           return;
       }
 }
+
+/* Advances LEXER by N tokens, by calling lex_get() N times (N may be 0). */
+void
+lex_get_n (struct lexer *lexer, size_t n)
+{
+  while (n-- > 0)
+    lex_get (lexer);
+}
 
 /* Issuing errors. */
 
@@ -564,7 +572,8 @@ lex_next_error_valist (struct lexer *lexer, int n0, int n1,
           ds_put_cstr (&s, ": ");
           ds_put_vformat (&s, format, args);
         }
-      ds_put_byte (&s, '.');
+      if (ds_last (&s) != '.')
+        ds_put_byte (&s, '.');
       msg (SE, "%s", ds_cstr (&s));
       ds_destroy (&s);
     }
@@ -1136,32 +1145,49 @@ lex_tokens_match (const struct token *actual, const struct token *expected)
     }
 }
 
-/* If LEXER is positioned at the sequence of tokens that may be parsed from S,
-   skips it and returns true.  Otherwise, returns false.
-
-   S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
-   "2SLS", or "END INPUT PROGRAM".  Identifiers may be abbreviated to their
-   first three letters. */
-bool
-lex_match_phrase (struct lexer *lexer, const char *s)
+static size_t /* Number of tokens in S if LEXER is positioned at S, else 0. */
+lex_at_phrase__ (struct lexer *lexer, const char *s)
 {
   struct string_lexer slex;
   struct token token;
-  int i;
 
-  i = 0;
+  size_t i = 0;                 /* Number of tokens of S compared so far. */
   string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true);
   while (string_lexer_next (&slex, &token))
     {
       bool match = lex_tokens_match (lex_next (lexer, i++), &token);
       token_uninit (&token);
       if (!match)
-        return false;
+        return 0;               /* Mismatch: LEXER is not at phrase S. */
     }
+  return i;                     /* Every token of S matched; I counts them. */
+}
 
-  while (i-- > 0)
-    lex_get (lexer);
-  return true;
+/* Returns true if LEXER is positioned at the sequence of tokens that may be
+   parsed from S (without skipping it), or false if not or if S has no tokens.
+
+   S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
+   "2SLS", or "END INPUT PROGRAM".  Identifiers may be abbreviated to their
+   first three letters. */
+bool
+lex_at_phrase (struct lexer *lexer, const char *s)
+{
+  return lex_at_phrase__ (lexer, s) > 0;
+}
+
+/* Skips the tokens that may be parsed from S and returns true, if LEXER is
+   positioned at them.  Otherwise, or if S yields no tokens, returns false.
+
+   S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
+   "2SLS", or "END INPUT PROGRAM".  Identifiers may be abbreviated to their
+   first three letters. */
+bool
+lex_match_phrase (struct lexer *lexer, const char *s)
 {
+  size_t n = lex_at_phrase__ (lexer, s);
+  if (n > 0)
+    lex_get_n (lexer, n);
+  return n > 0;
 }
 
 static int
diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h
index 6aa900e8df..1282b6946b 100644
--- a/src/language/lexer/lexer.h
+++ b/src/language/lexer/lexer.h
@@ -100,6 +100,7 @@ void lex_append (struct lexer *, struct lex_reader *);
 
 /* Advancing. */
 void lex_get (struct lexer *);
+void lex_get_n (struct lexer *, size_t n);
 
 /* Token testing functions. */
 bool lex_is_number (const struct lexer *);
@@ -120,6 +121,7 @@ bool lex_match (struct lexer *, enum token_type);
 bool lex_match_id (struct lexer *, const char *);
 bool lex_match_id_n (struct lexer *, const char *, size_t n);
 bool lex_match_int (struct lexer *, int);
+bool lex_at_phrase (struct lexer *, const char *s);
 bool lex_match_phrase (struct lexer *, const char *s);
 
 /* Forcible matching functions. */
-- 
2.30.2