From 51acdebd6747816b6f955634e1bfcc9c8071b56d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 5 Dec 2021 20:01:01 -0800 Subject: [PATCH] lexer: New lex_at_phrase(), lex_get_n() functions. These will have their first users in upcoming commits. --- src/language/lexer/lexer.c | 56 ++++++++++++++++++++++++++++---------- src/language/lexer/lexer.h | 2 ++ 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index dd06eeee86..27d5dedb54 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -379,6 +379,14 @@ lex_get (struct lexer *lexer) return; } } + +/* Advances LEXER by N tokens. */ +void +lex_get_n (struct lexer *lexer, size_t n) +{ + while (n-- > 0) + lex_get (lexer); +} /* Issuing errors. */ @@ -564,7 +572,8 @@ lex_next_error_valist (struct lexer *lexer, int n0, int n1, ds_put_cstr (&s, ": "); ds_put_vformat (&s, format, args); } - ds_put_byte (&s, '.'); + if (ds_last (&s) != '.') + ds_put_byte (&s, '.'); msg (SE, "%s", ds_cstr (&s)); ds_destroy (&s); } @@ -1136,32 +1145,49 @@ lex_tokens_match (const struct token *actual, const struct token *expected) } } -/* If LEXER is positioned at the sequence of tokens that may be parsed from S, - skips it and returns true. Otherwise, returns false. - - S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS", - "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their - first three letters. */ -bool -lex_match_phrase (struct lexer *lexer, const char *s) +static size_t +lex_at_phrase__ (struct lexer *lexer, const char *s) { struct string_lexer slex; struct token token; - int i; - i = 0; + size_t i = 0; string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true); while (string_lexer_next (&slex, &token)) { bool match = lex_tokens_match (lex_next (lexer, i++), &token); token_uninit (&token); if (!match) - return false; + return 0; } + return i; +} - while (i-- > 0) - lex_get (lexer); - return true; +/* If LEXER is positioned at the sequence of tokens that may be parsed from S, + returns true. Otherwise, returns false. + + S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS", + "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their + first three letters. */ +bool +lex_at_phrase (struct lexer *lexer, const char *s) +{ + return lex_at_phrase__ (lexer, s) > 0; +} + +/* If LEXER is positioned at the sequence of tokens that may be parsed from S, + skips it and returns true. Otherwise, returns false. + + S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS", + "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their + first three letters. */ +bool +lex_match_phrase (struct lexer *lexer, const char *s) +{ + size_t n = lex_at_phrase__ (lexer, s); + if (n > 0) + lex_get_n (lexer, n); + return n > 0; } static int diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index 6aa900e8df..1282b6946b 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -100,6 +100,7 @@ void lex_append (struct lexer *, struct lex_reader *); /* Advancing. */ void lex_get (struct lexer *); +void lex_get_n (struct lexer *, size_t n); /* Token testing functions. */ bool lex_is_number (const struct lexer *); @@ -120,6 +121,7 @@ bool lex_match (struct lexer *, enum token_type); bool lex_match_id (struct lexer *, const char *); bool lex_match_id_n (struct lexer *, const char *, size_t n); bool lex_match_int (struct lexer *, int); +bool lex_at_phrase (struct lexer *, const char *s); bool lex_match_phrase (struct lexer *, const char *s); /* Forcible matching functions. */ -- 2.30.2