From 106e3340befcc44a43dffcdd08d92ef70678712f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 13 Feb 2022 11:07:22 -0800 Subject: [PATCH] lexer: New function lex_ofs_representation(). This interface is more convenient for situations where it's easier to consume tokens before getting their representations. --- src/language/lexer/lexer.c | 68 +++++++++++++++++++++++----------- src/language/lexer/lexer.h | 1 + src/language/stats/matrix.c | 19 ++-------- src/language/utilities/title.c | 20 +++------- 4 files changed, 55 insertions(+), 53 deletions(-) diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 908556a7ee..1fadf53b30 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -282,8 +282,8 @@ struct lexer }; static struct lex_source *lex_source__ (const struct lexer *); -static char *lex_source_get_syntax__ (const struct lex_source *, - int n0, int n1); +static char *lex_source_syntax__ (const struct lex_source *, + int ofs0, int ofs1); static const struct lex_token *lex_next__ (const struct lexer *, int n); static void lex_source_push_endcmd__ (struct lex_source *); static void lex_source_push_parse (struct lex_source *, struct lex_token *); @@ -1451,15 +1451,37 @@ lex_ofs_end_point (const struct lexer *lexer, int ofs) /* Returns the text of the syntax in tokens N0 ahead of the current one, through N1 ahead of the current one, inclusive. (For example, if N0 and N1 - are both zero, this requests the syntax for the current token.) The caller - must eventually free the returned string (with free()). The syntax is - encoded in UTF-8 and in the original form supplied to the lexer so that, for - example, it may include comments, spaces, and new-lines if it spans multiple - tokens. Macro expansion, however, has already been performed. */ + are both zero, this requests the syntax for the current token.) + + The caller must eventually free the returned string (with free()). The + syntax is encoded in UTF-8 and in the original form supplied to the lexer so + that, for example, it may include comments, spaces, and new-lines if it + spans multiple tokens. Macro expansion, however, has already been + performed. */ char * lex_next_representation (const struct lexer *lexer, int n0, int n1) { - return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1); + const struct lex_source *src = lex_source__ (lexer); + return (src + ? lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs) + : xstrdup ("")); +} + + +/* Returns the text of the syntax in tokens with offsets OFS0 to OFS1, + inclusive. (For example, if OFS0 and OFS1 are both zero, this requests the + syntax for the first token in the current command.) + + The caller must eventually free the returned string (with free()). The + syntax is encoded in UTF-8 and in the original form supplied to the lexer so + that, for example, it may include comments, spaces, and new-lines if it + spans multiple tokens. Macro expansion, however, has already been + performed. */ +char * +lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1) +{ + const struct lex_source *src = lex_source__ (lexer); + return src ? lex_source_syntax__ (src, ofs0, ofs1) : xstrdup (""); } /* Returns true if the token N ahead of the current one was produced by macro @@ -1787,32 +1809,33 @@ lex_source__ (const struct lexer *lexer) : ll_data (ll_head (&lexer->sources), struct lex_source, ll)); } -/* Returns the text of the syntax in SRC for tokens N0 ahead of the current - one, through N1 ahead of the current one, inclusive. (For example, if N0 - and N1 are both zero, this requests the syntax for the current token.) The - caller must eventually free the returned string (with free()). The syntax - is encoded in UTF-8 and in the original form supplied to the lexer so that, - for example, it may include comments, spaces, and new-lines if it spans - multiple tokens. Macro expansion, however, has already been performed. */ +/* Returns the text of the syntax in SRC for tokens with offsets OFS0 through + OFS1 in the current command, inclusive. (For example, if OFS0 and OFS1 are + both zero, this requests the syntax for the first token in the current + command.) The caller must eventually free the returned string (with + free()). The syntax is encoded in UTF-8 and in the original form supplied + to the lexer so that, for example, it may include comments, spaces, and + new-lines if it spans multiple tokens. Macro expansion, however, has + already been performed. */ static char * -lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1) +lex_source_syntax__ (const struct lex_source *src, int ofs0, int ofs1) { struct string s = DS_EMPTY_INITIALIZER; - for (size_t i = n0; i <= n1; ) + for (size_t i = ofs0; i <= ofs1; ) { /* Find [I,J) as the longest sequence of tokens not produced by macro expansion, or otherwise the longest sequence expanded from a single macro call. */ - const struct lex_token *first = lex_source_next__ (src, i); + const struct lex_token *first = lex_source_ofs__ (src, i); size_t j; - for (j = i + 1; j <= n1; j++) + for (j = i + 1; j <= ofs1; j++) { - const struct lex_token *cur = lex_source_next__ (src, j); + const struct lex_token *cur = lex_source_ofs__ (src, j); if ((first->macro_rep != NULL) != (cur->macro_rep != NULL) || first->macro_rep != cur->macro_rep) break; } - const struct lex_token *last = lex_source_next__ (src, j - 1); + const struct lex_token *last = lex_source_ofs__ (src, j - 1); /* Now add the syntax for this sequence of tokens to SRC. */ if (!ds_is_empty (&s)) @@ -1883,7 +1906,8 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1, else { /* Get the syntax that caused the error. */ - char *raw_syntax = lex_source_get_syntax__ (src, n0, n1); + char *raw_syntax = lex_source_syntax__ (src, n0 + src->parse_ofs, + n1 + src->parse_ofs); char syntax[64]; str_ellipsize (ss_cstr (raw_syntax), syntax, sizeof syntax); free (raw_syntax); diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index 764da74b19..bb9d6a954d 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -166,6 +166,7 @@ struct msg_point lex_ofs_end_point (const struct lexer *, int ofs); /* Token representation. */ char *lex_next_representation (const struct lexer *, int n0, int n1); +char *lex_ofs_representation (const struct lexer *, int ofs0, int ofs1); bool lex_next_is_from_macro (const struct lexer *, int n); /* Current position. */ diff --git a/src/language/stats/matrix.c b/src/language/stats/matrix.c index 9bbc37f9a2..b1537cd92e 100644 --- a/src/language/stats/matrix.c +++ b/src/language/stats/matrix.c @@ -5367,25 +5367,12 @@ matrix_print_parse (struct matrix_state *s) if (lex_token (s->lexer) != T_SLASH && lex_token (s->lexer) != T_ENDCMD) { - size_t depth = 0; - for (size_t i = 0; ; i++) - { - enum token_type t = lex_next_token (s->lexer, i); - if (t == T_LPAREN || t == T_LBRACK || t == T_LCURLY) - depth++; - else if ((t == T_RPAREN || t == T_RBRACK || t == T_RCURLY) && depth) - depth--; - else if ((t == T_SLASH && !depth) || t == T_ENDCMD || t == T_STOP) - { - if (i > 0) - cmd->print.title = lex_next_representation (s->lexer, 0, i - 1); - break; - } - } - + int start_ofs = lex_ofs (s->lexer); cmd->print.expression = matrix_parse_exp (s); if (!cmd->print.expression) goto error; + cmd->print.title = lex_ofs_representation (s->lexer, start_ofs, + lex_ofs (s->lexer) - 1); } while (lex_match (s->lexer, T_SLASH)) diff --git a/src/language/utilities/title.c b/src/language/utilities/title.c index 323bdf3fa5..8ad6a4acb3 100644 --- a/src/language/utilities/title.c +++ b/src/language/utilities/title.c @@ -44,28 +44,18 @@ parse_title (struct lexer *lexer, void (*set_title) (const char *)) set_title (lex_tokcstr (lexer)); lex_get (lexer); } - else if (lex_token (lexer) == T_ENDCMD) - { - /* This would be a bad special case below because n-1 would be - SIZE_MAX. */ - set_title (""); - } else { - /* Count the tokens in the title. */ - size_t n = 0; - while (lex_next (lexer, n)->type != T_ENDCMD) - n++; + int start_ofs = lex_ofs (lexer); + while (lex_token (lexer) != T_ENDCMD) + lex_get (lexer); /* Get the raw representation of all the tokens, including any space between them, and use it as the title. */ - char *title = lex_next_representation (lexer, 0, n - 1); + char *title = lex_ofs_representation (lexer, start_ofs, + lex_ofs (lexer) - 1); set_title (title); free (title); - - /* Skip past the tokens. */ - for (size_t i = 0; i < n; i++) - lex_get (lexer); } return CMD_SUCCESS; } -- 2.30.2