From: Ben Pfaff Date: Sun, 25 Sep 2022 23:01:26 +0000 (-0700) Subject: lexer: New function lex_force_match_phrase(). X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a208fcf0dcdcc445dd7d492e00ebdf3ce23e247f;p=pspp lexer: New function lex_force_match_phrase(). --- diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index d34067e819..6e8c82bf0b 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -216,8 +216,7 @@ read_inline_record (struct dfm_reader *r) while (lex_token (r->lexer) == T_ENDCMD) lex_get (r->lexer); - if (!lex_force_match_id (r->lexer, "BEGIN") - || !lex_force_match_id (r->lexer, "DATA")) + if (!lex_force_match_phrase (r->lexer, "BEGIN DATA")) return false; lex_match (r->lexer, T_ENDCMD); diff --git a/src/language/dictionary/attributes.c b/src/language/dictionary/attributes.c index 954314ef4f..e646af3847 100644 --- a/src/language/dictionary/attributes.c +++ b/src/language/dictionary/attributes.c @@ -59,8 +59,7 @@ cmd_variable_attribute (struct lexer *lexer, struct dataset *ds) size_t n_vars, i; bool ok; - if (!lex_force_match_id (lexer, "VARIABLES") - || !lex_force_match (lexer, T_EQUALS) + if (!lex_force_match_phrase (lexer, "VARIABLES=") || !parse_variables (lexer, dict, &vars, &n_vars, PV_NONE)) return CMD_FAILURE; diff --git a/src/language/dictionary/mrsets.c b/src/language/dictionary/mrsets.c index ffd97b6800..9c91ba9861 100644 --- a/src/language/dictionary/mrsets.c +++ b/src/language/dictionary/mrsets.c @@ -147,8 +147,7 @@ parse_group (struct lexer *lexer, struct dictionary *dict, } else if (type == MRSET_MD && lex_match_id (lexer, "LABELSOURCE")) { - if (!lex_force_match (lexer, T_EQUALS) - || !lex_force_match_id (lexer, "VARLABEL")) + if (!lex_force_match_phrase (lexer, "=VARLABEL")) goto error; labelsource_varlabel = true; @@ -504,8 +503,7 @@ static bool parse_mrset_names (struct lexer *lexer, struct dictionary *dict, struct stringi_set 
*mrset_names) { - if (!lex_force_match_id (lexer, "NAME") - || !lex_force_match (lexer, T_EQUALS)) + if (!lex_force_match_phrase (lexer, "NAME=")) return false; stringi_set_init (mrset_names); diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index a2a0da9713..35a9afa381 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -85,7 +85,8 @@ static struct msg_point lex_token_start_point (const struct lex_source *, static struct msg_point lex_token_end_point (const struct lex_source *, const struct lex_token *); -static size_t lex_ofs_at_phrase__ (struct lexer *, int ofs, const char *s); +static bool lex_ofs_at_phrase__ (struct lexer *, int ofs, const char *s, + size_t *n_matchedp); /* Source offset of the last byte in TOKEN. */ static size_t @@ -618,7 +619,7 @@ lex_sbc_only_once (struct lexer *lexer, const char *sbc) /* lex_ofs_at_phrase__() handles subcommand names that are keywords, such as BY. */ - if (lex_ofs_at_phrase__ (lexer, ofs, sbc)) + if (lex_ofs_at_phrase__ (lexer, ofs, sbc, NULL)) lex_ofs_error (lexer, ofs, ofs, _("Subcommand %s may only be specified once."), sbc); else @@ -1634,22 +1635,31 @@ lex_tokens_match (const struct token *actual, const struct token *expected) } } -static size_t -lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s) +static bool +lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s, + size_t *n_matchedp) { struct string_lexer slex; struct token token; - size_t i = 0; + size_t n_matched = 0; + bool all_matched = true; string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true); while (string_lexer_next (&slex, &token)) { - bool match = lex_tokens_match (lex_ofs_token (lexer, ofs + i++), &token); + bool match = lex_tokens_match (lex_ofs_token (lexer, ofs + n_matched), + &token); token_uninit (&token); if (!match) - return 0; + { + all_matched = false; + break; + } + n_matched++; } - return i; + if (n_matchedp) + *n_matchedp = n_matched; + return all_matched; } 
/* If LEXER is positioned at the sequence of tokens that may be parsed from S, @@ -1661,7 +1671,7 @@ lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s) bool lex_at_phrase (struct lexer *lexer, const char *s) { - return lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s) > 0; + return lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, NULL); } /* If LEXER is positioned at the sequence of tokens that may be parsed from S, @@ -1673,10 +1683,29 @@ lex_at_phrase (struct lexer *lexer, const char *s) bool lex_match_phrase (struct lexer *lexer, const char *s) { - size_t n = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s); - if (n > 0) - lex_get_n (lexer, n); - return n > 0; + size_t n_matched; + if (!lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched)) + return false; + lex_get_n (lexer, n_matched); + return true; +} + +/* If LEXER is positioned at the sequence of tokens that may be parsed from S, + skips it and returns true. Otherwise, issues an error and returns false. + + S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS", + "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their + first three letters. 
*/ +bool +lex_force_match_phrase (struct lexer *lexer, const char *s) +{ + size_t n_matched; + bool ok = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched); + if (ok) + lex_get_n (lexer, n_matched); + else + lex_next_error (lexer, 0, n_matched, _("Syntax error expecting `%s'."), s); + return ok; } /* Returns the 1-based line number of the source text at the byte OFFSET in diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index b172ac610f..ae40a87a04 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -127,6 +127,7 @@ bool lex_match_id_n (struct lexer *, const char *, size_t n); bool lex_match_int (struct lexer *, int); bool lex_at_phrase (struct lexer *, const char *s); bool lex_match_phrase (struct lexer *, const char *s); +bool lex_force_match_phrase (struct lexer *, const char *s); /* Forcible matching functions. */ bool lex_force_match (struct lexer *, enum token_type) WARN_UNUSED_RESULT; diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index dd0978731b..5359c4cabd 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -5710,9 +5710,7 @@ ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict, char *name = ss_xstrdup (lex_tokss (lexer)); lex_get (lexer); - if (!lex_force_match (lexer, T_EQUALS) - || !lex_force_match_id (lexer, "EXPR") - || !lex_force_match (lexer, T_LPAREN)) + if (!lex_force_match_phrase (lexer, "=EXPR(")) { free (name); return false; diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c index 5dabe25a16..713c543d08 100644 --- a/src/language/stats/npar.c +++ b/src/language/stats/npar.c @@ -423,8 +423,7 @@ npar_runs (struct lexer *lexer, struct dataset *ds, return false; } - if (!lex_force_match (lexer, T_RPAREN) - || !lex_force_match (lexer, T_EQUALS)) + if (!lex_force_match_phrase (lexer, ")=")) return false; if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), @@ -756,8 +755,7 @@ 
parse_two_sample_related_test (struct lexer *lexer, if (lex_match (lexer, T_LPAREN)) { - if (!lex_force_match_id (lexer, "PAIRED") - || !lex_force_match (lexer, T_RPAREN)) + if (!lex_force_match_phrase (lexer, "PAIRED)")) return false; paired = true; diff --git a/src/language/utilities/host.c b/src/language/utilities/host.c index 45fca6175b..22d25c0eb0 100644 --- a/src/language/utilities/host.c +++ b/src/language/utilities/host.c @@ -293,9 +293,7 @@ cmd_host (struct lexer *lexer, struct dataset *ds UNUSED) return CMD_FAILURE; } - if (!lex_force_match_id (lexer, "COMMAND") - || !lex_force_match (lexer, T_EQUALS) - || !lex_force_match (lexer, T_LBRACK) + if (!lex_force_match_phrase (lexer, "COMMAND=[") || !lex_force_string (lexer)) return CMD_FAILURE; diff --git a/tests/language/data-io/inpt-pgm.at b/tests/language/data-io/inpt-pgm.at index a0054588da..1c61d52fe7 100644 --- a/tests/language/data-io/inpt-pgm.at +++ b/tests/language/data-io/inpt-pgm.at @@ -46,7 +46,7 @@ END INPUT PROGRAM. DESCRIPTIVES x. ]) AT_CHECK([pspp -O format=csv input-program.sps], [1], [dnl -error: DESCRIPTIVES: At end of input: Syntax error expecting BEGIN. +error: DESCRIPTIVES: At end of input: Syntax error expecting `BEGIN DATA'. ]) AT_CLEANUP diff --git a/tests/language/stats/aggregate.at b/tests/language/stats/aggregate.at index f4f0fee665..a76870289b 100644 --- a/tests/language/stats/aggregate.at +++ b/tests/language/stats/aggregate.at @@ -502,7 +502,7 @@ aggregate.sps:16: error: AGGREGATE: Number of source variables (1) does not matc 17 | AGGREGATE /y=pin(x, 2, 1). | ^~~~" -"aggregate.sps:18.1-18.9: error: AGGREGATE: Syntax error expecting BEGIN. +"aggregate.sps:18.1-18.9: error: AGGREGATE: Syntax error expecting `BEGIN DATA'. 18 | AGGREGATE /y=mean(x)**. 
| ^~~~~~~~~" diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at index 63a8cdfd28..7aca64e3a9 100644 --- a/tests/language/stats/ctables.at +++ b/tests/language/stats/ctables.at @@ -413,17 +413,17 @@ ctables.sps:42.20: error: CTABLES: Syntax error expecting identifier. 42 | CTABLES /PCOMPUTE &1. | ^ -ctables.sps:43.21-43.22: error: CTABLES: Syntax error expecting `='. +ctables.sps:43.21-43.22: error: CTABLES: Syntax error expecting `=EXPR@{:@'. 43 | CTABLES /PCOMPUTE &k**. | ^~ -ctables.sps:44.22-44.23: error: CTABLES: Syntax error expecting EXPR. +ctables.sps:44.21-44.23: error: CTABLES: Syntax error expecting `=EXPR@{:@'. 44 | CTABLES /PCOMPUTE &k=**. - | ^~ + | ^~~ -ctables.sps:45.26-45.27: error: CTABLES: Syntax error expecting `@{:@'. +ctables.sps:45.21-45.27: error: CTABLES: Syntax error expecting `=EXPR@{:@'. 45 | CTABLES /PCOMPUTE &k=EXPR**. - | ^~ + | ^~~~~~~ ctables.sps:46.28: error: CTABLES: Syntax error expecting `@:}@'. 46 | CTABLES /PCOMPUTE &k=EXPR(1x). diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at index 2836b4685b..1857a04606 100644 --- a/tests/language/stats/npar.at +++ b/tests/language/stats/npar.at @@ -2007,7 +2007,7 @@ AT_CHECK([pspp -O format=csv npar.sps], [1], [dnl 6 | NPAR TESTS RUNS (**). | ^~" -"npar.sps:7.23-7.24: error: NPAR TESTS: Syntax error expecting `@:}@'. +"npar.sps:7.23-7.24: error: NPAR TESTS: Syntax error expecting `@:}@='. 7 | NPAR TESTS RUNS (MEAN **). | ^~" @@ -2019,7 +2019,7 @@ AT_CHECK([pspp -O format=csv npar.sps], [1], [dnl 9 | NPAR TESTS CHISQUARE **. | ^~" -"npar.sps:10.24-10.25: error: NPAR TESTS: Syntax error expecting BEGIN. +"npar.sps:10.24-10.25: error: NPAR TESTS: Syntax error expecting `BEGIN DATA'. 10 | NPAR TESTS CHISQUARE x **. | ^~" @@ -2139,13 +2139,13 @@ AT_CHECK([pspp -O format=csv npar.sps], [1], [dnl 38 | NPAR TESTS MCNEMAR x WITH **. | ^~" -"npar.sps:39.30-39.31: error: NPAR TESTS: Syntax error expecting PAIRED. 
+"npar.sps:39.30-39.31: error: NPAR TESTS: Syntax error expecting `PAIRED@:}@'. 39 | NPAR TESTS MCNEMAR x WITH y (**). | ^~" -"npar.sps:40.37-40.38: error: NPAR TESTS: Syntax error expecting `@:}@'. +"npar.sps:40.30-40.38: error: NPAR TESTS: Syntax error expecting `PAIRED@:}@'. 40 | NPAR TESTS MCNEMAR x WITH y (PAIRED **). - | ^~" + | ^~~~~~~~~" "npar.sps:41.20-41.29: error: NPAR TESTS: PAIRED was specified, but the number of variables preceding WITH (1) does not match the number following (2). 41 | NPAR TESTS MCNEMAR x WITH y z (PAIRED).