From: Ben Pfaff <blp@cs.stanford.edu>
Date: Sun, 25 Sep 2022 23:01:26 +0000 (-0700)
Subject: lexer: New function lex_force_match_phrase().
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a208fcf0dcdcc445dd7d492e00ebdf3ce23e247f;p=pspp

lexer: New function lex_force_match_phrase().
---

diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c
index d34067e819..6e8c82bf0b 100644
--- a/src/language/data-io/data-reader.c
+++ b/src/language/data-io/data-reader.c
@@ -216,8 +216,7 @@ read_inline_record (struct dfm_reader *r)
       while (lex_token (r->lexer) == T_ENDCMD)
         lex_get (r->lexer);
 
-      if (!lex_force_match_id (r->lexer, "BEGIN")
-          || !lex_force_match_id (r->lexer, "DATA"))
+      if (!lex_force_match_phrase (r->lexer, "BEGIN DATA"))
         return false;
 
       lex_match (r->lexer, T_ENDCMD);
diff --git a/src/language/dictionary/attributes.c b/src/language/dictionary/attributes.c
index 954314ef4f..e646af3847 100644
--- a/src/language/dictionary/attributes.c
+++ b/src/language/dictionary/attributes.c
@@ -59,8 +59,7 @@ cmd_variable_attribute (struct lexer *lexer, struct dataset *ds)
       size_t n_vars, i;
       bool ok;
 
-      if (!lex_force_match_id (lexer, "VARIABLES")
-          || !lex_force_match (lexer, T_EQUALS)
+      if (!lex_force_match_phrase (lexer, "VARIABLES=")
           || !parse_variables (lexer, dict, &vars, &n_vars, PV_NONE))
         return CMD_FAILURE;
 
diff --git a/src/language/dictionary/mrsets.c b/src/language/dictionary/mrsets.c
index ffd97b6800..9c91ba9861 100644
--- a/src/language/dictionary/mrsets.c
+++ b/src/language/dictionary/mrsets.c
@@ -147,8 +147,7 @@ parse_group (struct lexer *lexer, struct dictionary *dict,
         }
       else if (type == MRSET_MD && lex_match_id (lexer, "LABELSOURCE"))
         {
-          if (!lex_force_match (lexer, T_EQUALS)
-              || !lex_force_match_id (lexer, "VARLABEL"))
+          if (!lex_force_match_phrase (lexer, "=VARLABEL"))
             goto error;
 
           labelsource_varlabel = true;
@@ -504,8 +503,7 @@ static bool
 parse_mrset_names (struct lexer *lexer, struct dictionary *dict,
                    struct stringi_set *mrset_names)
 {
-  if (!lex_force_match_id (lexer, "NAME")
-      || !lex_force_match (lexer, T_EQUALS))
+  if (!lex_force_match_phrase (lexer, "NAME="))
     return false;
 
   stringi_set_init (mrset_names);
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index a2a0da9713..35a9afa381 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -85,7 +85,8 @@ static struct msg_point lex_token_start_point (const struct lex_source *,
 static struct msg_point lex_token_end_point (const struct lex_source *,
                                              const struct lex_token *);
 
-static size_t lex_ofs_at_phrase__ (struct lexer *, int ofs, const char *s);
+static bool lex_ofs_at_phrase__ (struct lexer *, int ofs, const char *s,
+                                 size_t *n_matchedp);
 
 /* Source offset of the last byte in TOKEN. */
 static size_t
@@ -618,7 +619,7 @@ lex_sbc_only_once (struct lexer *lexer, const char *sbc)
 
   /* lex_ofs_at_phrase__() handles subcommand names that are keywords, such as
      BY. */
-  if (lex_ofs_at_phrase__ (lexer, ofs, sbc))
+  if (lex_ofs_at_phrase__ (lexer, ofs, sbc, NULL))
     lex_ofs_error (lexer, ofs, ofs,
                    _("Subcommand %s may only be specified once."), sbc);
   else
@@ -1634,22 +1635,31 @@ lex_tokens_match (const struct token *actual, const struct token *expected)
     }
 }
 
-static size_t
-lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s)
+static bool
+lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s,
+                     size_t *n_matchedp)
 {
   struct string_lexer slex;
   struct token token;
 
-  size_t i = 0;
+  size_t n_matched = 0;
+  bool all_matched = true;
   string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true);
   while (string_lexer_next (&slex, &token))
     {
-      bool match = lex_tokens_match (lex_ofs_token (lexer, ofs + i++), &token);
+      bool match = lex_tokens_match (lex_ofs_token (lexer, ofs + n_matched),
+                                     &token);
       token_uninit (&token);
       if (!match)
-        return 0;
+        {
+          all_matched = false;
+          break;
+        }
+      n_matched++;
     }
-  return i;
+  if (n_matchedp)
+    *n_matchedp = n_matched;
+  return all_matched;
 }
 
 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
@@ -1661,7 +1671,7 @@ lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s)
 bool
 lex_at_phrase (struct lexer *lexer, const char *s)
 {
-  return lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s) > 0;
+  return lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, NULL);
 }
 
 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
@@ -1673,10 +1683,29 @@ lex_at_phrase (struct lexer *lexer, const char *s)
 bool
 lex_match_phrase (struct lexer *lexer, const char *s)
 {
-  size_t n = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s);
-  if (n > 0)
-    lex_get_n (lexer, n);
-  return n > 0;
+  size_t n_matched;
+  if (!lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched))
+    return false;
+  lex_get_n (lexer, n_matched);
+  return true;
+}
+
+/* If LEXER is positioned at the sequence of tokens that may be parsed from S,
+   skips it and returns true.  Otherwise, issues an error covering the tokens
+   from the current one through the first one that did not match, naming S as
+   what was expected, and returns false.  S may consist of an arbitrary
+   sequence of tokens, e.g. "KRUSKAL-WALLIS", "2SLS", or "END INPUT PROGRAM".
+   Identifiers may be abbreviated to their first three letters. */
+bool
+lex_force_match_phrase (struct lexer *lexer, const char *s)
+{
+  size_t n_matched;
+  bool ok = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched);
+  if (ok)
+    lex_get_n (lexer, n_matched);
+  else
+    lex_next_error (lexer, 0, n_matched, _("Syntax error expecting `%s'."), s);
+  return ok;
+}
 
 /* Returns the 1-based line number of the source text at the byte OFFSET in
diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h
index b172ac610f..ae40a87a04 100644
--- a/src/language/lexer/lexer.h
+++ b/src/language/lexer/lexer.h
@@ -127,6 +127,7 @@ bool lex_match_id_n (struct lexer *, const char *, size_t n);
 bool lex_match_int (struct lexer *, int);
 bool lex_at_phrase (struct lexer *, const char *s);
 bool lex_match_phrase (struct lexer *, const char *s);
+bool lex_force_match_phrase (struct lexer *, const char *s) WARN_UNUSED_RESULT;
 
 /* Forcible matching functions. */
 bool lex_force_match (struct lexer *, enum token_type) WARN_UNUSED_RESULT;
diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c
index dd0978731b..5359c4cabd 100644
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -5710,9 +5710,7 @@ ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
   char *name = ss_xstrdup (lex_tokss (lexer));
 
   lex_get (lexer);
-  if (!lex_force_match (lexer, T_EQUALS)
-      || !lex_force_match_id (lexer, "EXPR")
-      || !lex_force_match (lexer, T_LPAREN))
+  if (!lex_force_match_phrase (lexer, "=EXPR("))
     {
       free (name);
       return false;
diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c
index 5dabe25a16..713c543d08 100644
--- a/src/language/stats/npar.c
+++ b/src/language/stats/npar.c
@@ -423,8 +423,7 @@ npar_runs (struct lexer *lexer, struct dataset *ds,
       return false;
     }
 
-  if (!lex_force_match (lexer, T_RPAREN)
-      || !lex_force_match (lexer, T_EQUALS))
+  if (!lex_force_match_phrase (lexer, ")="))
     return false;
 
   if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
@@ -756,8 +755,7 @@ parse_two_sample_related_test (struct lexer *lexer,
 
       if (lex_match (lexer, T_LPAREN))
         {
-          if (!lex_force_match_id (lexer, "PAIRED")
-              || !lex_force_match (lexer, T_RPAREN))
+          if (!lex_force_match_phrase (lexer, "PAIRED)"))
             return false;
           paired = true;
 
diff --git a/src/language/utilities/host.c b/src/language/utilities/host.c
index 45fca6175b..22d25c0eb0 100644
--- a/src/language/utilities/host.c
+++ b/src/language/utilities/host.c
@@ -293,9 +293,7 @@ cmd_host (struct lexer *lexer, struct dataset *ds UNUSED)
       return CMD_FAILURE;
     }
 
-  if (!lex_force_match_id (lexer, "COMMAND")
-      || !lex_force_match (lexer, T_EQUALS)
-      || !lex_force_match (lexer, T_LBRACK)
+  if (!lex_force_match_phrase (lexer, "COMMAND=[")
       || !lex_force_string (lexer))
     return CMD_FAILURE;
 
diff --git a/tests/language/data-io/inpt-pgm.at b/tests/language/data-io/inpt-pgm.at
index a0054588da..1c61d52fe7 100644
--- a/tests/language/data-io/inpt-pgm.at
+++ b/tests/language/data-io/inpt-pgm.at
@@ -46,7 +46,7 @@ END INPUT PROGRAM.
 DESCRIPTIVES x.
 ])
 AT_CHECK([pspp -O format=csv input-program.sps], [1], [dnl
-error: DESCRIPTIVES: At end of input: Syntax error expecting BEGIN.
+error: DESCRIPTIVES: At end of input: Syntax error expecting `BEGIN DATA'.
 ])
 AT_CLEANUP
 
diff --git a/tests/language/stats/aggregate.at b/tests/language/stats/aggregate.at
index f4f0fee665..a76870289b 100644
--- a/tests/language/stats/aggregate.at
+++ b/tests/language/stats/aggregate.at
@@ -502,7 +502,7 @@ aggregate.sps:16: error: AGGREGATE: Number of source variables (1) does not matc
    17 | AGGREGATE /y=pin(x, 2, 1).
       |                     ^~~~"
 
-"aggregate.sps:18.1-18.9: error: AGGREGATE: Syntax error expecting BEGIN.
+"aggregate.sps:18.1-18.9: error: AGGREGATE: Syntax error expecting `BEGIN DATA'.
    18 | AGGREGATE /y=mean(x)**.
       | ^~~~~~~~~"
 
diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at
index 63a8cdfd28..7aca64e3a9 100644
--- a/tests/language/stats/ctables.at
+++ b/tests/language/stats/ctables.at
@@ -413,17 +413,17 @@ ctables.sps:42.20: error: CTABLES: Syntax error expecting identifier.
    42 | CTABLES /PCOMPUTE &1.
       |                    ^
 
-ctables.sps:43.21-43.22: error: CTABLES: Syntax error expecting `='.
+ctables.sps:43.21-43.22: error: CTABLES: Syntax error expecting `=EXPR@{:@'.
    43 | CTABLES /PCOMPUTE &k**.
       |                     ^~
 
-ctables.sps:44.22-44.23: error: CTABLES: Syntax error expecting EXPR.
+ctables.sps:44.21-44.23: error: CTABLES: Syntax error expecting `=EXPR@{:@'.
    44 | CTABLES /PCOMPUTE &k=**.
-      |                      ^~
+      |                     ^~~
 
-ctables.sps:45.26-45.27: error: CTABLES: Syntax error expecting `@{:@'.
+ctables.sps:45.21-45.27: error: CTABLES: Syntax error expecting `=EXPR@{:@'.
    45 | CTABLES /PCOMPUTE &k=EXPR**.
-      |                          ^~
+      |                     ^~~~~~~
 
 ctables.sps:46.28: error: CTABLES: Syntax error expecting `@:}@'.
    46 | CTABLES /PCOMPUTE &k=EXPR(1x).
diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at
index 2836b4685b..1857a04606 100644
--- a/tests/language/stats/npar.at
+++ b/tests/language/stats/npar.at
@@ -2007,7 +2007,7 @@ AT_CHECK([pspp -O format=csv npar.sps], [1], [dnl
     6 | NPAR TESTS RUNS (**).
       |                  ^~"
 
-"npar.sps:7.23-7.24: error: NPAR TESTS: Syntax error expecting `@:}@'.
+"npar.sps:7.23-7.24: error: NPAR TESTS: Syntax error expecting `@:}@='.
     7 | NPAR TESTS RUNS (MEAN **).
       |                       ^~"
 
@@ -2019,7 +2019,7 @@ AT_CHECK([pspp -O format=csv npar.sps], [1], [dnl
     9 | NPAR TESTS CHISQUARE **.
       |                      ^~"
 
-"npar.sps:10.24-10.25: error: NPAR TESTS: Syntax error expecting BEGIN.
+"npar.sps:10.24-10.25: error: NPAR TESTS: Syntax error expecting `BEGIN DATA'.
    10 | NPAR TESTS CHISQUARE x **.
       |                        ^~"
 
@@ -2139,13 +2139,13 @@ AT_CHECK([pspp -O format=csv npar.sps], [1], [dnl
    38 | NPAR TESTS MCNEMAR x WITH **.
       |                           ^~"
 
-"npar.sps:39.30-39.31: error: NPAR TESTS: Syntax error expecting PAIRED.
+"npar.sps:39.30-39.31: error: NPAR TESTS: Syntax error expecting `PAIRED@:}@'.
    39 | NPAR TESTS MCNEMAR x WITH y (**).
       |                              ^~"
 
-"npar.sps:40.37-40.38: error: NPAR TESTS: Syntax error expecting `@:}@'.
+"npar.sps:40.30-40.38: error: NPAR TESTS: Syntax error expecting `PAIRED@:}@'.
    40 | NPAR TESTS MCNEMAR x WITH y (PAIRED **).
-      |                                     ^~"
+      |                              ^~~~~~~~~"
 
 "npar.sps:41.20-41.29: error: NPAR TESTS: PAIRED was specified, but the number of variables preceding WITH (1) does not match the number following (2).
    41 | NPAR TESTS MCNEMAR x WITH y z (PAIRED).