From: Ben Pfaff <blp@cs.stanford.edu>
Date: Mon, 5 Jul 2021 22:15:45 +0000 (-0700)
Subject: scan: Get rid of scan token types in favor of new scan result state.
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fdev11;p=pspp

scan: Get rid of scan token types in favor of new scan result state.
---

diff --git a/src/language/control/define.c b/src/language/control/define.c
index 3a7f535c86..23a58fe8f9 100644
--- a/src/language/control/define.c
+++ b/src/language/control/define.c
@@ -65,8 +65,8 @@ parse_quoted_token (struct lexer *lexer, struct token *token)
   struct string_lexer slex;
   string_lexer_init (&slex, s.string, s.length, SEG_MODE_INTERACTIVE, true);
   struct token another_token = { .type = T_STOP };
-  if (!string_lexer_next (&slex, token)
-      || string_lexer_next (&slex, &another_token))
+  if (string_lexer_next (&slex, token) != SLR_TOKEN
+      || string_lexer_next (&slex, &another_token) != SLR_END)
     {
       token_uninit (token);
       token_uninit (&another_token);
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index 9753024cc3..f8b5a84082 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -1081,13 +1081,12 @@ lex_match_phrase (struct lexer *lexer, const char *s)
   i = 0;
   string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true);
   while (string_lexer_next (&slex, &token))
-    if (token.type != SCAN_SKIP)
-      {
-        bool match = lex_tokens_match (lex_next (lexer, i++), &token);
-        token_uninit (&token);
-        if (!match)
-          return false;
-      }
+    {
+      bool match = lex_tokens_match (lex_next (lexer, i++), &token);
+      token_uninit (&token);
+      if (!match)
+        return false;
+    }
 
   while (i-- > 0)
     lex_get (lexer);
@@ -1667,6 +1666,7 @@ lex_source_try_get__ (struct lex_source *src)
 
   /* Extract segments and pass them through the scanner until we obtain a
      token. */
+  enum scan_result result;
   for (;;)
     {
       /* Extract a segment. */
@@ -1693,9 +1693,8 @@ lex_source_try_get__ (struct lex_source *src)
         }
 
       /* Pass the segment into the scanner and try to get a token out. */
-      enum scan_result result = scanner_push (&scanner, type,
-                                              ss_buffer (segment, seg_len),
-                                              &token->token);
+      result = scanner_push (&scanner, type, ss_buffer (segment, seg_len),
+                             &token->token);
       if (result == SCAN_SAVE)
         saved = state;
       else if (result == SCAN_BACK)
@@ -1703,7 +1702,9 @@ lex_source_try_get__ (struct lex_source *src)
           state = saved;
           break;
         }
-      else if (result == SCAN_DONE)
+      else if (result == SCAN_DONE
+               || result == SCAN_EMPTY
+               || result == SCAN_ERROR)
         break;
     }
 
@@ -1757,37 +1758,24 @@ lex_source_try_get__ (struct lex_source *src)
   src->line_pos = state.line_pos;
   src->n_newlines += state.newlines;
 
-  switch (token->token.type)
+  if (result == SCAN_EMPTY)
+    {
+      lex_source_pop_front (src);
+      return false;
+    }
+  else if (result == SCAN_ERROR)
+    {
+      lex_get_error (src, token->token.string.string);
+      return false;
+    }
+  else if (token->token.type == T_STOP)
     {
-    default:
-      return true;
-
-    case T_STOP:
       token->token.type = T_ENDCMD;
       src->eof = true;
       return true;
-
-    case SCAN_BAD_HEX_LENGTH:
-    case SCAN_BAD_HEX_DIGIT:
-    case SCAN_BAD_UNICODE_DIGIT:
-    case SCAN_BAD_UNICODE_LENGTH:
-    case SCAN_BAD_UNICODE_CODE_POINT:
-    case SCAN_EXPECTED_QUOTE:
-    case SCAN_EXPECTED_EXPONENT:
-    case SCAN_UNEXPECTED_CHAR:
-     {
-      char *msg = scan_token_to_error (&token->token);
-      lex_get_error (src, msg);
-      free (msg);
-      return false;
-     }
-
-    case SCAN_SKIP:
-      lex_source_pop_front (src);
-      return false;
     }
-
-  NOT_REACHED ();
+  else
+    return true;
 }
 
 /* Attempts to add a new token at the front of SRC.  Returns true if
diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c
index 6b5d624301..b5cbf2dfc6 100644
--- a/src/language/lexer/macro.c
+++ b/src/language/lexer/macro.c
@@ -248,6 +248,7 @@ macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src
       struct scanner scanner;
       scanner_init (&scanner, token);
 
+      enum scan_result result;
       for (;;)
         {
           enum segment_type type;
@@ -258,37 +259,40 @@ macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src
           struct substring segment = ss_head (state.body, seg_len);
           ss_advance (&state.body, seg_len);
 
-          enum scan_result result = scanner_push (&scanner, type, segment, token);
+          result = scanner_push (&scanner, type, segment, token);
           if (result == SCAN_SAVE)
             saved = state;
-          else if (result == SCAN_BACK)
-            {
-              state = saved;
-              break;
-            }
-          else if (result == SCAN_DONE)
+          else if (result != SCAN_MORE)
             break;
         }
 
-      /* We have a token in 'token'. */
-      mt.syntax.length = state.body.string - mt.syntax.string;
-      if (is_scan_type (token->type))
+
+      switch (result)
         {
-          if (token->type != SCAN_SKIP)
-            {
-              char *s = scan_token_to_error (token);
-              if (stack)
-                {
-                  mt.token.type = T_STRING;
-                  macro_error (stack, &mt, "%s", s);
-                }
-              else
-                msg (SE, "%s", s);
-              free (s);
-            }
+        case SCAN_BACK:
+          state = saved;
+          /* Fall through. */
+        case SCAN_DONE:
+          mt.syntax.length = state.body.string - mt.syntax.string;
+          macro_tokens_add (mts, &mt);
+          break;
+
+        case SCAN_EMPTY:
+          break;
+
+        case SCAN_ERROR:
+          mt.syntax.length = state.body.string - mt.syntax.string;
+          if (stack)
+            macro_error (stack, &mt, "%s", token->string.string);
+          else
+            msg (SE, "%s", token->string.string);
+          break;
+
+        case SCAN_MORE:
+        case SCAN_SAVE:
+          NOT_REACHED ();
         }
-      else
-        macro_tokens_add (mts, &mt);
+
       token_uninit (token);
     }
 }
@@ -1016,17 +1020,15 @@ unquote_string (const char *s, enum segmenter_mode segmenter_mode,
   string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
 
   struct token token1;
-  if (!string_lexer_next (&slex, &token1))
-    return false;
-
-  if (token1.type != T_STRING)
+  if (string_lexer_next (&slex, &token1) != SLR_TOKEN
+      || token1.type != T_STRING)
     {
       token_uninit (&token1);
       return false;
     }
 
   struct token token2;
-  if (string_lexer_next (&slex, &token2))
+  if (string_lexer_next (&slex, &token2) != SLR_END)
     {
       token_uninit (&token1);
       token_uninit (&token2);
diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c
index 3b9e3c5a22..a2f821c05d 100644
--- a/src/language/lexer/scan.c
+++ b/src/language/lexer/scan.c
@@ -71,7 +71,7 @@ digit_value (int c)
     }
 }
 
-static bool
+static char *
 scan_quoted_string__ (struct substring s, struct token *token)
 {
   int quote;
@@ -97,118 +97,89 @@ scan_quoted_string__ (struct substring s, struct token *token)
   memcpy (ss_end (token->string), s.string, ss_length (s));
   token->string.length += ss_length (s);
 
-  return true;
+  return NULL;
 }
 
-static bool
+static char *
 scan_hex_string__ (struct substring s, struct token *token)
 {
-  uint8_t *dst;
-  size_t i;
-
   /* Trim X' from front and ' from back. */
   s.string += 2;
   s.length -= 3;
 
   if (s.length % 2 != 0)
-    {
-      token->type = SCAN_BAD_HEX_LENGTH;
-      token->number = s.length;
-      return false;
-    }
+    return xasprintf (_("String of hex digits has %zu characters, which "
+                        "is not a multiple of 2."), s.length);
 
   ss_realloc (&token->string, token->string.length + s.length / 2 + 1);
-  dst = CHAR_CAST (uint8_t *, ss_end (token->string));
+  uint8_t *dst = CHAR_CAST (uint8_t *, ss_end (token->string));
   token->string.length += s.length / 2;
-  for (i = 0; i < s.length; i += 2)
+  for (size_t i = 0; i < s.length; i += 2)
     {
       int hi = digit_value (s.string[i]);
       int lo = digit_value (s.string[i + 1]);
 
       if (hi >= 16 || lo >= 16)
-        {
-          token->type = SCAN_BAD_HEX_DIGIT;
-          token->number = s.string[hi >= 16 ? i : i + 1];
-          return false;
-        }
+        return xasprintf (_("`%c' is not a valid hex digit."),
+                          s.string[hi >= 16 ? i : i + 1]);
 
       *dst++ = hi * 16 + lo;
     }
 
-  return true;
+  return NULL;
 }
 
-static bool
+static char *
 scan_unicode_string__ (struct substring s, struct token *token)
 {
-  uint8_t *dst;
-  ucs4_t uc;
-  size_t i;
-
   /* Trim U' from front and ' from back. */
   s.string += 2;
   s.length -= 3;
 
   if (s.length < 1 || s.length > 8)
-    {
-      token->type = SCAN_BAD_UNICODE_LENGTH;
-      token->number = s.length;
-      return 0;
-    }
+    return xasprintf (_("Unicode string contains %zu bytes, which is "
+                        "not in the valid range of 1 to 8 bytes."),
+                      s.length);
 
   ss_realloc (&token->string, token->string.length + 4 + 1);
 
-  uc = 0;
-  for (i = 0; i < s.length; i++)
+  ucs4_t uc = 0;
+  for (size_t i = 0; i < s.length; i++)
     {
       int digit = digit_value (s.string[i]);
       if (digit >= 16)
-        {
-          token->type = SCAN_BAD_UNICODE_DIGIT;
-          token->number = s.string[i];
-          return 0;
-        }
+        return xasprintf (_("`%c' is not a valid hex digit."),
+                          s.string[i]);
       uc = uc * 16 + digit;
     }
 
   if ((uc >= 0xd800 && uc < 0xe000) || uc > 0x10ffff)
-    {
-      token->type = SCAN_BAD_UNICODE_CODE_POINT;
-      token->number = uc;
-      return 0;
-    }
+    return xasprintf (_("U+%04llX is not a valid Unicode code point."),
+                      (long long) uc);
 
-  dst = CHAR_CAST (uint8_t *, ss_end (token->string));
+  uint8_t *dst = CHAR_CAST (uint8_t *, ss_end (token->string));
   token->string.length += u8_uctomb (dst, uc, 4);
 
-  return true;
+  return NULL;
+}
+
+static enum scan_result
+scan_error__ (struct token *token, char *error)
+{
+  ss_dealloc (&token->string);
+  token->type = T_STRING;
+  token->string = ss_cstr (error);
+  return SCAN_ERROR;
 }
 
 static enum scan_result
 scan_string_segment__ (struct scanner *scanner, enum segment_type type,
                        struct substring s, struct token *token)
 {
-  bool ok;
-
-  switch (type)
-    {
-    case SEG_QUOTED_STRING:
-      ok = scan_quoted_string__ (s, token);
-      break;
-
-    case SEG_HEX_STRING:
-      ok = scan_hex_string__ (s, token);
-      break;
-
-    case SEG_UNICODE_STRING:
-      ok = scan_unicode_string__ (s, token);
-      break;
-
-    default:
-      NOT_REACHED ();
-    }
-
-  if (ok)
+  char *error = (type == SEG_QUOTED_STRING ? scan_quoted_string__ (s, token)
+                 : type == SEG_HEX_STRING ? scan_hex_string__ (s, token)
+                 : scan_unicode_string__ (s, token));
+  if (!error)
     {
       token->type = T_STRING;
       token->string.string[token->string.length] = '\0';
@@ -217,14 +188,7 @@ scan_string_segment__ (struct scanner *scanner, enum segment_type type,
       return SCAN_SAVE;
     }
   else
-    {
-      /* The function we called above should have filled in token->type and
-         token->number properly to describe the error. */
-      ss_dealloc (&token->string);
-      token->string = ss_empty ();
-      return SCAN_DONE;
-    }
-
+    return scan_error__ (token, error);
 }
 
 static enum scan_result
@@ -397,76 +361,11 @@ static enum scan_result
 scan_unexpected_char (const struct substring *s, struct token *token)
 {
   ucs4_t uc;
-
-  token->type = SCAN_UNEXPECTED_CHAR;
   u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s->string), s->length);
-  token->number = uc;
-
-  return SCAN_DONE;
-}
-
-const char *
-scan_type_to_string (enum scan_type type)
-{
-  switch (type)
-    {
-#define SCAN_TYPE(NAME) case SCAN_##NAME: return #NAME;
-      SCAN_TYPES
-#undef SCAN_TYPE
-
-    default:
-      return token_type_to_name ((enum token_type) type);
-    }
-}
-
-bool
-is_scan_type (enum scan_type type)
-{
-  return type > SCAN_FIRST && type < SCAN_LAST;
-}
-
-/* If TOKEN has the type of a scan error (a subset of those identified by
-   is_scan_type()), returns an appropriate error message.  Otherwise, returns
-   NULL. */
-char *
-scan_token_to_error (const struct token *token)
-{
-  switch (token->type)
-    {
-    case SCAN_BAD_HEX_LENGTH:
-      return xasprintf (_("String of hex digits has %d characters, which "
-                          "is not a multiple of 2."), (int) token->number);
-
-    case SCAN_BAD_HEX_DIGIT:
-    case SCAN_BAD_UNICODE_DIGIT:
-      return xasprintf (_("`%c' is not a valid hex digit."),
-                        (int) token->number);
-
-    case SCAN_BAD_UNICODE_LENGTH:
-      return xasprintf (_("Unicode string contains %d bytes, which is "
-                          "not in the valid range of 1 to 8 bytes."),
-                        (int) token->number);
-
-    case SCAN_BAD_UNICODE_CODE_POINT:
-      return xasprintf (_("U+%04X is not a valid Unicode code point."),
-                        (int) token->number);
-
-    case SCAN_EXPECTED_QUOTE:
-      return xasprintf (_("Unterminated string constant."));
-
-    case SCAN_EXPECTED_EXPONENT:
-      return xasprintf (_("Missing exponent following `%s'."),
-                        token->string.string);
-
-    case SCAN_UNEXPECTED_CHAR:
-     {
-      char c_name[16];
-      return xasprintf (_("Bad character %s in input."),
-                        uc_name (token->number, c_name));
-     }
-    }
 
-  return NULL;
+  char c_name[16];
+  return scan_error__ (token, xasprintf (_("Bad character %s in input."),
+                                         uc_name (uc, c_name)));
 }
 
 static enum scan_result
@@ -527,8 +426,7 @@ scan_start__ (struct scanner *scanner, enum segment_type type,
     case SEG_COMMENT:
     case SEG_NEWLINE:
     case SEG_COMMENT_COMMAND:
-      token->type = SCAN_SKIP;
-      return SCAN_DONE;
+      return SCAN_EMPTY;
 
     case SEG_START_DOCUMENT:
       token->type = T_ID;
@@ -546,13 +444,13 @@ scan_start__ (struct scanner *scanner, enum segment_type type,
       return SCAN_DONE;
 
     case SEG_EXPECTED_QUOTE:
-      token->type = SCAN_EXPECTED_QUOTE;
-      return SCAN_DONE;
+      return scan_error__ (token,
+                           xasprintf (_("Unterminated string constant.")));
 
     case SEG_EXPECTED_EXPONENT:
-      token->type = SCAN_EXPECTED_EXPONENT;
-      ss_alloc_substring (&token->string, s);
-      return SCAN_DONE;
+      return scan_error__ (token,
+                           xasprintf (_("Missing exponent following `%.*s'."),
+                                      (int) s.length, s.string));
 
     case SEG_UNEXPECTED_CHAR:
       return scan_unexpected_char (&s, token);
@@ -626,6 +524,14 @@ scanner_init (struct scanner *scanner, struct token *token)
        the segments up to and including the segment for which SCAN_SAVE was
        most recently returned.  Segments following that one should be passed to
        the next scanner to be initialized.
+
+     - SCAN_EMPTY: This is similar to SCAN_DONE, but there's no token because
+       the scanner consumed white space or comments or other syntax that
+       doesn't produce a token.
+
+     - SCAN_ERROR: This is similar to SCAN_DONE, but the token is a T_STRING
+       that describes some lexical error.  The caller should report the error
+       and discard the token.
 */
 enum scan_result
 scanner_push (struct scanner *scanner, enum segment_type type,
@@ -664,14 +570,14 @@ string_lexer_init (struct string_lexer *slex, const char *input, size_t length,
 }
 
 /*  */
-bool
+enum string_lexer_result
 string_lexer_next (struct string_lexer *slex, struct token *token)
 {
   struct segmenter saved_segmenter;
   size_t saved_offset = 0;
 
   struct scanner scanner;
-
+next:
   scanner_init (&scanner, token);
   for (;;)
     {
@@ -691,7 +597,7 @@ string_lexer_next (struct string_lexer *slex, struct token *token)
           slex->offset = saved_offset;
           /* Fall through. */
         case SCAN_DONE:
-          return token->type != T_STOP;
+          return token->type == T_STOP ? SLR_END : SLR_TOKEN;
 
         case SCAN_MORE:
           break;
@@ -700,6 +606,12 @@ string_lexer_next (struct string_lexer *slex, struct token *token)
           saved_segmenter = slex->segmenter;
           saved_offset = slex->offset;
           break;
+
+        case SCAN_ERROR:
+          return SLR_ERROR;
+
+        case SCAN_EMPTY:
+          goto next;
         }
     }
 }
diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h
index 0dde273804..bd76e3ffda 100644
--- a/src/language/lexer/scan.h
+++ b/src/language/lexer/scan.h
@@ -35,39 +35,6 @@ struct token;
    types.
 */
 
-#define SCAN_TYPES                              \
-    SCAN_TYPE(BAD_HEX_LENGTH)                   \
-    SCAN_TYPE(BAD_HEX_DIGIT)                    \
-                                                \
-    SCAN_TYPE(BAD_UNICODE_LENGTH)               \
-    SCAN_TYPE(BAD_UNICODE_DIGIT)                \
-    SCAN_TYPE(BAD_UNICODE_CODE_POINT)           \
-                                                \
-    SCAN_TYPE(EXPECTED_QUOTE)                   \
-    SCAN_TYPE(EXPECTED_EXPONENT)                \
-    SCAN_TYPE(UNEXPECTED_CHAR)                  \
-                                                \
-    SCAN_TYPE(SKIP)
-
-/* Types of scan tokens.
-
-   Scan token types are a superset of enum token_type.  Only the additional
-   scan token types are defined here, so see the definition of enum token_type
-   for the others. */
-enum scan_type
-  {
-#define SCAN_TYPE(TYPE) SCAN_##TYPE,
-    SCAN_FIRST = 255,
-    SCAN_TYPES
-    SCAN_LAST
-#undef SCAN_TYPE
-  };
-
-const char *scan_type_to_string (enum scan_type);
-bool is_scan_type (enum scan_type);
-
-char *scan_token_to_error (const struct token *);
-
 /* A scanner.  Opaque. */
 struct scanner
   {
@@ -80,6 +47,8 @@ enum scan_result
   {
     /* Complete token. */
     SCAN_DONE,                  /* Token successfully scanned. */
+    SCAN_EMPTY,                 /* This segment does not produce any token. */
+    SCAN_ERROR,                 /* This segment yields an error message. */
     SCAN_MORE,                  /* More segments needed to scan token. */
 
     /* Incomplete token. */
@@ -101,8 +70,16 @@ struct string_lexer
     struct segmenter segmenter;
   };
 
+enum string_lexer_result
+  {
+    SLR_END,
+    SLR_TOKEN,
+    SLR_ERROR
+  };
+
 void string_lexer_init (struct string_lexer *, const char *input,
                         size_t length, enum segmenter_mode, bool is_snippet);
-bool string_lexer_next (struct string_lexer *, struct token *);
+enum string_lexer_result string_lexer_next (struct string_lexer *,
+                                            struct token *);
 
 #endif /* scan.h */
diff --git a/src/language/lexer/token.h b/src/language/lexer/token.h
index dca1452c6a..8ec28f3714 100644
--- a/src/language/lexer/token.h
+++ b/src/language/lexer/token.h
@@ -23,13 +23,10 @@
 #include "libpspp/str.h"
 #include "data/identifier.h"
 
-/* A PSPP syntax token.
-
-   The 'type' member is used by the scanner (see scan.h) for SCAN_* values as
-   well, which is why it is not declared as type "enum token_type". */
+/* A PSPP syntax token. */
 struct token
   {
-    int type;                   /* Usually a "enum token_type" value. */
+    enum token_type type;
     double number;
     struct substring string;
   };
diff --git a/tests/language/lexer/scan-test.c b/tests/language/lexer/scan-test.c
index 2a77e127ac..53163bcad6 100644
--- a/tests/language/lexer/scan-test.c
+++ b/tests/language/lexer/scan-test.c
@@ -54,7 +54,6 @@ main (int argc, char *argv[])
   char *input;
 
   struct string_lexer slex;
-  bool more;
 
   set_program_name (argv[0]);
   file_name = parse_options (argc, argv);
@@ -74,13 +73,13 @@ main (int argc, char *argv[])
     }
 
   string_lexer_init (&slex, input, length, mode, false);
+  enum string_lexer_result result;
   do
     {
       struct token token;
+      result = string_lexer_next (&slex, &token);
 
-      more = string_lexer_next (&slex, &token);
-
-      printf ("%s", scan_type_to_string (token.type));
+      printf ("%s", result == SLR_ERROR ? "error" : token_type_to_name (token.type));
       if (token.number != 0.0)
         {
           double x = token.number;
@@ -96,7 +95,7 @@ main (int argc, char *argv[])
 
       token_uninit (&token);
     }
-  while (more);
+  while (result != SLR_END);
 
   free (input);
 
diff --git a/tests/language/lexer/scan.at b/tests/language/lexer/scan.at
index 146b891e1c..56711cbac0 100644
--- a/tests/language/lexer/scan.at
+++ b/tests/language/lexer/scan.at
@@ -34,52 +34,29 @@ WXYZ. /* unterminated end of line comment
 ])
 AT_DATA([expout-base], [dnl
 ID "a"
-SKIP
 ID "aB"
-SKIP
 ID "i5"
-SKIP
 ID "$x"
-SKIP
 ID "@efg"
-SKIP
 ID "@@."
-SKIP
 MACRO_ID "!abcd"
-SKIP
 ID "#.#"
-SKIP
 MACRO_PUNCT "."
 ID "x"
-SKIP
 MACRO_PUNCT "_"
 ID "z"
 ENDCMD
-SKIP
 ID "abcd."
-SKIP
 ID "abcd"
 ENDCMD
-SKIP
 ID "QRSTUV"
 ENDCMD
-SKIP
-SKIP
 ID "QrStUv"
 ENDCMD
-SKIP
-SKIP
-SKIP
 ID "WXYZ"
 ENDCMD
-SKIP
-SKIP
-SKIP
-UNEXPECTED_CHAR 65533
+error "Bad character U+FFFD in input."
 ENDCMD
-SKIP
-SKIP
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -95,88 +72,47 @@ and. with.
 ])
 AT_DATA([expout-base], [dnl
 AND
-SKIP
 OR
-SKIP
 NOT
-SKIP
 EQ
-SKIP
 GE
-SKIP
 GT
-SKIP
 LE
-SKIP
 LT
-SKIP
 NE
-SKIP
 ALL
-SKIP
 BY
-SKIP
 TO
-SKIP
 WITH
-SKIP
 AND
-SKIP
 OR
-SKIP
 NOT
-SKIP
 EQ
-SKIP
 GE
-SKIP
 GT
-SKIP
 LE
-SKIP
 LT
-SKIP
 NE
-SKIP
 ALL
-SKIP
 BY
-SKIP
 TO
-SKIP
 WITH
-SKIP
 ID "andx"
-SKIP
 ID "orx"
-SKIP
 ID "notx"
-SKIP
 ID "eqx"
-SKIP
 ID "gex"
-SKIP
 ID "gtx"
-SKIP
 ID "lex"
-SKIP
 ID "ltx"
-SKIP
 ID "nex"
-SKIP
 ID "allx"
-SKIP
 ID "byx"
-SKIP
 ID "tox"
-SKIP
 ID "withx"
-SKIP
 ID "and."
-SKIP
 WITH
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -191,45 +127,25 @@ AT_DATA([input], [dnl
 ])
 AT_DATA([expout-base], [dnl
 NOT
-SKIP
 AND
-SKIP
 OR
-SKIP
 EQUALS
-SKIP
 GE
-SKIP
 GT
-SKIP
 LE
-SKIP
 LT
-SKIP
 NE
-SKIP
 NE
-SKIP
 LPAREN
-SKIP
 RPAREN
-SKIP
 COMMA
-SKIP
 DASH
-SKIP
 PLUS
-SKIP
 ASTERISK
-SKIP
 SLASH
-SKIP
 LBRACK
-SKIP
 RBRACK
-SKIP
 EXP
-SKIP
 NOT
 AND
 OR
@@ -250,25 +166,15 @@ SLASH
 LBRACK
 RBRACK
 EXP
-SKIP
 MACRO_PUNCT "%"
-SKIP
 MACRO_PUNCT ":"
-SKIP
 MACRO_PUNCT ";"
-SKIP
 MACRO_PUNCT "?"
-SKIP
 MACRO_PUNCT "_"
-SKIP
 MACRO_PUNCT "`"
-SKIP
 MACRO_PUNCT "{"
-SKIP
 MACRO_PUNCT "}"
-SKIP
 NOT
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -287,73 +193,39 @@ AT_DATA([input], [dnl
 ])
 AT_DATA([expout-base], [dnl
 POS_NUM
-SKIP
 POS_NUM 1
-SKIP
 POS_NUM 1
-SKIP
 POS_NUM 1
-SKIP
 POS_NUM 1
 ENDCMD
-SKIP
 POS_NUM 123
 ENDCMD
-SKIP
-SKIP
-SKIP
-SKIP
-SKIP
 ENDCMD
 POS_NUM 1
-SKIP
 POS_NUM 0.1
-SKIP
 POS_NUM 0.1
-SKIP
 POS_NUM 0.1
-SKIP
 POS_NUM 50
-SKIP
 POS_NUM 0.6
-SKIP
 POS_NUM 70
-SKIP
 POS_NUM 60
-SKIP
 POS_NUM 0.006
-SKIP
 ENDCMD
 POS_NUM 30
-SKIP
 POS_NUM 0.04
-SKIP
 POS_NUM 5
-SKIP
 POS_NUM 6
-SKIP
 POS_NUM 0.0007
-SKIP
 POS_NUM 12.3
-SKIP
 POS_NUM 4.56
-SKIP
 POS_NUM 789
-SKIP
 POS_NUM 999
-SKIP
 POS_NUM 0.0112
-SKIP
 ENDCMD
-SKIP
-EXPECTED_EXPONENT "1e"
-SKIP
+error "Missing exponent following `1e'."
 ID "e1"
-SKIP
-EXPECTED_EXPONENT "1e+"
-SKIP
-EXPECTED_EXPONENT "1e-"
--SKIP
+error "Missing exponent following `1e+'."
+error "Missing exponent following `1e-'."
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -394,61 +266,33 @@ x"4142"
 ])
 AT_DATA([expout-base], [dnl
 STRING "x"
-SKIP
 STRING "y"
-SKIP
 STRING "abc"
-SKIP
 STRING "Don't"
-SKIP
 STRING "Can't"
-SKIP
 STRING "Won't"
-SKIP
 STRING ""quoted""
-SKIP
 STRING ""quoted""
-SKIP
 STRING ""
-SKIP
 STRING ""
-SKIP
 STRING "'"
-SKIP
 STRING """
-SKIP
-EXPECTED_QUOTE
-SKIP
-EXPECTED_QUOTE
-SKIP
+error "Unterminated string constant."
+error "Unterminated string constant."
 STRING "xyzabcde"
-SKIP
 STRING "foobar"
-SKIP
 STRING "foobar"
-SKIP
 STRING "foo"
-SKIP
 PLUS
-SKIP
 ENDCMD
-SKIP
 STRING "bar"
-SKIP
 ENDCMD
-SKIP
 PLUS
-SKIP
 STRING "AB5152"
-SKIP
 STRING "4142QR"
-SKIP
 STRING "ABã"
-SKIP
 STRING "ï¿½ããããã"
-SKIP
 STRING "abcï¿½ãxyz"
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -461,18 +305,14 @@ AT_DATA([input], [dnl
 #! /usr/bin/pspp
 ])
 AT_DATA([expout-base], [dnl
-SKIP
-SKIP
 ID "#"
 MACRO_ID "!"
-SKIP
 SLASH
 ID "usr"
 SLASH
 ID "bin"
 SLASH
 ID "pspp"
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -499,57 +339,27 @@ next command.
 
 ])
 AT_DATA([expout-base], [dnl
-SKIP
-SKIP
-SKIP
 ENDCMD
-SKIP
 ENDCMD
-SKIP
-SKIP
 ENDCMD
-SKIP
-SKIP
 ENDCMD
-SKIP
 ENDCMD
-SKIP
-SKIP
 ENDCMD
-SKIP
-SKIP
 ENDCMD
-SKIP
 ID "com"
-SKIP
 ID "is"
-SKIP
 ID "ambiguous"
-SKIP
 WITH
-SKIP
 ID "COMPUTE"
 ENDCMD
-SKIP
 ENDCMD
-SKIP
-SKIP
-SKIP
 ENDCMD
-SKIP
 ENDCMD
-SKIP
-SKIP
-SKIP
 ENDCMD
-SKIP
 ID "next"
-SKIP
 ID "command"
 ENDCMD
-SKIP
 -ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -574,31 +384,21 @@ ID "DOCUMENT"
 STRING "DOCUMENT one line."
 ENDCMD
 ENDCMD
-SKIP
 ID "DOCUMENT"
 STRING "DOC more"
-SKIP
 STRING "    than"
-SKIP
 STRING "        one"
-SKIP
 STRING "            line."
 ENDCMD
 ENDCMD
-SKIP
 ID "DOCUMENT"
 STRING "docu"
-SKIP
 STRING "first.paragraph"
-SKIP
 STRING "isn't parsed as tokens"
-SKIP
 STRING ""
-SKIP
 STRING "second paragraph."
 -ENDCMD
 -ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -616,32 +416,17 @@ FILE /*
 ])
 AT_DATA([expout-base], [dnl
 ID "FIL"
-SKIP
 ID "label"
-SKIP
 STRING "isn't quoted"
 ENDCMD
-SKIP
 ID "FILE"
-SKIP
-SKIP
 ID "lab"
-SKIP
 STRING "is quoted"
 ENDCMD
-SKIP
 ID "FILE"
-SKIP
-SKIP
-SKIP
-SKIP
-SKIP
 ID "lab"
-SKIP
 STRING "not quoted here either"
-SKIP
 -ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -664,41 +449,22 @@ end data
 ])
 AT_DATA([expout-base], [dnl
 ID "begin"
-SKIP
 ID "data"
 ENDCMD
-SKIP
 STRING "123"
-SKIP
 STRING "xxx"
-SKIP
 ID "end"
-SKIP
 ID "data"
 ENDCMD
-SKIP
 ENDCMD
-SKIP
 ID "BEG"
-SKIP
-SKIP
-SKIP
 ID "DAT"
-SKIP
-SKIP
-SKIP
 STRING "5 6 7 /* x"
-SKIP
 STRING ""
-SKIP
 STRING "end  data"
-SKIP
 ID "end"
-SKIP
 ID "data"
-SKIP
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -719,43 +485,26 @@ end
 ])
 AT_DATA([expout-base], [dnl
 ID "do"
-SKIP
 ID "repeat"
-SKIP
 ID "x"
 EQUALS
 ID "a"
-SKIP
 ID "b"
-SKIP
 ID "c"
-SKIP
-SKIP
 ID "y"
 EQUALS
 ID "d"
-SKIP
 ID "e"
-SKIP
 ID "f"
 ENDCMD
-SKIP
 STRING "  do repeat a=1 thru 5."
-SKIP
 STRING "another command."
-SKIP
 STRING "second command"
-SKIP
 STRING "+ third command."
-SKIP
 STRING "end /* x */ /* y */ repeat print."
-SKIP
 ID "end"
-SKIP
-SKIP
 ID "repeat"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -781,60 +530,35 @@ end repeat
 ])
 AT_DATA([expout-base], [dnl
 ID "do"
-SKIP
 ID "repeat"
-SKIP
 ID "x"
 EQUALS
 ID "a"
-SKIP
 ID "b"
-SKIP
 ID "c"
-SKIP
-SKIP
 ID "y"
 EQUALS
 ID "d"
-SKIP
 ID "e"
-SKIP
 ID "f"
-SKIP
 ENDCMD
 STRING "do repeat a=1 thru 5"
-SKIP
 STRING "another command"
-SKIP
 STRING "second command"
-SKIP
 STRING "+ third command"
-SKIP
 STRING "end /* x */ /* y */ repeat print"
-SKIP
 ID "end"
-SKIP
-SKIP
 ID "repeat"
-SKIP
 ENDCMD
 ID "do"
-SKIP
-SKIP
 ID "repeat"
-SKIP
 ID "#a"
 EQUALS
 POS_NUM 1
-SKIP
 ENDCMD
-SKIP
 STRING "  inner command"
-SKIP
 ID "end"
-SKIP
 ID "repeat"
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-b])
@@ -849,17 +573,12 @@ var1 var2 var3
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 RPAREN
-SKIP
-SKIP
 STRING "var1 var2 var3"
-SKIP
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -873,15 +592,12 @@ define !macro1() var1 var2 var3
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 RPAREN
 STRING " var1 var2 var3"
-SKIP
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -895,16 +611,12 @@ var1 var2 var3!enddefine.
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 RPAREN
-SKIP
-SKIP
 STRING "var1 var2 var3"
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -917,14 +629,12 @@ define !macro1()var1 var2 var3!enddefine.
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 RPAREN
 STRING "var1 var2 var3"
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -938,15 +648,11 @@ define !macro1()
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 RPAREN
-SKIP
-SKIP
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -962,19 +668,13 @@ define !macro1()
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 RPAREN
-SKIP
-SKIP
 STRING ""
-SKIP
 STRING ""
-SKIP
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -988,28 +688,22 @@ define !macro1(a(), b(), c())
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 ID "a"
 LPAREN
 RPAREN
 COMMA
-SKIP
 ID "b"
 LPAREN
 RPAREN
 COMMA
-SKIP
 ID "c"
 LPAREN
 RPAREN
 RPAREN
-SKIP
-SKIP
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1027,34 +721,22 @@ define !macro1(
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
-SKIP
-SKIP
 ID "a"
 LPAREN
 RPAREN
 COMMA
-SKIP
 ID "b"
 LPAREN
-SKIP
-SKIP
 RPAREN
 COMMA
-SKIP
-SKIP
 ID "c"
 LPAREN
 RPAREN
-SKIP
 RPAREN
-SKIP
-SKIP
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1072,26 +754,18 @@ content 2
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
-SKIP
 LPAREN
 ID "x"
 COMMA
 ID "y"
 COMMA
 ID "z"
-SKIP
 RPAREN
-SKIP
-SKIP
 STRING "content 1"
-SKIP
 STRING "content 2"
-SKIP
 MACRO_ID "!enddefine"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1105,20 +779,14 @@ data list /x 1.
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 ENDCMD
-SKIP
 ID "data"
-SKIP
 ID "list"
-SKIP
 SLASH
 ID "x"
-SKIP
 POS_NUM 1
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1133,22 +801,15 @@ data list /x 1.
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
-SKIP
 ID "x"
 ENDCMD
-SKIP
 ID "data"
-SKIP
 ID "list"
-SKIP
 SLASH
 ID "x"
-SKIP
 POS_NUM 1
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1163,24 +824,17 @@ data list /x 1.
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 ENDCMD
-SKIP
 ID "x"
 ENDCMD
-SKIP
 ID "data"
-SKIP
 ID "list"
-SKIP
 SLASH
 ID "x"
-SKIP
 POS_NUM 1
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1196,20 +850,14 @@ data list /x 1.
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 ENDCMD
-SKIP
 ID "data"
-SKIP
 ID "list"
-SKIP
 SLASH
 ID "x"
-SKIP
 POS_NUM 1
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1224,16 +872,11 @@ content line 2
 ])
 AT_DATA([expout-base], [dnl
 ID "define"
-SKIP
 MACRO_ID "!macro1"
 LPAREN
 RPAREN
-SKIP
-SKIP
 STRING "content line 1"
-SKIP
 STRING "content line 2"
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-i])
@@ -1252,44 +895,25 @@ fourth command.
 ])
 AT_DATA([expout-base], [dnl
 ID "first"
-SKIP
 ID "command"
-SKIP
-SKIP
 ID "another"
-SKIP
 ID "line"
-SKIP
 ID "of"
-SKIP
 ID "first"
-SKIP
 ID "command"
-SKIP
 ENDCMD
-SKIP
 ID "second"
-SKIP
 ID "command"
-SKIP
 ENDCMD
 ID "third"
-SKIP
 ID "command"
-SKIP
 ENDCMD
-SKIP
 ID "fourth"
-SKIP
 ID "command"
 ENDCMD
-SKIP
-SKIP
 ID "fifth"
-SKIP
 ID "command"
 ENDCMD
--SKIP
 STOP
 ])
 PSPP_CHECK_SCAN([-b])