From: Ben Pfaff Date: Mon, 5 Jul 2021 22:15:45 +0000 (-0700) Subject: scan: Get rid of scan token types in favor of new scan result state. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fdev11;p=pspp scan: Get rid of scan token types in favor of new scan result state. --- diff --git a/src/language/control/define.c b/src/language/control/define.c index 3a7f535c86..23a58fe8f9 100644 --- a/src/language/control/define.c +++ b/src/language/control/define.c @@ -65,8 +65,8 @@ parse_quoted_token (struct lexer *lexer, struct token *token) struct string_lexer slex; string_lexer_init (&slex, s.string, s.length, SEG_MODE_INTERACTIVE, true); struct token another_token = { .type = T_STOP }; - if (!string_lexer_next (&slex, token) - || string_lexer_next (&slex, &another_token)) + if (string_lexer_next (&slex, token) != SLR_TOKEN + || string_lexer_next (&slex, &another_token) != SLR_END) { token_uninit (token); token_uninit (&another_token); diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 9753024cc3..f8b5a84082 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -1081,13 +1081,12 @@ lex_match_phrase (struct lexer *lexer, const char *s) i = 0; string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true); while (string_lexer_next (&slex, &token)) - if (token.type != SCAN_SKIP) - { - bool match = lex_tokens_match (lex_next (lexer, i++), &token); - token_uninit (&token); - if (!match) - return false; - } + { + bool match = lex_tokens_match (lex_next (lexer, i++), &token); + token_uninit (&token); + if (!match) + return false; + } while (i-- > 0) lex_get (lexer); @@ -1667,6 +1666,7 @@ lex_source_try_get__ (struct lex_source *src) /* Extract segments and pass them through the scanner until we obtain a token. */ + enum scan_result result; for (;;) { /* Extract a segment. 
*/ @@ -1693,9 +1693,8 @@ lex_source_try_get__ (struct lex_source *src) } /* Pass the segment into the scanner and try to get a token out. */ - enum scan_result result = scanner_push (&scanner, type, - ss_buffer (segment, seg_len), - &token->token); + result = scanner_push (&scanner, type, ss_buffer (segment, seg_len), + &token->token); if (result == SCAN_SAVE) saved = state; else if (result == SCAN_BACK) @@ -1703,7 +1702,9 @@ lex_source_try_get__ (struct lex_source *src) state = saved; break; } - else if (result == SCAN_DONE) + else if (result == SCAN_DONE + || result == SCAN_EMPTY + || result == SCAN_ERROR) break; } @@ -1757,37 +1758,24 @@ lex_source_try_get__ (struct lex_source *src) src->line_pos = state.line_pos; src->n_newlines += state.newlines; - switch (token->token.type) + if (result == SCAN_EMPTY) + { + lex_source_pop_front (src); + return false; + } + else if (result == SCAN_ERROR) + { + lex_get_error (src, token->token.string.string); + return false; + } + else if (token->token.type == T_STOP) { - default: - return true; - - case T_STOP: token->token.type = T_ENDCMD; src->eof = true; return true; - - case SCAN_BAD_HEX_LENGTH: - case SCAN_BAD_HEX_DIGIT: - case SCAN_BAD_UNICODE_DIGIT: - case SCAN_BAD_UNICODE_LENGTH: - case SCAN_BAD_UNICODE_CODE_POINT: - case SCAN_EXPECTED_QUOTE: - case SCAN_EXPECTED_EXPONENT: - case SCAN_UNEXPECTED_CHAR: - { - char *msg = scan_token_to_error (&token->token); - lex_get_error (src, msg); - free (msg); - return false; - } - - case SCAN_SKIP: - lex_source_pop_front (src); - return false; } - - NOT_REACHED (); + else + return true; } /* Attempts to add a new token at the front of SRC. 
Returns true if diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index 6b5d624301..b5cbf2dfc6 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -248,6 +248,7 @@ macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src struct scanner scanner; scanner_init (&scanner, token); + enum scan_result result; for (;;) { enum segment_type type; @@ -258,37 +259,40 @@ macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src struct substring segment = ss_head (state.body, seg_len); ss_advance (&state.body, seg_len); - enum scan_result result = scanner_push (&scanner, type, segment, token); + result = scanner_push (&scanner, type, segment, token); if (result == SCAN_SAVE) saved = state; - else if (result == SCAN_BACK) - { - state = saved; - break; - } - else if (result == SCAN_DONE) + else if (result != SCAN_MORE) break; } - /* We have a token in 'token'. */ - mt.syntax.length = state.body.string - mt.syntax.string; - if (is_scan_type (token->type)) + + switch (result) { - if (token->type != SCAN_SKIP) - { - char *s = scan_token_to_error (token); - if (stack) - { - mt.token.type = T_STRING; - macro_error (stack, &mt, "%s", s); - } - else - msg (SE, "%s", s); - free (s); - } + case SCAN_BACK: + state = saved; + /* Fall through. 
*/ + case SCAN_DONE: + mt.syntax.length = state.body.string - mt.syntax.string; + macro_tokens_add (mts, &mt); + break; + + case SCAN_EMPTY: + break; + + case SCAN_ERROR: + mt.syntax.length = state.body.string - mt.syntax.string; + if (stack) + macro_error (stack, &mt, "%s", token->string.string); + else + msg (SE, "%s", token->string.string); + break; + + case SCAN_MORE: + case SCAN_SAVE: + NOT_REACHED (); } - else - macro_tokens_add (mts, &mt); + token_uninit (token); } } @@ -1016,17 +1020,15 @@ unquote_string (const char *s, enum segmenter_mode segmenter_mode, string_lexer_init (&slex, s, strlen (s), segmenter_mode, true); struct token token1; - if (!string_lexer_next (&slex, &token1)) - return false; - - if (token1.type != T_STRING) + if (string_lexer_next (&slex, &token1) != SLR_TOKEN + || token1.type != T_STRING) { token_uninit (&token1); return false; } struct token token2; - if (string_lexer_next (&slex, &token2)) + if (string_lexer_next (&slex, &token2) != SLR_END) { token_uninit (&token1); token_uninit (&token2); diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index 3b9e3c5a22..a2f821c05d 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -71,7 +71,7 @@ digit_value (int c) } } -static bool +static char * scan_quoted_string__ (struct substring s, struct token *token) { int quote; @@ -97,118 +97,89 @@ scan_quoted_string__ (struct substring s, struct token *token) memcpy (ss_end (token->string), s.string, ss_length (s)); token->string.length += ss_length (s); - return true; + return NULL; } -static bool +static char * scan_hex_string__ (struct substring s, struct token *token) { - uint8_t *dst; - size_t i; - /* Trim X' from front and ' from back. 
*/ s.string += 2; s.length -= 3; if (s.length % 2 != 0) - { - token->type = SCAN_BAD_HEX_LENGTH; - token->number = s.length; - return false; - } + return xasprintf (_("String of hex digits has %zu characters, which " + "is not a multiple of 2."), s.length); ss_realloc (&token->string, token->string.length + s.length / 2 + 1); - dst = CHAR_CAST (uint8_t *, ss_end (token->string)); + uint8_t *dst = CHAR_CAST (uint8_t *, ss_end (token->string)); token->string.length += s.length / 2; - for (i = 0; i < s.length; i += 2) + for (size_t i = 0; i < s.length; i += 2) { int hi = digit_value (s.string[i]); int lo = digit_value (s.string[i + 1]); if (hi >= 16 || lo >= 16) - { - token->type = SCAN_BAD_HEX_DIGIT; - token->number = s.string[hi >= 16 ? i : i + 1]; - return false; - } + return xasprintf (_("`%c' is not a valid hex digit."), + s.string[hi >= 16 ? i : i + 1]); *dst++ = hi * 16 + lo; } - return true; + return NULL; } -static bool +static char * scan_unicode_string__ (struct substring s, struct token *token) { - uint8_t *dst; - ucs4_t uc; - size_t i; - /* Trim U' from front and ' from back. 
*/ s.string += 2; s.length -= 3; if (s.length < 1 || s.length > 8) - { - token->type = SCAN_BAD_UNICODE_LENGTH; - token->number = s.length; - return 0; - } + return xasprintf (_("Unicode string contains %zu bytes, which is " + "not in the valid range of 1 to 8 bytes."), + s.length); ss_realloc (&token->string, token->string.length + 4 + 1); - uc = 0; - for (i = 0; i < s.length; i++) + ucs4_t uc = 0; + for (size_t i = 0; i < s.length; i++) { int digit = digit_value (s.string[i]); if (digit >= 16) - { - token->type = SCAN_BAD_UNICODE_DIGIT; - token->number = s.string[i]; - return 0; - } + return xasprintf (_("`%c' is not a valid hex digit."), + s.string[i]); uc = uc * 16 + digit; } if ((uc >= 0xd800 && uc < 0xe000) || uc > 0x10ffff) - { - token->type = SCAN_BAD_UNICODE_CODE_POINT; - token->number = uc; - return 0; - } + return xasprintf (_("U+%04llX is not a valid Unicode code point."), + (long long) uc); - dst = CHAR_CAST (uint8_t *, ss_end (token->string)); + uint8_t *dst = CHAR_CAST (uint8_t *, ss_end (token->string)); token->string.length += u8_uctomb (dst, uc, 4); - return true; + return NULL; +} + +static enum scan_result +scan_error__ (struct token *token, char *error) +{ + ss_dealloc (&token->string); + token->type = T_STRING; + token->string = ss_cstr (error); + return SCAN_ERROR; } static enum scan_result scan_string_segment__ (struct scanner *scanner, enum segment_type type, struct substring s, struct token *token) { - bool ok; - - switch (type) - { - case SEG_QUOTED_STRING: - ok = scan_quoted_string__ (s, token); - break; - - case SEG_HEX_STRING: - ok = scan_hex_string__ (s, token); - break; - - case SEG_UNICODE_STRING: - ok = scan_unicode_string__ (s, token); - break; - - default: - NOT_REACHED (); - } - - if (ok) + char *error = (type == SEG_QUOTED_STRING ? scan_quoted_string__ (s, token) + : type == SEG_HEX_STRING ? 
scan_hex_string__ (s, token) + : scan_unicode_string__ (s, token)); + if (!error) { token->type = T_STRING; token->string.string[token->string.length] = '\0'; @@ -217,14 +188,7 @@ scan_string_segment__ (struct scanner *scanner, enum segment_type type, return SCAN_SAVE; } else - { - /* The function we called above should have filled in token->type and - token->number properly to describe the error. */ - ss_dealloc (&token->string); - token->string = ss_empty (); - return SCAN_DONE; - } - + return scan_error__ (token, error); } static enum scan_result @@ -397,76 +361,11 @@ static enum scan_result scan_unexpected_char (const struct substring *s, struct token *token) { ucs4_t uc; - - token->type = SCAN_UNEXPECTED_CHAR; u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s->string), s->length); - token->number = uc; - - return SCAN_DONE; -} - -const char * -scan_type_to_string (enum scan_type type) -{ - switch (type) - { -#define SCAN_TYPE(NAME) case SCAN_##NAME: return #NAME; - SCAN_TYPES -#undef SCAN_TYPE - - default: - return token_type_to_name ((enum token_type) type); - } -} - -bool -is_scan_type (enum scan_type type) -{ - return type > SCAN_FIRST && type < SCAN_LAST; -} - -/* If TOKEN has the type of a scan error (a subset of those identified by - is_scan_type()), returns an appropriate error message. Otherwise, returns - NULL. 
*/ -char * -scan_token_to_error (const struct token *token) -{ - switch (token->type) - { - case SCAN_BAD_HEX_LENGTH: - return xasprintf (_("String of hex digits has %d characters, which " - "is not a multiple of 2."), (int) token->number); - - case SCAN_BAD_HEX_DIGIT: - case SCAN_BAD_UNICODE_DIGIT: - return xasprintf (_("`%c' is not a valid hex digit."), - (int) token->number); - - case SCAN_BAD_UNICODE_LENGTH: - return xasprintf (_("Unicode string contains %d bytes, which is " - "not in the valid range of 1 to 8 bytes."), - (int) token->number); - - case SCAN_BAD_UNICODE_CODE_POINT: - return xasprintf (_("U+%04X is not a valid Unicode code point."), - (int) token->number); - - case SCAN_EXPECTED_QUOTE: - return xasprintf (_("Unterminated string constant.")); - - case SCAN_EXPECTED_EXPONENT: - return xasprintf (_("Missing exponent following `%s'."), - token->string.string); - - case SCAN_UNEXPECTED_CHAR: - { - char c_name[16]; - return xasprintf (_("Bad character %s in input."), - uc_name (token->number, c_name)); - } - } - return NULL; + char c_name[16]; + return scan_error__ (token, xasprintf (_("Bad character %s in input."), + uc_name (uc, c_name))); } static enum scan_result @@ -527,8 +426,7 @@ scan_start__ (struct scanner *scanner, enum segment_type type, case SEG_COMMENT: case SEG_NEWLINE: case SEG_COMMENT_COMMAND: - token->type = SCAN_SKIP; - return SCAN_DONE; + return SCAN_EMPTY; case SEG_START_DOCUMENT: token->type = T_ID; @@ -546,13 +444,13 @@ scan_start__ (struct scanner *scanner, enum segment_type type, return SCAN_DONE; case SEG_EXPECTED_QUOTE: - token->type = SCAN_EXPECTED_QUOTE; - return SCAN_DONE; + return scan_error__ (token, + xasprintf (_("Unterminated string constant."))); case SEG_EXPECTED_EXPONENT: - token->type = SCAN_EXPECTED_EXPONENT; - ss_alloc_substring (&token->string, s); - return SCAN_DONE; + return scan_error__ (token, + xasprintf (_("Missing exponent following `%.*s'."), + (int) s.length, s.string)); case SEG_UNEXPECTED_CHAR: return 
scan_unexpected_char (&s, token); @@ -626,6 +524,14 @@ scanner_init (struct scanner *scanner, struct token *token) the segments up to and including the segment for which SCAN_SAVE was most recently returned. Segments following that one should be passed to the next scanner to be initialized. + + - SCAN_EMPTY: This is similar to SCAN_DONE, but there's no token because + the scanner consumed white space or comments or other syntax that + doesn't produce a token. + + - SCAN_ERROR: This is similar to SCAN_DONE, but the token is a T_STRING + that describes some lexical error. The caller should report the error + and discard the token. */ enum scan_result scanner_push (struct scanner *scanner, enum segment_type type, @@ -664,14 +570,14 @@ string_lexer_init (struct string_lexer *slex, const char *input, size_t length, } /* */ -bool +enum string_lexer_result string_lexer_next (struct string_lexer *slex, struct token *token) { struct segmenter saved_segmenter; size_t saved_offset = 0; struct scanner scanner; - +next: scanner_init (&scanner, token); for (;;) { @@ -691,7 +597,7 @@ string_lexer_next (struct string_lexer *slex, struct token *token) slex->offset = saved_offset; /* Fall through. */ case SCAN_DONE: - return token->type != T_STOP; + return token->type == T_STOP ? SLR_END : SLR_TOKEN; case SCAN_MORE: break; @@ -700,6 +606,12 @@ string_lexer_next (struct string_lexer *slex, struct token *token) saved_segmenter = slex->segmenter; saved_offset = slex->offset; break; + + case SCAN_ERROR: + return SLR_ERROR; + + case SCAN_EMPTY: + goto next; } } } diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h index 0dde273804..bd76e3ffda 100644 --- a/src/language/lexer/scan.h +++ b/src/language/lexer/scan.h @@ -35,39 +35,6 @@ struct token; types. 
*/ -#define SCAN_TYPES \ - SCAN_TYPE(BAD_HEX_LENGTH) \ - SCAN_TYPE(BAD_HEX_DIGIT) \ - \ - SCAN_TYPE(BAD_UNICODE_LENGTH) \ - SCAN_TYPE(BAD_UNICODE_DIGIT) \ - SCAN_TYPE(BAD_UNICODE_CODE_POINT) \ - \ - SCAN_TYPE(EXPECTED_QUOTE) \ - SCAN_TYPE(EXPECTED_EXPONENT) \ - SCAN_TYPE(UNEXPECTED_CHAR) \ - \ - SCAN_TYPE(SKIP) - -/* Types of scan tokens. - - Scan token types are a superset of enum token_type. Only the additional - scan token types are defined here, so see the definition of enum token_type - for the others. */ -enum scan_type - { -#define SCAN_TYPE(TYPE) SCAN_##TYPE, - SCAN_FIRST = 255, - SCAN_TYPES - SCAN_LAST -#undef SCAN_TYPE - }; - -const char *scan_type_to_string (enum scan_type); -bool is_scan_type (enum scan_type); - -char *scan_token_to_error (const struct token *); - /* A scanner. Opaque. */ struct scanner { @@ -80,6 +47,8 @@ enum scan_result { /* Complete token. */ SCAN_DONE, /* Token successfully scanned. */ + SCAN_EMPTY, /* This segment does not produce any token. */ + SCAN_ERROR, /* This segment yields an error message. */ SCAN_MORE, /* More segments needed to scan token. */ /* Incomplete token. */ @@ -101,8 +70,16 @@ struct string_lexer struct segmenter segmenter; }; +enum string_lexer_result + { + SLR_END, + SLR_TOKEN, + SLR_ERROR + }; + void string_lexer_init (struct string_lexer *, const char *input, size_t length, enum segmenter_mode, bool is_snippet); -bool string_lexer_next (struct string_lexer *, struct token *); +enum string_lexer_result string_lexer_next (struct string_lexer *, + struct token *); #endif /* scan.h */ diff --git a/src/language/lexer/token.h b/src/language/lexer/token.h index dca1452c6a..8ec28f3714 100644 --- a/src/language/lexer/token.h +++ b/src/language/lexer/token.h @@ -23,13 +23,10 @@ #include "libpspp/str.h" #include "data/identifier.h" -/* A PSPP syntax token. - - The 'type' member is used by the scanner (see scan.h) for SCAN_* values as - well, which is why it is not declared as type "enum token_type". 
*/ +/* A PSPP syntax token. */ struct token { - int type; /* Usually a "enum token_type" value. */ + enum token_type type; double number; struct substring string; }; diff --git a/tests/language/lexer/scan-test.c b/tests/language/lexer/scan-test.c index 2a77e127ac..53163bcad6 100644 --- a/tests/language/lexer/scan-test.c +++ b/tests/language/lexer/scan-test.c @@ -54,7 +54,6 @@ main (int argc, char *argv[]) char *input; struct string_lexer slex; - bool more; set_program_name (argv[0]); file_name = parse_options (argc, argv); @@ -74,13 +73,13 @@ main (int argc, char *argv[]) } string_lexer_init (&slex, input, length, mode, false); + enum string_lexer_result result; do { struct token token; + result = string_lexer_next (&slex, &token); - more = string_lexer_next (&slex, &token); - - printf ("%s", scan_type_to_string (token.type)); + printf ("%s", result == SLR_ERROR ? "error" : token_type_to_name (token.type)); if (token.number != 0.0) { double x = token.number; @@ -96,7 +95,7 @@ main (int argc, char *argv[]) token_uninit (&token); } - while (more); + while (result != SLR_END); free (input); diff --git a/tests/language/lexer/scan.at b/tests/language/lexer/scan.at index 146b891e1c..56711cbac0 100644 --- a/tests/language/lexer/scan.at +++ b/tests/language/lexer/scan.at @@ -34,52 +34,29 @@ WXYZ. /* unterminated end of line comment ]) AT_DATA([expout-base], [dnl ID "a" -SKIP ID "aB" -SKIP ID "i5" -SKIP ID "$x" -SKIP ID "@efg" -SKIP ID "@@." -SKIP MACRO_ID "!abcd" -SKIP ID "#.#" -SKIP MACRO_PUNCT "." ID "x" -SKIP MACRO_PUNCT "_" ID "z" ENDCMD -SKIP ID "abcd." -SKIP ID "abcd" ENDCMD -SKIP ID "QRSTUV" ENDCMD -SKIP -SKIP ID "QrStUv" ENDCMD -SKIP -SKIP -SKIP ID "WXYZ" ENDCMD -SKIP -SKIP -SKIP -UNEXPECTED_CHAR 65533 +error "Bad character U+FFFD in input." ENDCMD -SKIP -SKIP --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -95,88 +72,47 @@ and. with. 
]) AT_DATA([expout-base], [dnl AND -SKIP OR -SKIP NOT -SKIP EQ -SKIP GE -SKIP GT -SKIP LE -SKIP LT -SKIP NE -SKIP ALL -SKIP BY -SKIP TO -SKIP WITH -SKIP AND -SKIP OR -SKIP NOT -SKIP EQ -SKIP GE -SKIP GT -SKIP LE -SKIP LT -SKIP NE -SKIP ALL -SKIP BY -SKIP TO -SKIP WITH -SKIP ID "andx" -SKIP ID "orx" -SKIP ID "notx" -SKIP ID "eqx" -SKIP ID "gex" -SKIP ID "gtx" -SKIP ID "lex" -SKIP ID "ltx" -SKIP ID "nex" -SKIP ID "allx" -SKIP ID "byx" -SKIP ID "tox" -SKIP ID "withx" -SKIP ID "and." -SKIP WITH ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -191,45 +127,25 @@ AT_DATA([input], [dnl ]) AT_DATA([expout-base], [dnl NOT -SKIP AND -SKIP OR -SKIP EQUALS -SKIP GE -SKIP GT -SKIP LE -SKIP LT -SKIP NE -SKIP NE -SKIP LPAREN -SKIP RPAREN -SKIP COMMA -SKIP DASH -SKIP PLUS -SKIP ASTERISK -SKIP SLASH -SKIP LBRACK -SKIP RBRACK -SKIP EXP -SKIP NOT AND OR @@ -250,25 +166,15 @@ SLASH LBRACK RBRACK EXP -SKIP MACRO_PUNCT "%" -SKIP MACRO_PUNCT ":" -SKIP MACRO_PUNCT ";" -SKIP MACRO_PUNCT "?" -SKIP MACRO_PUNCT "_" -SKIP MACRO_PUNCT "`" -SKIP MACRO_PUNCT "{" -SKIP MACRO_PUNCT "}" -SKIP NOT --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -287,73 +193,39 @@ AT_DATA([input], [dnl ]) AT_DATA([expout-base], [dnl POS_NUM -SKIP POS_NUM 1 -SKIP POS_NUM 1 -SKIP POS_NUM 1 -SKIP POS_NUM 1 ENDCMD -SKIP POS_NUM 123 ENDCMD -SKIP -SKIP -SKIP -SKIP -SKIP ENDCMD POS_NUM 1 -SKIP POS_NUM 0.1 -SKIP POS_NUM 0.1 -SKIP POS_NUM 0.1 -SKIP POS_NUM 50 -SKIP POS_NUM 0.6 -SKIP POS_NUM 70 -SKIP POS_NUM 60 -SKIP POS_NUM 0.006 -SKIP ENDCMD POS_NUM 30 -SKIP POS_NUM 0.04 -SKIP POS_NUM 5 -SKIP POS_NUM 6 -SKIP POS_NUM 0.0007 -SKIP POS_NUM 12.3 -SKIP POS_NUM 4.56 -SKIP POS_NUM 789 -SKIP POS_NUM 999 -SKIP POS_NUM 0.0112 -SKIP ENDCMD -SKIP -EXPECTED_EXPONENT "1e" -SKIP +error "Missing exponent following `1e'." ID "e1" -SKIP -EXPECTED_EXPONENT "1e+" -SKIP -EXPECTED_EXPONENT "1e-" --SKIP +error "Missing exponent following `1e+'." +error "Missing exponent following `1e-'." 
STOP ]) PSPP_CHECK_SCAN([-i]) @@ -394,61 +266,33 @@ x"4142" ]) AT_DATA([expout-base], [dnl STRING "x" -SKIP STRING "y" -SKIP STRING "abc" -SKIP STRING "Don't" -SKIP STRING "Can't" -SKIP STRING "Won't" -SKIP STRING ""quoted"" -SKIP STRING ""quoted"" -SKIP STRING "" -SKIP STRING "" -SKIP STRING "'" -SKIP STRING """ -SKIP -EXPECTED_QUOTE -SKIP -EXPECTED_QUOTE -SKIP +error "Unterminated string constant." +error "Unterminated string constant." STRING "xyzabcde" -SKIP STRING "foobar" -SKIP STRING "foobar" -SKIP STRING "foo" -SKIP PLUS -SKIP ENDCMD -SKIP STRING "bar" -SKIP ENDCMD -SKIP PLUS -SKIP STRING "AB5152" -SKIP STRING "4142QR" -SKIP STRING "ABお" -SKIP STRING "�あいうえお" -SKIP STRING "abc�えxyz" --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -461,18 +305,14 @@ AT_DATA([input], [dnl #! /usr/bin/pspp ]) AT_DATA([expout-base], [dnl -SKIP -SKIP ID "#" MACRO_ID "!" -SKIP SLASH ID "usr" SLASH ID "bin" SLASH ID "pspp" --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -499,57 +339,27 @@ next command. ]) AT_DATA([expout-base], [dnl -SKIP -SKIP -SKIP ENDCMD -SKIP ENDCMD -SKIP -SKIP ENDCMD -SKIP -SKIP ENDCMD -SKIP ENDCMD -SKIP -SKIP ENDCMD -SKIP -SKIP ENDCMD -SKIP ID "com" -SKIP ID "is" -SKIP ID "ambiguous" -SKIP WITH -SKIP ID "COMPUTE" ENDCMD -SKIP ENDCMD -SKIP -SKIP -SKIP ENDCMD -SKIP ENDCMD -SKIP -SKIP -SKIP ENDCMD -SKIP ID "next" -SKIP ID "command" ENDCMD -SKIP -ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -574,31 +384,21 @@ ID "DOCUMENT" STRING "DOCUMENT one line." ENDCMD ENDCMD -SKIP ID "DOCUMENT" STRING "DOC more" -SKIP STRING " than" -SKIP STRING " one" -SKIP STRING " line." ENDCMD ENDCMD -SKIP ID "DOCUMENT" STRING "docu" -SKIP STRING "first.paragraph" -SKIP STRING "isn't parsed as tokens" -SKIP STRING "" -SKIP STRING "second paragraph." 
-ENDCMD -ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -616,32 +416,17 @@ FILE /* ]) AT_DATA([expout-base], [dnl ID "FIL" -SKIP ID "label" -SKIP STRING "isn't quoted" ENDCMD -SKIP ID "FILE" -SKIP -SKIP ID "lab" -SKIP STRING "is quoted" ENDCMD -SKIP ID "FILE" -SKIP -SKIP -SKIP -SKIP -SKIP ID "lab" -SKIP STRING "not quoted here either" -SKIP -ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -664,41 +449,22 @@ end data ]) AT_DATA([expout-base], [dnl ID "begin" -SKIP ID "data" ENDCMD -SKIP STRING "123" -SKIP STRING "xxx" -SKIP ID "end" -SKIP ID "data" ENDCMD -SKIP ENDCMD -SKIP ID "BEG" -SKIP -SKIP -SKIP ID "DAT" -SKIP -SKIP -SKIP STRING "5 6 7 /* x" -SKIP STRING "" -SKIP STRING "end data" -SKIP ID "end" -SKIP ID "data" -SKIP ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -719,43 +485,26 @@ end ]) AT_DATA([expout-base], [dnl ID "do" -SKIP ID "repeat" -SKIP ID "x" EQUALS ID "a" -SKIP ID "b" -SKIP ID "c" -SKIP -SKIP ID "y" EQUALS ID "d" -SKIP ID "e" -SKIP ID "f" ENDCMD -SKIP STRING " do repeat a=1 thru 5." -SKIP STRING "another command." -SKIP STRING "second command" -SKIP STRING "+ third command." -SKIP STRING "end /* x */ /* y */ repeat print." 
-SKIP ID "end" -SKIP -SKIP ID "repeat" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -781,60 +530,35 @@ end repeat ]) AT_DATA([expout-base], [dnl ID "do" -SKIP ID "repeat" -SKIP ID "x" EQUALS ID "a" -SKIP ID "b" -SKIP ID "c" -SKIP -SKIP ID "y" EQUALS ID "d" -SKIP ID "e" -SKIP ID "f" -SKIP ENDCMD STRING "do repeat a=1 thru 5" -SKIP STRING "another command" -SKIP STRING "second command" -SKIP STRING "+ third command" -SKIP STRING "end /* x */ /* y */ repeat print" -SKIP ID "end" -SKIP -SKIP ID "repeat" -SKIP ENDCMD ID "do" -SKIP -SKIP ID "repeat" -SKIP ID "#a" EQUALS POS_NUM 1 -SKIP ENDCMD -SKIP STRING " inner command" -SKIP ID "end" -SKIP ID "repeat" --SKIP STOP ]) PSPP_CHECK_SCAN([-b]) @@ -849,17 +573,12 @@ var1 var2 var3 ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN RPAREN -SKIP -SKIP STRING "var1 var2 var3" -SKIP MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -873,15 +592,12 @@ define !macro1() var1 var2 var3 ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN RPAREN STRING " var1 var2 var3" -SKIP MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -895,16 +611,12 @@ var1 var2 var3!enddefine. ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN RPAREN -SKIP -SKIP STRING "var1 var2 var3" MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -917,14 +629,12 @@ define !macro1()var1 var2 var3!enddefine. 
]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN RPAREN STRING "var1 var2 var3" MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -938,15 +648,11 @@ define !macro1() ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN RPAREN -SKIP -SKIP MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -962,19 +668,13 @@ define !macro1() ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN RPAREN -SKIP -SKIP STRING "" -SKIP STRING "" -SKIP MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -988,28 +688,22 @@ define !macro1(a(), b(), c()) ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN ID "a" LPAREN RPAREN COMMA -SKIP ID "b" LPAREN RPAREN COMMA -SKIP ID "c" LPAREN RPAREN RPAREN -SKIP -SKIP MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1027,34 +721,22 @@ define !macro1( ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN -SKIP -SKIP ID "a" LPAREN RPAREN COMMA -SKIP ID "b" LPAREN -SKIP -SKIP RPAREN COMMA -SKIP -SKIP ID "c" LPAREN RPAREN -SKIP RPAREN -SKIP -SKIP MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1072,26 +754,18 @@ content 2 ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" -SKIP LPAREN ID "x" COMMA ID "y" COMMA ID "z" -SKIP RPAREN -SKIP -SKIP STRING "content 1" -SKIP STRING "content 2" -SKIP MACRO_ID "!enddefine" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1105,20 +779,14 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" ENDCMD -SKIP ID "data" -SKIP ID "list" -SKIP SLASH ID "x" -SKIP POS_NUM 1 ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1133,22 +801,15 @@ data list /x 1. 
]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" -SKIP ID "x" ENDCMD -SKIP ID "data" -SKIP ID "list" -SKIP SLASH ID "x" -SKIP POS_NUM 1 ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1163,24 +824,17 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN ENDCMD -SKIP ID "x" ENDCMD -SKIP ID "data" -SKIP ID "list" -SKIP SLASH ID "x" -SKIP POS_NUM 1 ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1196,20 +850,14 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" ENDCMD -SKIP ID "data" -SKIP ID "list" -SKIP SLASH ID "x" -SKIP POS_NUM 1 ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1224,16 +872,11 @@ content line 2 ]) AT_DATA([expout-base], [dnl ID "define" -SKIP MACRO_ID "!macro1" LPAREN RPAREN -SKIP -SKIP STRING "content line 1" -SKIP STRING "content line 2" --SKIP STOP ]) PSPP_CHECK_SCAN([-i]) @@ -1252,44 +895,25 @@ fourth command. ]) AT_DATA([expout-base], [dnl ID "first" -SKIP ID "command" -SKIP -SKIP ID "another" -SKIP ID "line" -SKIP ID "of" -SKIP ID "first" -SKIP ID "command" -SKIP ENDCMD -SKIP ID "second" -SKIP ID "command" -SKIP ENDCMD ID "third" -SKIP ID "command" -SKIP ENDCMD -SKIP ID "fourth" -SKIP ID "command" ENDCMD -SKIP -SKIP ID "fifth" -SKIP ID "command" ENDCMD --SKIP STOP ]) PSPP_CHECK_SCAN([-b])