sys_stat \
tempname \
trunc \
+ unictype/ctype-print \
unictype/property-id-continue \
unictype/property-id-start \
unigbrk/uc-is-grapheme-break \
src/language/lexer/subcommand-list.h \
src/language/lexer/format-parser.c \
src/language/lexer/format-parser.h \
+ src/language/lexer/scan.c \
+ src/language/lexer/scan.h \
src/language/lexer/segment.c \
src/language/lexer/segment.h \
+ src/language/lexer/token.c \
+ src/language/lexer/token.h \
src/language/lexer/value-parser.c \
src/language/lexer/value-parser.h \
src/language/lexer/variable-parser.c \
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "language/lexer/scan.h"
+
+#include <limits.h>
+#include <unistr.h>
+
+#include "data/identifier.h"
+#include "language/lexer/token.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+
+#include "gl/c-ctype.h"
+#include "gl/xmemdup0.h"
+
+/* Scanner states, stored in the 'state' member of struct scanner and used by
+ scanner_push() to choose which helper handles the next segment. */
+enum
+ {
+ S_START, /* Set by scanner_init(): no segment consumed yet. */
+ S_DASH, /* Entered after a lone "-" punctuation segment. */
+ S_STRING /* Entered after a string segment was scanned successfully. */
+ };
+
+/* Bits kept in the 'substate' member of struct scanner while in S_STRING.
+ They record what has been seen since the last string segment, so that
+ scan_string__() can decide whether another string may be concatenated. */
+#define SS_NL_BEFORE_PLUS (1u << 0)
+#define SS_PLUS (1u << 1)
+#define SS_NL_AFTER_PLUS (1u << 2)
+
+/* Converts hexadecimal digit C, in either case, to its value in the range
+   0...15.  Any character that is not a hex digit yields INT_MAX, which
+   callers detect by testing for a result of 16 or more. */
+static int
+digit_value (int c)
+{
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+  return INT_MAX;
+}
+
+/* Appends the contents of quoted string segment S to TOKEN's string.  S
+   includes its opening and closing quote characters; each doubled quote
+   inside it is reduced to a single quote.  Always returns true. */
+static bool
+scan_quoted_string__ (struct substring s, struct token *token)
+{
+  /* The closing delimiter tells us which quote character is in use. */
+  int delim = s.string[s.length - 1];
+  size_t ofs;
+
+  /* Drop the opening and closing quotes. */
+  s.string++;
+  s.length -= 2;
+
+  /* The output can never be longer than the input; the extra byte leaves
+     room for a terminator written by the caller. */
+  ss_realloc (&token->string, token->string.length + s.length + 1);
+
+  while ((ofs = ss_find_byte (s, delim)) != SIZE_MAX)
+    {
+      /* Copy everything through the first quote of the pair, then skip the
+         second quote of the pair. */
+      size_t n = ofs + 1;
+
+      memcpy (ss_end (token->string), s.string, n);
+      token->string.length += n;
+      ss_advance (&s, n + 1);
+    }
+
+  /* Whatever remains contains no quote characters. */
+  memcpy (ss_end (token->string), s.string, ss_length (s));
+  token->string.length += ss_length (s);
+
+  return true;
+}
+
+/* Decodes hex string segment S (an X, an opening quote, pairs of hex digits,
+   and a closing quote) and appends the decoded bytes to TOKEN's string.
+
+   Returns true if successful.  On failure, returns false after setting
+   TOKEN's type to SCAN_BAD_HEX_LENGTH (with the digit count in TOKEN's
+   number) or SCAN_BAD_HEX_DIGIT (with the offending byte, as an unsigned
+   value, in TOKEN's number). */
+static bool
+scan_hex_string__ (struct substring s, struct token *token)
+{
+  uint8_t *dst;
+  size_t i;
+
+  /* Trim X and the opening quote from the front, the closing quote from the
+     back. */
+  s.string += 2;
+  s.length -= 3;
+
+  if (s.length % 2 != 0)
+    {
+      token->type = SCAN_BAD_HEX_LENGTH;
+      token->number = s.length;
+      return false;
+    }
+
+  /* Two digits make one byte; the extra byte leaves room for a terminator
+     written by the caller. */
+  ss_realloc (&token->string, token->string.length + s.length / 2 + 1);
+  dst = CHAR_CAST (uint8_t *, ss_end (token->string));
+  token->string.length += s.length / 2;
+  for (i = 0; i < s.length; i += 2)
+    {
+      int hi = digit_value (s.string[i]);
+      int lo = digit_value (s.string[i + 1]);
+
+      if (hi >= 16 || lo >= 16)
+        {
+          /* Report the byte as unsigned so that a non-ASCII byte does not
+             turn into a negative number where plain char is signed. */
+          token->type = SCAN_BAD_HEX_DIGIT;
+          token->number = (unsigned char) s.string[hi >= 16 ? i : i + 1];
+          return false;
+        }
+
+      *dst++ = hi * 16 + lo;
+    }
+
+  return true;
+}
+
+/* Decodes Unicode string segment S (a U, an opening quote, 1 to 8 hex digits
+   naming one code point, and a closing quote) and appends the code point,
+   encoded as UTF-8, to TOKEN's string.
+
+   Returns true if successful.  On failure, returns false after setting
+   TOKEN's type to SCAN_BAD_UNICODE_LENGTH, SCAN_BAD_UNICODE_DIGIT, or
+   SCAN_BAD_UNICODE_CODE_POINT and TOKEN's number to the digit count, the
+   offending byte (as an unsigned value), or the code point, respectively. */
+static bool
+scan_unicode_string__ (struct substring s, struct token *token)
+{
+  uint8_t *dst;
+  ucs4_t uc;
+  size_t i;
+
+  /* Trim U and the opening quote from the front, the closing quote from the
+     back. */
+  s.string += 2;
+  s.length -= 3;
+
+  if (s.length < 1 || s.length > 8)
+    {
+      token->type = SCAN_BAD_UNICODE_LENGTH;
+      token->number = s.length;
+      return false;
+    }
+
+  uc = 0;
+  for (i = 0; i < s.length; i++)
+    {
+      int digit = digit_value (s.string[i]);
+      if (digit >= 16)
+        {
+          /* Report the byte as unsigned so that a non-ASCII byte does not
+             turn into a negative number where plain char is signed. */
+          token->type = SCAN_BAD_UNICODE_DIGIT;
+          token->number = (unsigned char) s.string[i];
+          return false;
+        }
+      uc = uc * 16 + digit;
+    }
+
+  /* Reject surrogates and values beyond the last Unicode code point. */
+  if ((uc >= 0xd800 && uc < 0xe000) || uc > 0x10ffff)
+    {
+      token->type = SCAN_BAD_UNICODE_CODE_POINT;
+      token->number = uc;
+      return false;
+    }
+
+  /* Room for up to 4 bytes of UTF-8, plus a terminator written by the
+     caller. */
+  ss_realloc (&token->string, token->string.length + 4 + 1);
+  dst = CHAR_CAST (uint8_t *, ss_end (token->string));
+  token->string.length += u8_uctomb (dst, uc, 4);
+
+  return true;
+}
+
+/* Scans string segment S of the given TYPE (quoted, hex, or Unicode) and
+   appends its value to TOKEN's string.
+
+   On success, makes TOKEN a null-terminated T_STRING, puts SCANNER into
+   S_STRING state with a cleared substate, and returns SCAN_SAVE.  On failure,
+   discards TOKEN's string and returns SCAN_DONE, leaving in TOKEN the error
+   type and number that the per-type helper stored. */
+static enum scan_result
+scan_string_segment__ (struct scanner *scanner, enum segment_type type,
+                       struct substring s, struct token *token)
+{
+  bool ok;
+
+  switch (type)
+    {
+    case SEG_QUOTED_STRING:
+      ok = scan_quoted_string__ (s, token);
+      break;
+
+    case SEG_HEX_STRING:
+      ok = scan_hex_string__ (s, token);
+      break;
+
+    case SEG_UNICODE_STRING:
+      ok = scan_unicode_string__ (s, token);
+      break;
+
+    default:
+      NOT_REACHED ();
+    }
+
+  if (!ok)
+    {
+      /* The helper has already described the error in token->type and
+         token->number; only the partial string needs to be dropped. */
+      ss_dealloc (&token->string);
+      token->string = ss_empty ();
+      return SCAN_DONE;
+    }
+
+  token->type = T_STRING;
+  token->string.string[token->string.length] = '\0';
+  scanner->state = S_STRING;
+  scanner->substate = 0;
+  return SCAN_SAVE;
+}
+
+/* Records BIT in SCANNER's substate.  Returns SCAN_MORE if BIT was not yet
+   set, or SCAN_BACK (without changing anything) if it was set already. */
+static enum scan_result
+add_bit (struct scanner *scanner, unsigned int bit)
+{
+  if (scanner->substate & bit)
+    return SCAN_BACK;
+
+  scanner->substate |= bit;
+  return SCAN_MORE;
+}
+
+/* Handles a segment that arrives while SCANNER is in S_STRING state, that
+   is, after at least one string segment has been accumulated in TOKEN.
+
+   Spaces and comments are passed over.  At most one new-line before a "+",
+   one "+", and one new-line after the "+" are accepted, each tracked by a
+   bit in the substate.  A further string segment is appended only if a "+"
+   has been seen.  Anything else ends the token with SCAN_BACK. */
+static enum scan_result
+scan_string__ (struct scanner *scanner, enum segment_type type,
+               struct substring s, struct token *token)
+{
+  switch (type)
+    {
+    case SEG_SPACES:
+    case SEG_COMMENT:
+      return SCAN_MORE;
+
+    case SEG_NEWLINE:
+      return add_bit (scanner, (scanner->substate & SS_PLUS
+                                ? SS_NL_AFTER_PLUS
+                                : SS_NL_BEFORE_PLUS));
+
+    case SEG_PUNCT:
+      if (s.length == 1 && s.string[0] == '+')
+        return add_bit (scanner, SS_PLUS);
+      return SCAN_BACK;
+
+    case SEG_QUOTED_STRING:
+    case SEG_HEX_STRING:
+    case SEG_UNICODE_STRING:
+      if (scanner->substate & SS_PLUS)
+        return scan_string_segment__ (scanner, type, s, token);
+      return SCAN_BACK;
+
+    default:
+      return SCAN_BACK;
+    }
+}
+
+/* Returns the token type for reserved word WORD, compared without regard to
+   case.  Only as many characters are examined as are needed to tell the
+   reserved words apart, so WORD must really be a reserved word. */
+static enum token_type
+scan_reserved_word__ (struct substring word)
+{
+  int first = c_toupper (word.string[0]);
+
+  switch (first)
+    {
+    case 'A':
+      /* ALL or AND. */
+      return c_toupper (word.string[1]) == 'L' ? T_ALL : T_AND;
+
+    case 'B':
+      return T_BY;
+
+    case 'E':
+      return T_EQ;
+
+    case 'G':
+      /* GE or GT. */
+      return c_toupper (word.string[1]) == 'E' ? T_GE : T_GT;
+
+    case 'L':
+      /* LE or LT. */
+      return c_toupper (word.string[1]) == 'E' ? T_LE : T_LT;
+
+    case 'N':
+      /* NE or NOT, distinguished by length. */
+      return word.length == 2 ? T_NE : T_NOT;
+
+    case 'O':
+      return T_OR;
+
+    case 'T':
+      return T_TO;
+
+    case 'W':
+      return T_WITH;
+    }
+
+  NOT_REACHED ();
+}
+
+/* Returns the token type for the one-character punctuator C0.  C0 must be
+   one of the characters listed below. */
+static enum token_type
+scan_punct1__ (char c0)
+{
+  switch (c0)
+    {
+    /* Grouping and separators. */
+    case '(': return T_LPAREN;
+    case ')': return T_RPAREN;
+    case '[': return T_LBRACK;
+    case ']': return T_RBRACK;
+    case ',': return T_COMMA;
+
+    /* Arithmetic. */
+    case '+': return T_PLUS;
+    case '-': return T_DASH;
+    case '*': return T_ASTERISK;
+    case '/': return T_SLASH;
+
+    /* Comparison. */
+    case '=': return T_EQUALS;
+    case '<': return T_LT;
+    case '>': return T_GT;
+
+    /* Logical. */
+    case '&': return T_AND;
+    case '|': return T_OR;
+    case '~': return T_NOT;
+
+    default:
+      NOT_REACHED ();
+    }
+}
+
+/* Returns the token type for the two-character punctuator made of C0
+   followed by C1.  C1 is examined only where C0 alone is ambiguous, which is
+   the case for '<' ("<=" versus any other pairing, treated as "<>"). */
+static enum token_type
+scan_punct2__ (char c0, char c1)
+{
+  switch (c0)
+    {
+    case '*':
+      return T_EXP;
+
+    case '>':
+      return T_GE;
+
+    case '<':
+      if (c1 == '=')
+        return T_LE;
+      return T_NE;
+
+    case '~':
+      return T_NE;
+
+    case '&':
+      return T_AND;
+
+    case '|':
+      return T_OR;
+
+    default:
+      NOT_REACHED ();
+    }
+}
+
+/* Returns the token type for punctuation segment S, which is treated as a
+   one-character punctuator if its length is 1 and as a two-character
+   punctuator otherwise. */
+static enum token_type
+scan_punct__ (struct substring s)
+{
+  if (s.length == 1)
+    return scan_punct1__ (s.string[0]);
+  else
+    return scan_punct2__ (s.string[0], s.string[1]);
+}
+
+/* Returns the value of the number whose text is S.
+
+   strtod() needs a null-terminated string, so S is copied: into a local
+   buffer when it fits, otherwise into heap memory that is freed before
+   returning.
+
+   NOTE(review): strtod() honors the current locale's decimal point; confirm
+   that callers run with a locale in which "." is the decimal point. */
+static double
+scan_number__ (struct substring s)
+{
+  char local[128];
+  char *heap = NULL;
+  const char *text;
+  double value;
+
+  if (s.length >= sizeof local)
+    text = heap = xmemdup0 (s.string, s.length);
+  else
+    {
+      memcpy (local, s.string, s.length);
+      local[s.length] = '\0';
+      text = local;
+    }
+
+  value = strtod (text, NULL);
+
+  /* A no-op when the local buffer was used. */
+  free (heap);
+
+  return value;
+}
+
+/* Makes TOKEN a SCAN_UNEXPECTED_CHAR token whose number is the code point of
+   the first UTF-8 character in *S.  Returns SCAN_DONE. */
+static enum scan_result
+scan_unexpected_char (const struct substring *s, struct token *token)
+{
+  const uint8_t *text = CHAR_CAST (const uint8_t *, s->string);
+  ucs4_t code_point;
+
+  u8_mbtouc (&code_point, text, s->length);
+
+  token->type = SCAN_UNEXPECTED_CHAR;
+  token->number = code_point;
+
+  return SCAN_DONE;
+}
+
+/* Returns the name of scan token type TYPE as a string.  For the SCAN_*
+   types this is the enumerator's name without its SCAN_ prefix; any other
+   value is passed on to token_type_to_name(). */
+const char *
+scan_type_to_string (enum scan_type type)
+{
+  switch (type)
+    {
+#define SCAN_TYPE(NAME) case SCAN_##NAME: return #NAME;
+      SCAN_TYPES
+#undef SCAN_TYPE
+
+    default:
+      break;
+    }
+
+  return token_type_to_name (type);
+}
+
+/* Returns true if TYPE lies strictly between SCAN_FIRST and SCAN_LAST, that
+   is, if it is one of the SCAN_* types listed in SCAN_TYPES. */
+bool
+is_scan_type (enum scan_type type)
+{
+  return !(type <= SCAN_FIRST || type >= SCAN_LAST);
+}
+
+/* Handles the first segment pushed to SCANNER (state S_START). TYPE and S
+ are the segment's type and text.
+
+ Most segment types map directly to one token, which is stored in TOKEN
+ before returning SCAN_DONE. String segments are handed to
+ scan_string_segment__(), and a lone "-" switches SCANNER to S_DASH and
+ returns SCAN_SAVE so that the next segment can decide the token. */
+static enum scan_result
+scan_start__ (struct scanner *scanner, enum segment_type type,
+ struct substring s, struct token *token)
+{
+ switch (type)
+ {
+ case SEG_NUMBER:
+ token->type = T_POS_NUM;
+ token->number = scan_number__ (s);
+ return SCAN_DONE;
+
+ case SEG_QUOTED_STRING:
+ case SEG_HEX_STRING:
+ case SEG_UNICODE_STRING:
+ return scan_string_segment__ (scanner, type, s, token);
+
+ /* These segments become string tokens holding a verbatim copy of S. */
+ case SEG_UNQUOTED_STRING:
+ case SEG_DO_REPEAT_COMMAND:
+ case SEG_INLINE_DATA:
+ case SEG_DOCUMENT:
+ token->type = T_STRING;
+ ss_alloc_substring (&token->string, s);
+ return SCAN_DONE;
+
+ case SEG_RESERVED_WORD:
+ token->type = scan_reserved_word__ (s);
+ return SCAN_DONE;
+
+ case SEG_IDENTIFIER:
+ token->type = T_ID;
+ ss_alloc_substring (&token->string, s);
+ return SCAN_DONE;
+
+ case SEG_PUNCT:
+ if (s.length == 1 && s.string[0] == '-')
+ {
+ /* The token type is left for scan_dash__() to decide. */
+ scanner->state = S_DASH;
+ return SCAN_SAVE;
+ }
+ else
+ {
+ token->type = scan_punct__ (s);
+ return SCAN_DONE;
+ }
+
+ /* Segments that produce no parser token are reported as SCAN_SKIP. */
+ case SEG_SHBANG:
+ case SEG_SPACES:
+ case SEG_COMMENT:
+ case SEG_NEWLINE:
+ case SEG_COMMENT_COMMAND:
+ token->type = SCAN_SKIP;
+ return SCAN_DONE;
+
+ /* Yields an identifier token spelled "DOCUMENT", regardless of S. */
+ case SEG_START_DOCUMENT:
+ token->type = T_ID;
+ ss_alloc_substring (&token->string, ss_cstr ("DOCUMENT"));
+ return SCAN_DONE;
+
+ case SEG_START_COMMAND:
+ case SEG_SEPARATE_COMMANDS:
+ case SEG_END_COMMAND:
+ token->type = T_ENDCMD;
+ return SCAN_DONE;
+
+ case SEG_END:
+ token->type = T_STOP;
+ return SCAN_DONE;
+
+ /* Segmentation errors are passed along as the matching SCAN_* type. */
+ case SEG_EXPECTED_QUOTE:
+ token->type = SCAN_EXPECTED_QUOTE;
+ return SCAN_DONE;
+
+ case SEG_EXPECTED_EXPONENT:
+ token->type = SCAN_EXPECTED_EXPONENT;
+ ss_alloc_substring (&token->string, s);
+ return SCAN_DONE;
+
+ case SEG_UNEXPECTED_DOT:
+ token->type = SCAN_UNEXPECTED_DOT;
+ return SCAN_DONE;
+
+ case SEG_UNEXPECTED_CHAR:
+ return scan_unexpected_char (&s, token);
+
+ case SEG_N_TYPES:
+ NOT_REACHED ();
+ }
+
+ NOT_REACHED ();
+}
+
+/* Handles a segment that arrives after a lone "-" (state S_DASH).
+
+   Spaces and comments are passed over.  A number turns the pair into a
+   T_NEG_NUM token holding the negated value.  Anything else makes the "-" a
+   T_DASH token by itself and returns SCAN_BACK so that the segment is
+   scanned again as part of the next token. */
+static enum scan_result
+scan_dash__ (enum segment_type type, struct substring s, struct token *token)
+{
+  if (type == SEG_SPACES || type == SEG_COMMENT)
+    return SCAN_MORE;
+
+  if (type == SEG_NUMBER)
+    {
+      token->type = T_NEG_NUM;
+      token->number = -scan_number__ (s);
+      return SCAN_DONE;
+    }
+
+  token->type = T_DASH;
+  return SCAN_BACK;
+}
+
+/* Initializes SCANNER for scanning a token from a sequence of segments.
+   Initializes TOKEN as the output token.  (The client retains ownership of
+   TOKEN, but it must be preserved across subsequent calls to scanner_push()
+   for SCANNER.)
+
+   A scanner only produces a single token.  To obtain the next token,
+   re-initialize it by calling this function again.
+
+   A scanner does not contain any external references, so nothing needs to be
+   done to destroy one.  For the same reason, scanners may be copied with plain
+   struct assignment (or memcpy). */
+void
+scanner_init (struct scanner *scanner, struct token *token)
+{
+  scanner->state = S_START;
+
+  /* The substate is only consulted in S_STRING state, which resets it on
+     entry, but give it a defined value so that a freshly initialized scanner
+     never carries an indeterminate member when it is copied or inspected. */
+  scanner->substate = 0;
+
+  token_init (token);
+}
+
+/* Adds the segment with type TYPE and UTF-8 text S to SCANNER. TOKEN must be
+ the same token passed to scanner_init() for SCANNER, or a copy of it.
+ scanner_push() may modify TOKEN. The client retains ownership of TOKEN.
+
+ The possible return values are:
+
+ - SCAN_DONE: All of the segments that have been passed to scanner_push()
+ form the token now stored in TOKEN. SCANNER is now "used up" and must
+ be reinitialized with scanner_init() if it is to be used again.
+
+ Most tokens only consist of a single segment, so this is the most common
+ return value.
+
+ - SCAN_MORE: The segments passed to scanner_push() don't yet determine a
+ token. The caller should call scanner_push() again with the next
+ segment. (This won't happen if TYPE is SEG_END indicating the end of
+ input.)
+
+ - SCAN_SAVE: This is similar to SCAN_MORE, with one difference: the caller
+ needs to "save its place" in the stream of segments for a possible
+ future SCAN_BACK return. This value can be returned more than once in a
+ sequence of scanner_push() calls for SCANNER, but the caller only needs
+ to keep track of the most recent position.
+
+ - SCAN_BACK: This is similar to SCAN_DONE, but the token consists of only
+ the segments up to and including the segment for which SCAN_SAVE was
+ most recently returned. Segments following that one should be passed to
+ the next scanner to be initialized.
+*/
+enum scan_result
+scanner_push (struct scanner *scanner, enum segment_type type,
+ struct substring s, struct token *token)
+{
+ /* Dispatch on the state left behind by the previous segment, if any. */
+ switch (scanner->state)
+ {
+ case S_START:
+ return scan_start__ (scanner, type, s, token);
+
+ case S_DASH:
+ return scan_dash__ (type, s, token);
+
+ case S_STRING:
+ return scan_string__ (scanner, type, s, token);
+ }
+
+ NOT_REACHED ();
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SCAN_H
+#define SCAN_H 1
+
+#include "language/lexer/segment.h"
+#include "libpspp/str.h"
+
+struct token;
+
+/* PSPP syntax scanning.
+
+ PSPP divides traditional "lexical analysis" or "tokenization" into two
+ phases: a lower-level phase called "segmentation" and a higher-level phase
+ called "scanning". segment.h provides declarations for the segmentation
+ phase. This header file contains declarations for the scanning phase.
+
+ Scanning accepts as input a stream of segments, which are UTF-8 strings each
+ labeled with a segment type. It outputs a stream of "scan tokens", which
+ are the same as the tokens used by the PSPP parser with a few additional
+ types.
+*/
+
+/* List of the additional scan token types. To use it, define SCAN_TYPE(NAME)
+ to expand to whatever is wanted for each type, expand SCAN_TYPES, and then
+ undefine SCAN_TYPE again. */
+#define SCAN_TYPES \
+ SCAN_TYPE(BAD_HEX_LENGTH) \
+ SCAN_TYPE(BAD_HEX_DIGIT) \
+ \
+ SCAN_TYPE(BAD_UNICODE_LENGTH) \
+ SCAN_TYPE(BAD_UNICODE_DIGIT) \
+ SCAN_TYPE(BAD_UNICODE_CODE_POINT) \
+ \
+ SCAN_TYPE(EXPECTED_QUOTE) \
+ SCAN_TYPE(EXPECTED_EXPONENT) \
+ SCAN_TYPE(UNEXPECTED_DOT) \
+ SCAN_TYPE(UNEXPECTED_CHAR) \
+ \
+ SCAN_TYPE(SKIP)
+
+/* Types of scan tokens.
+
+ Scan token types are a superset of enum token_type. Only the additional
+ scan token types are defined here, so see the definition of enum token_type
+ for the others.
+
+ SCAN_FIRST and SCAN_LAST are not token types themselves; they bracket the
+ SCAN_* values so that is_scan_type() can test for them by range.
+ NOTE(review): SCAN_FIRST = 255 presumably keeps these values clear of every
+ enum token_type value; confirm against that enum's definition. */
+enum scan_type
+ {
+#define SCAN_TYPE(TYPE) SCAN_##TYPE,
+ SCAN_FIRST = 255,
+ SCAN_TYPES
+ SCAN_LAST
+#undef SCAN_TYPE
+ };
+
+const char *scan_type_to_string (enum scan_type);
+bool is_scan_type (enum scan_type);
+
+/* A scanner. Opaque: clients should only pass it to scanner_init() and
+ scanner_push(). */
+struct scanner
+ {
+ unsigned char state; /* Current state, set by scanner_init(). */
+ unsigned char substate; /* Extra per-state flags. */
+ };
+
+/* scanner_push() return type.
+
+ SCAN_DONE and SCAN_BACK mean that a complete token has been scanned.
+ SCAN_MORE and SCAN_SAVE mean that the token is still incomplete. */
+enum scan_result
+ {
+ SCAN_DONE, /* Complete: token successfully scanned. */
+ SCAN_MORE, /* Incomplete: more segments needed to scan token. */
+
+ SCAN_BACK, /* Complete: done, but go back to saved position too. */
+ SCAN_SAVE /* Incomplete: need more segments, and save position. */
+ };
+
+void scanner_init (struct scanner *, struct token *);
+enum scan_result scanner_push (struct scanner *, enum segment_type,
+ struct substring, struct token *);
+
+#endif /* scan.h */
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "language/lexer/token.h"
+
+#include <math.h>
+#include <unictype.h>
+#include <unistr.h>
+
+#include "data/identifier.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+
+#include "gl/ftoastr.h"
+#include "gl/xalloc.h"
+
+/* Puts TOKEN into its initial state: type 0, number 0, and an empty
+   string. */
+void
+token_init (struct token *token)
+{
+  token->string = ss_empty ();
+  token->number = 0.0;
+  token->type = 0;
+}
+
+/* Releases the string owned by TOKEN.  Does nothing if TOKEN is null. */
+void
+token_destroy (struct token *token)
+{
+  if (token == NULL)
+    return;
+
+  ss_dealloc (&token->string);
+}
+
+/* Returns a newly allocated string for number token TOKEN: the magnitude of
+   its number, with a leading "-" unless TOKEN's type is T_POS_NUM.  The sign
+   therefore comes from the token type rather than from the number itself. */
+static char *
+number_token_to_string (const struct token *token)
+{
+  char magnitude[DBL_BUFSIZE_BOUND];
+
+  dtoastr (magnitude, sizeof magnitude, 0, 0, fabs (token->number));
+
+  if (token->type != T_POS_NUM)
+    return xasprintf ("-%s", magnitude);
+
+  return xstrdup (magnitude);
+}
+
+/* Returns a newly allocated string that holds SS enclosed in single quotes,
+   with every single quote inside SS doubled.  N_QUOTES must be the number of
+   single quotes in SS; it is used to size the result. */
+static char *
+quoted_string_representation (struct substring ss, size_t n_quotes)
+{
+  /* Opening quote, contents, one extra byte per embedded quote, closing
+     quote, null terminator. */
+  char *out = xmalloc (1 + ss.length + n_quotes + 1 + 1);
+  char *dst = out;
+  size_t idx = 0;
+
+  *dst++ = '\'';
+  while (idx < ss.length)
+    {
+      uint8_t byte = ss.string[idx++];
+
+      if (byte == '\'')
+        *dst++ = byte;
+      *dst++ = byte;
+    }
+  *dst++ = '\'';
+  *dst = '\0';
+
+  return out;
+}
+
+/* Returns a newly allocated string of the form X'...' in which each byte of
+   SS is written as two lowercase hexadecimal digits. */
+static char *
+hex_string_representation (struct substring ss)
+{
+  static const char hex_digits[] = "0123456789abcdef";
+
+  /* X, opening quote, two digits per byte, closing quote, terminator. */
+  char *out = xmalloc (2 + 2 * ss.length + 1 + 1);
+  char *dst = out;
+  size_t idx;
+
+  *dst++ = 'X';
+  *dst++ = '\'';
+  for (idx = 0; idx < ss.length; idx++)
+    {
+      uint8_t byte = ss.string[idx];
+
+      *dst++ = hex_digits[byte >> 4];
+      *dst++ = hex_digits[byte & 15];
+    }
+  *dst++ = '\'';
+  *dst = '\0';
+
+  return out;
+}
+
+/* Returns a newly allocated syntax representation of string SS.  If SS
+   decodes as UTF-8 and every character in it is printable, the result is a
+   single-quoted string; otherwise it is a hex string. */
+static char *
+string_representation (struct substring ss)
+{
+  size_t n_quotes = 0;
+  size_t ofs = 0;
+
+  while (ofs < ss.length)
+    {
+      ucs4_t uc;
+      int n_bytes = u8_mbtoucr (&uc,
+                                CHAR_CAST (const uint8_t *, ss.string + ofs),
+                                ss.length - ofs);
+
+      /* An undecodable or unprintable character forces hex notation. */
+      if (n_bytes < 0 || !uc_is_print (uc))
+        return hex_string_representation (ss);
+
+      /* Count quotes so that the quoted form can be sized exactly. */
+      if (uc == '\'')
+        n_quotes++;
+
+      ofs += n_bytes;
+    }
+
+  return quoted_string_representation (ss, n_quotes);
+}
+
+/* Returns a UTF-8 string that would yield TOKEN if it appeared in a syntax
+   file.  The caller owns the returned string and should release it with
+   free() when it is no longer needed.
+
+   Returns NULL for a token type that has no name, such as T_STOP, which has
+   no representation. */
+char *
+token_to_string (const struct token *token)
+{
+  int type = token->type;
+
+  if (type == T_POS_NUM || type == T_NEG_NUM)
+    return number_token_to_string (token);
+  else if (type == T_ID)
+    return ss_xstrdup (token->string);
+  else if (type == T_STRING)
+    return string_representation (token->string);
+  else
+    {
+      const char *name = token_type_to_name (type);
+
+      if (name == NULL)
+        return NULL;
+      return xstrdup (name);
+    }
+}
+
+/* Prints TOKEN on STREAM, for debugging, as one line: the name of the token
+   type, then a tab and the number if the token is numeric or its number is
+   nonzero, then a tab and the double-quoted string if the token is an
+   identifier or string or its string is nonempty.
+
+   A token type without a name is printed as "(unnamed)" rather than passing
+   a null pointer to fputs(). */
+void
+token_print (const struct token *token, FILE *stream)
+{
+  const char *name = token_type_to_name (token->type);
+
+  fputs (name != NULL ? name : "(unnamed)", stream);
+  if (token->type == T_POS_NUM || token->type == T_NEG_NUM
+      || token->number != 0.0)
+    {
+      char s[DBL_BUFSIZE_BOUND];
+
+      dtoastr (s, sizeof s, 0, 0, token->number);
+      fprintf (stream, "\t%s", s);
+    }
+  if (token->type == T_ID || token->type == T_STRING || token->string.length)
+    {
+      /* An empty substring may have a null data pointer, which must not be
+         passed for %s even with a precision of 0. */
+      const char *text = (token->string.string != NULL
+                          ? token->string.string
+                          : "");
+
+      fprintf (stream, "\t\"%.*s\"", (int) token->string.length, text);
+    }
+  putc ('\n', stream);
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef TOKEN_H
+#define TOKEN_H 1
+
+#include <stdio.h>
+#include "libpspp/str.h"
+#include "data/identifier.h"
+
+/* A PSPP syntax token.
+
+ The 'type' member is used by the scanner (see scan.h) for SCAN_* values as
+ well, which is why it is not declared as type "enum token_type".
+
+ The 'string' member is released by token_destroy(). */
+struct token
+ {
+ int type; /* Usually a "enum token_type" value. */
+ double number; /* Value of a numeric token; otherwise 0 unless set. */
+ struct substring string; /* Text of an identifier or string token. */
+ };
+
+/* Static initializer for a struct token with the given TYPE and NUMBER, whose
+ string is built from STRING with SS_LITERAL_INITIALIZER. */
+#define TOKEN_INITIALIZER(TYPE, NUMBER, STRING) \
+ { TYPE, NUMBER, SS_LITERAL_INITIALIZER (STRING) }
+
+void token_init (struct token *);
+void token_destroy (struct token *);
+
+char *token_to_string (const struct token *);
+
+void token_print (const struct token *, FILE *);
+
+#endif /* token.h */
tests/data/sack \
tests/data/inexactify \
tests/language/lexer/command-name-test \
+ tests/language/lexer/scan-test \
tests/language/lexer/segment-test \
tests/libpspp/abt-test \
tests/libpspp/bt-test \
$(LIBINTL)
tests_language_lexer_command_name_test_CFLAGS = $(AM_CFLAGS)
+check_PROGRAMS += tests/language/lexer/scan-test
+tests_language_lexer_scan_test_SOURCES = \
+ src/data/identifier.c \
+ src/language/lexer/command-name.c \
+ src/language/lexer/scan.c \
+ src/language/lexer/segment.c \
+ src/language/lexer/token.c \
+ src/libpspp/pool.c \
+ src/libpspp/prompt.c \
+ src/libpspp/str.c \
+ src/libpspp/temp-file.c \
+ tests/language/lexer/scan-test.c
+tests_language_lexer_scan_test_LDADD = gl/libgl.la $(LIBINTL)
+tests_language_lexer_scan_test_CFLAGS = $(AM_CFLAGS)
check_PROGRAMS += tests/language/lexer/segment-test
tests_language_lexer_segment_test_SOURCES = \
tests/language/lexer/command-name.at \
tests/language/lexer/lexer.at \
tests/language/lexer/q2c.at \
+ tests/language/lexer/scan.at \
tests/language/lexer/segment.at \
tests/language/lexer/variable-parser.at \
tests/language/stats/aggregate.at \
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
+#include "libpspp/misc.h"
+#include "language/lexer/scan.h"
+#include "language/lexer/token.h"
+
+#include "gl/error.h"
+#include "gl/ftoastr.h"
+#include "gl/progname.h"
+#include "gl/read-file.h"
+#include "gl/xalloc.h"
+
+/* -a/--auto, -b/--batch, -i/--interactive: syntax mode. */
+static enum segmenter_mode mode = SEG_MODE_AUTO;
+
+static const char *parse_options (int argc, char **argv);
+static void usage (void) NO_RETURN;
+
+/* Reads the input named on the command line, breaks it into segments in the
+   selected syntax mode, feeds the segments to the scanner, and prints one
+   line per scan token on stdout: the token type, then the number if it is
+   nonzero, then the string in double quotes if there is one. */
+int
+main (int argc, char *argv[])
+{
+  struct segment
+    {
+      enum segment_type type;
+      struct substring string;
+    };
+
+  size_t offset;
+  const char *file_name;
+  char *input;
+  struct segmenter s;
+  struct segment *segs;
+  size_t n_segs, allocated_segs;
+  size_t length;
+  size_t i;
+  int n;
+
+  set_program_name (argv[0]);
+  file_name = parse_options (argc, argv);
+
+  /* Read the named file, or stdin if the name is "-", into 'input'.  Ensure
+     that 'input' ends in a new-line followed by a null byte. */
+  input = (!strcmp (file_name, "-")
+           ? fread_file (stdin, &length)
+           : read_file (file_name, &length));
+  if (input == NULL)
+    error (EXIT_FAILURE, errno, "reading %s failed", file_name);
+  input = xrealloc (input, length + 3);
+  if (length == 0 || input[length - 1] != '\n')
+    input[length++] = '\n';
+  input[length++] = '\0';
+
+  segs = NULL;
+  n_segs = allocated_segs = 0;
+
+  /* Phase 1: split the whole input into segments.  Each segment's string
+     points into 'input', so 'input' must outlive 'segs'. */
+  segmenter_init (&s, mode);
+  for (offset = 0; offset < length; offset += n)
+    {
+      enum segment_type type;
+
+      n = segmenter_push (&s, input + offset, length - offset, &type);
+      assert (n >= 0);
+      assert (offset + n <= length);
+
+      if (n_segs >= allocated_segs)
+        segs = x2nrealloc (segs, &allocated_segs, sizeof *segs);
+
+      segs[n_segs].type = type;
+      segs[n_segs].string.string = input + offset;
+      segs[n_segs].string.length = n;
+      n_segs++;
+    }
+
+  /* Phase 2: turn the segments into tokens, one scanner per token. */
+  for (i = 0; i < n_segs; )
+    {
+      enum scan_result result;
+      struct scanner scanner;
+      struct token token;
+      size_t saved = 0;
+      bool have_saved = false;
+
+      scanner_init (&scanner, &token);
+      do
+        {
+          struct segment *seg;
+
+          assert (i < n_segs);
+
+          seg = &segs[i++];
+          result = scanner_push (&scanner, seg->type, seg->string, &token);
+          if (result == SCAN_SAVE)
+            {
+              /* Remember the index just past this segment, in case the
+                 scanner later asks to back up to it. */
+              saved = i;
+              have_saved = true;
+            }
+        }
+      while (result == SCAN_MORE || result == SCAN_SAVE);
+
+      if (result == SCAN_BACK)
+        {
+          assert (have_saved);
+          i = saved;
+        }
+
+      printf ("%s", scan_type_to_string (token.type));
+      if (token.number != 0.0)
+        {
+          char number[DBL_BUFSIZE_BOUND];
+
+          dtoastr (number, sizeof number, 0, 0, token.number);
+          printf (" %s", number);
+        }
+      if (token.string.string != NULL || token.string.length > 0)
+        printf (" \"%.*s\"", (int) token.string.length, token.string.string);
+      printf ("\n");
+
+      token_destroy (&token);
+    }
+
+  free (segs);
+  free (input);
+
+  return 0;
+}
+
+/* Parses the command line in ARGC and ARGV, setting 'mode' according to the
+   -a, -b, and -i options and printing help and exiting for -h.  Exits with a
+   failure status on an unrecognized option or if the options are not
+   followed by exactly one non-option argument.  Returns that argument. */
+static const char *
+parse_options (int argc, char **argv)
+{
+  static const struct option options[] =
+    {
+      {"auto", no_argument, NULL, 'a'},
+      {"batch", no_argument, NULL, 'b'},
+      {"interactive", no_argument, NULL, 'i'},
+      {"help", no_argument, NULL, 'h'},
+      {NULL, 0, NULL, 0},
+    };
+  int c;
+
+  while ((c = getopt_long (argc, argv, "abih", options, NULL)) != -1)
+    switch (c)
+      {
+      case 'a':
+        mode = SEG_MODE_AUTO;
+        break;
+
+      case 'b':
+        mode = SEG_MODE_BATCH;
+        break;
+
+      case 'i':
+        mode = SEG_MODE_INTERACTIVE;
+        break;
+
+      case 'h':
+        /* Does not return. */
+        usage ();
+
+      case 0:
+        break;
+
+      case '?':
+        /* getopt_long() has already printed a diagnostic. */
+        exit (EXIT_FAILURE);
+
+      default:
+        NOT_REACHED ();
+      }
+
+  if (optind + 1 != argc)
+    error (1, 0, "exactly one non-option argument required; "
+           "use --help for help");
+  return argv[optind];
+}
+
+/* Prints a help message on stdout and exits successfully.  The options
+   listed are exactly those that parse_options() accepts, and the default
+   shown matches the initial value of 'mode'. */
+static void
+usage (void)
+{
+  printf ("\
+%s, to test breaking PSPP syntax into tokens\n\
+usage: %s [OPTIONS] INPUT\n\
+\n\
+Options:\n\
+  -a, --auto          use \"auto\" syntax mode (default)\n\
+  -b, --batch         use \"batch\" syntax mode\n\
+  -i, --interactive   use \"interactive\" syntax mode\n\
+  -h, --help          print this help message\n",
+          program_name, program_name);
+  exit (EXIT_SUCCESS);
+}
--- /dev/null
+AT_BANNER([syntax scanning])
+m4_define([PSPP_CHECK_SCAN],
+ [AT_CHECK([scan-test $1 input], [0], [expout])])
+\f
+AT_SETUP([identifiers])
+AT_KEYWORDS([scan])
+AT_DATA([input], [dnl
+a aB i5 $x @efg @@. #.# .x _z.
+abcd. abcd.
+QRSTUV./* end of line comment */
+QrStUv./* end of line comment */ @&t@
+WXYZ. /* unterminated end of line comment
+�. /* U+FFFD is not valid in an identifier
+])
+AT_DATA([expout], [dnl
+ID "a"
+SKIP
+ID "aB"
+SKIP
+ID "i5"
+SKIP
+ID "$x"
+SKIP
+ID "@efg"
+SKIP
+ID "@@."
+SKIP
+ID "#.#"
+SKIP
+UNEXPECTED_DOT
+ID "x"
+SKIP
+UNEXPECTED_CHAR 95
+ID "z"
+ENDCMD
+SKIP
+ID "abcd."
+SKIP
+ID "abcd"
+ENDCMD
+SKIP
+ID "QRSTUV"
+ENDCMD
+SKIP
+SKIP
+ID "QrStUv"
+ENDCMD
+SKIP
+SKIP
+SKIP
+ID "WXYZ"
+ENDCMD
+SKIP
+SKIP
+SKIP
+UNEXPECTED_CHAR 65533
+ENDCMD
+SKIP
+SKIP
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([reserved words])
+AT_KEYWORDS([scan])
+AT_DATA([input], [dnl
+and or not eq ge gt le lt ne all by to with
+AND OR NOT EQ GE GT LE LT NE ALL BY TO WITH
+andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
+and. with.
+])
+AT_DATA([expout], [dnl
+AND
+SKIP
+OR
+SKIP
+NOT
+SKIP
+EQ
+SKIP
+GE
+SKIP
+GT
+SKIP
+LE
+SKIP
+LT
+SKIP
+NE
+SKIP
+ALL
+SKIP
+BY
+SKIP
+TO
+SKIP
+WITH
+SKIP
+AND
+SKIP
+OR
+SKIP
+NOT
+SKIP
+EQ
+SKIP
+GE
+SKIP
+GT
+SKIP
+LE
+SKIP
+LT
+SKIP
+NE
+SKIP
+ALL
+SKIP
+BY
+SKIP
+TO
+SKIP
+WITH
+SKIP
+ID "andx"
+SKIP
+ID "orx"
+SKIP
+ID "notx"
+SKIP
+ID "eqx"
+SKIP
+ID "gex"
+SKIP
+ID "gtx"
+SKIP
+ID "lex"
+SKIP
+ID "ltx"
+SKIP
+ID "nex"
+SKIP
+ID "allx"
+SKIP
+ID "byx"
+SKIP
+ID "tox"
+SKIP
+ID "withx"
+SKIP
+ID "and."
+SKIP
+WITH
+ENDCMD
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([punctuation])
+AT_KEYWORDS([scan])
+AT_DATA([input], [dnl
+~ & | = >= > <= < ~= <> ( ) , - + * / [[ ]] **
+~&|=>=><=<~=<>(),-+*/[[]]**
+])
+AT_DATA([expout], [dnl
+NOT
+SKIP
+AND
+SKIP
+OR
+SKIP
+EQUALS
+SKIP
+GE
+SKIP
+GT
+SKIP
+LE
+SKIP
+LT
+SKIP
+NE
+SKIP
+NE
+SKIP
+LPAREN
+SKIP
+RPAREN
+SKIP
+COMMA
+SKIP
+DASH
+SKIP
+PLUS
+SKIP
+ASTERISK
+SKIP
+SLASH
+SKIP
+LBRACK
+SKIP
+RBRACK
+SKIP
+EXP
+SKIP
+NOT
+AND
+OR
+EQUALS
+GE
+GT
+LE
+LT
+NE
+NE
+LPAREN
+RPAREN
+COMMA
+DASH
+PLUS
+ASTERISK
+SLASH
+LBRACK
+RBRACK
+EXP
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([numbers])
+AT_KEYWORDS([scan])
+AT_DATA([input], [dnl
+0 1 01 001. 1.
+123. /* comment 1 */ /* comment 2 */
+.1 0.1 00.1 00.10
+5e1 6E-1 7e+1 6E+01 6e-03
+.3E1 .4e-1 .5E+1 .6e+01 .7E-03
+1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
+. 1e e1 1e+ 1e-
+])
+AT_DATA([expout], [dnl
+POS_NUM
+SKIP
+POS_NUM 1
+SKIP
+POS_NUM 1
+SKIP
+POS_NUM 1
+SKIP
+POS_NUM 1
+ENDCMD
+SKIP
+POS_NUM 123
+ENDCMD
+SKIP
+SKIP
+SKIP
+SKIP
+SKIP
+ENDCMD
+POS_NUM 1
+SKIP
+POS_NUM 0.1
+SKIP
+POS_NUM 0.1
+SKIP
+POS_NUM 0.1
+SKIP
+POS_NUM 50
+SKIP
+POS_NUM 0.6
+SKIP
+POS_NUM 70
+SKIP
+POS_NUM 60
+SKIP
+POS_NUM 0.006
+SKIP
+ENDCMD
+POS_NUM 30
+SKIP
+POS_NUM 0.04
+SKIP
+POS_NUM 5
+SKIP
+POS_NUM 6
+SKIP
+POS_NUM 0.0007
+SKIP
+POS_NUM 12.3
+SKIP
+POS_NUM 4.56
+SKIP
+POS_NUM 789
+SKIP
+POS_NUM 999
+SKIP
+POS_NUM 0.0112
+SKIP
+ENDCMD
+SKIP
+EXPECTED_EXPONENT "1e"
+SKIP
+ID "e1"
+SKIP
+EXPECTED_EXPONENT "1e+"
+SKIP
+EXPECTED_EXPONENT "1e-"
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([strings])
+AT_KEYWORDS([scan])
+dnl Exercises quoted-string scanning.  The expected output covers:
+dnl  - single- and double-quoted strings, with a doubled quote character
+dnl    standing for one literal quote, and empty strings;
+dnl  - strings with no closing quote, expected as EXPECTED_QUOTE;
+dnl  - "+" concatenation, which is expected to join strings into a single
+dnl    STRING across comments and across one line break before and/or
+dnl    after the "+", but not across a blank line (there the pieces are
+dnl    expected as separate STRING, PLUS, and ENDCMD tokens);
+dnl  - x- and u-prefixed hex strings, for example x"4142" is expected as
+dnl    AB and u'304a' as a single non-ASCII character.
+dnl The U+FFFD replacement characters in the input and expected output
+dnl are intentional test data, matching the U"FFFD" case on the last
+dnl input line.
+AT_DATA([input], [dnl
+'x' "y" 'abc'
+'Don''t' "Can't" 'Won''t'
+"""quoted""" '"quoted"'
+'' "" '''' """"
+'missing end quote
+"missing double quote
+'x' + "y"
++ 'z' +
+'a' /* abc */ + "b" /*
++ 'c' +/* */"d"/* */+'e'
+'foo'
++ /* special case: + in column 0 would ordinarily start a new command
+'bar'
+'foo'
+ +
+'bar'
+'foo'
++
+
+'bar'
+
++
+x"4142"+'5152'
+"4142"+
+x'5152'
+x"4142"
++u'304a'
+"�あいうえお"
+"abc"+U"FFFD"+u'3048'+"xyz"
+])
+AT_DATA([expout], [dnl
+STRING "x"
+SKIP
+STRING "y"
+SKIP
+STRING "abc"
+SKIP
+STRING "Don't"
+SKIP
+STRING "Can't"
+SKIP
+STRING "Won't"
+SKIP
+STRING ""quoted""
+SKIP
+STRING ""quoted""
+SKIP
+STRING ""
+SKIP
+STRING ""
+SKIP
+STRING "'"
+SKIP
+STRING """
+SKIP
+EXPECTED_QUOTE
+SKIP
+EXPECTED_QUOTE
+SKIP
+STRING "xyzabcde"
+SKIP
+STRING "foobar"
+SKIP
+STRING "foobar"
+SKIP
+STRING "foo"
+SKIP
+PLUS
+SKIP
+ENDCMD
+SKIP
+STRING "bar"
+SKIP
+ENDCMD
+SKIP
+PLUS
+SKIP
+STRING "AB5152"
+SKIP
+STRING "4142QR"
+SKIP
+STRING "ABお"
+SKIP
+STRING "�あいうえお"
+SKIP
+STRING "abc�えxyz"
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([@%:@! construct])
+AT_KEYWORDS([scan])
+dnl The input holds the same "#!" line twice.  The expected output
+dnl skips the first one entirely (SKIP for the line, SKIP for its
+dnl newline) but scans the second one as ordinary tokens: ID "#",
+dnl UNEXPECTED_CHAR 33 for the "!" (33 is its ASCII code), then the path
+dnl as SLASH and ID tokens.  That is, the "#!" form is only expected to
+dnl be ignored on the first line of input.
+dnl The test name uses the Autoconf quadrigraph for "#" because a
+dnl literal "#" would start an m4 comment there.
+AT_DATA([input], [dnl
+#! /usr/bin/pspp
+#! /usr/bin/pspp
+])
+AT_DATA([expout], [dnl
+SKIP
+SKIP
+ID "#"
+UNEXPECTED_CHAR 33
+SKIP
+SLASH
+ID "usr"
+SLASH
+ID "bin"
+SLASH
+ID "pspp"
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([* and COMMENT commands])
+AT_KEYWORDS([scan])
+dnl Comment commands introduced by "*" or by COMMENT (or its
+dnl abbreviation COMM) are expected to produce only SKIP and ENDCMD
+dnl tokens, even when their text contains an unbalanced quote or starts
+dnl with "**".  A comment is expected to end at a "." terminator or at a
+dnl blank line, and may be indented.
+dnl The "com ..." line is instead expected to scan as ordinary tokens
+dnl (ID, WITH, ID); the input text attributes this to "com" being
+dnl ambiguous with COMPUTE.
+AT_DATA([input], [dnl
+* Comment commands "don't
+have to contain valid tokens.
+
+** Check ambiguity with ** token.
+****************.
+
+comment keyword works too.
+COMM also.
+com is ambiguous with COMPUTE.
+
+ * Comment need not start at left margin.
+
+* Comment ends with blank line
+
+next command.
+
+])
+AT_DATA([expout], [dnl
+SKIP
+SKIP
+SKIP
+ENDCMD
+SKIP
+ENDCMD
+SKIP
+SKIP
+ENDCMD
+SKIP
+SKIP
+ENDCMD
+SKIP
+ENDCMD
+SKIP
+SKIP
+ENDCMD
+SKIP
+SKIP
+ENDCMD
+SKIP
+ID "com"
+SKIP
+ID "is"
+SKIP
+ID "ambiguous"
+SKIP
+WITH
+SKIP
+ID "COMPUTE"
+ENDCMD
+SKIP
+ENDCMD
+SKIP
+SKIP
+SKIP
+ENDCMD
+SKIP
+ENDCMD
+SKIP
+SKIP
+SKIP
+ENDCMD
+SKIP
+ID "next"
+SKIP
+ID "command"
+ENDCMD
+SKIP
+ENDCMD
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([DOCUMENT command])
+AT_KEYWORDS([scan])
+dnl Each DOCUMENT command, including the abbreviations DOC and docu, is
+dnl expected to yield ID "DOCUMENT" followed by one STRING per input
+dnl line holding that line's raw text (the first STRING includes the
+dnl command word as typed), and then two ENDCMD tokens.  The text is not
+dnl tokenized: an apostrophe or an embedded "." is kept verbatim, and a
+dnl blank line inside the command is expected as an empty STRING rather
+dnl than as the end of the command.
+AT_DATA([input], [dnl
+DOCUMENT one line.
+DOC more
+ than
+ one
+ line.
+docu
+first.paragraph
+isn't parsed as tokens
+
+second paragraph.
+])
+AT_DATA([expout], [dnl
+ID "DOCUMENT"
+STRING "DOCUMENT one line."
+ENDCMD
+ENDCMD
+SKIP
+ID "DOCUMENT"
+STRING "DOC more"
+SKIP
+STRING " than"
+SKIP
+STRING " one"
+SKIP
+STRING " line."
+ENDCMD
+ENDCMD
+SKIP
+ID "DOCUMENT"
+STRING "docu"
+SKIP
+STRING "first.paragraph"
+SKIP
+STRING "isn't parsed as tokens"
+SKIP
+STRING ""
+SKIP
+STRING "second paragraph."
+ENDCMD
+ENDCMD
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([TITLE, SUBTITLE, FILE LABEL commands])
+AT_KEYWORDS([scan])
+dnl Covers TITLE, SUBTITLE, and FILE LABEL, including abbreviated
+dnl command words.  When the argument is a quoted string, it is expected
+dnl as an ordinary STRING, and comments or line breaks before it are
+dnl expected as SKIP.  When the argument is not quoted, the rest of the
+dnl command text is expected as one STRING taken verbatim, so "/*" and
+dnl apostrophes inside it are kept as literal text; the command is
+dnl expected to end at a "." terminator or a blank line.
+AT_DATA([input], [dnl
+title/**/'Quoted string title'.
+tit /*
+"Quoted string on second line".
+sub "Quoted string subtitle"
+ .
+
+TITL /* Not a */ quoted string title.
+SUBT Not a quoted string /* subtitle
+
+FIL label isn't quoted.
+FILE
+ lab 'is quoted'.
+FILE /*
+/**/ lab not quoted here either
+
+])
+AT_DATA([expout], [dnl
+ID "title"
+SKIP
+STRING "Quoted string title"
+ENDCMD
+SKIP
+ID "tit"
+SKIP
+SKIP
+SKIP
+STRING "Quoted string on second line"
+ENDCMD
+SKIP
+ID "sub"
+SKIP
+STRING "Quoted string subtitle"
+SKIP
+SKIP
+ENDCMD
+SKIP
+ENDCMD
+SKIP
+ID "TITL"
+SKIP
+STRING "/* Not a */ quoted string title"
+ENDCMD
+SKIP
+ID "SUBT"
+SKIP
+STRING "Not a quoted string /* subtitle"
+SKIP
+ENDCMD
+SKIP
+ID "FIL"
+SKIP
+ID "label"
+SKIP
+STRING "isn't quoted"
+ENDCMD
+SKIP
+ID "FILE"
+SKIP
+SKIP
+ID "lab"
+SKIP
+STRING "is quoted"
+ENDCMD
+SKIP
+ID "FILE"
+SKIP
+SKIP
+SKIP
+SKIP
+SKIP
+ID "lab"
+SKIP
+STRING "not quoted here either"
+SKIP
+ENDCMD
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([BEGIN DATA command])
+AT_KEYWORDS([scan])
+dnl After BEGIN DATA (also when abbreviated and interleaved with
+dnl comments), each following input line is expected as one STRING of
+dnl raw text, including "/*" and blank lines, until an END DATA line,
+dnl which is expected as the tokens ID "end" and ID "data".
+dnl In the second case the first "end data" line is expected as a data
+dnl STRING and only the second one as END DATA.
+dnl NOTE(review): those two input lines look identical in this copy;
+dnl presumably the first differed in internal whitespace that has since
+dnl been collapsed.  Confirm against the original file.
+AT_DATA([input], [dnl
+begin data.
+123
+xxx
+end data.
+
+BEG /**/ DAT /*
+5 6 7 /* x
+
+end data
+end data
+.
+])
+AT_DATA([expout], [dnl
+ID "begin"
+SKIP
+ID "data"
+ENDCMD
+SKIP
+STRING "123"
+SKIP
+STRING "xxx"
+SKIP
+ID "end"
+SKIP
+ID "data"
+ENDCMD
+SKIP
+ENDCMD
+SKIP
+ID "BEG"
+SKIP
+SKIP
+SKIP
+ID "DAT"
+SKIP
+SKIP
+SKIP
+STRING "5 6 7 /* x"
+SKIP
+STRING ""
+SKIP
+STRING "end data"
+SKIP
+ID "end"
+SKIP
+ID "data"
+SKIP
+ENDCMD
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([DO REPEAT command])
+AT_KEYWORDS([scan])
+dnl The DO REPEAT command line itself (continued over two lines) is
+dnl expected as ordinary tokens.  Every following body line is expected
+dnl as one STRING of raw text, including a nested "do repeat" line, a
+dnl line starting with "+", and the first "end ... repeat print." line.
+dnl Only the final "end" / "repeat." pair is expected as tokens.
+dnl NOTE(review): presumably the first END REPEAT is kept as body text
+dnl because it closes the nested DO REPEAT; confirm against the scanner.
+AT_DATA([input], [dnl
+do repeat x=a b c
+ y=d e f.
+ do repeat a=1 thru 5.
+another command.
+second command
++ third command.
+end /* x */ /* y */ repeat print.
+end
+ repeat.
+])
+AT_DATA([expout], [dnl
+ID "do"
+SKIP
+ID "repeat"
+SKIP
+ID "x"
+EQUALS
+ID "a"
+SKIP
+ID "b"
+SKIP
+ID "c"
+SKIP
+SKIP
+ID "y"
+EQUALS
+ID "d"
+SKIP
+ID "e"
+SKIP
+ID "f"
+ENDCMD
+SKIP
+STRING " do repeat a=1 thru 5."
+SKIP
+STRING "another command."
+SKIP
+STRING "second command"
+SKIP
+STRING "+ third command."
+SKIP
+STRING "end /* x */ /* y */ repeat print."
+SKIP
+ID "end"
+SKIP
+SKIP
+ID "repeat"
+ENDCMD
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-i])
+AT_CLEANUP
+\f
+AT_SETUP([batch mode])
+AT_KEYWORDS([scan])
+dnl Unlike the other tests in view, this one passes -b to
+dnl PSPP_CHECK_SCAN.
+dnl NOTE(review): presumably -b selects batch syntax mode, per the test
+dnl name; PSPP_CHECK_SCAN is defined outside this excerpt.
+dnl Expected behaviour: an indented line continues the current command;
+dnl a line that starts in the first column, whether with a word or with
+dnl "+", gets an ENDCMD before it even though the previous command has
+dnl no "." terminator, and the leading "+" is not expected as a PLUS
+dnl token.  A blank line is also expected as ENDCMD, and an explicit "."
+dnl still ends a command.
+AT_DATA([input], [dnl
+first command
+ another line of first command
++ second command
+third command
+
+fourth command.
+ fifth command.
+])
+AT_DATA([expout], [dnl
+ID "first"
+SKIP
+ID "command"
+SKIP
+SKIP
+ID "another"
+SKIP
+ID "line"
+SKIP
+ID "of"
+SKIP
+ID "first"
+SKIP
+ID "command"
+SKIP
+ENDCMD
+SKIP
+ID "second"
+SKIP
+ID "command"
+SKIP
+ENDCMD
+ID "third"
+SKIP
+ID "command"
+SKIP
+ENDCMD
+SKIP
+ID "fourth"
+SKIP
+ID "command"
+ENDCMD
+SKIP
+SKIP
+ID "fifth"
+SKIP
+ID "command"
+ENDCMD
+SKIP
+STOP
+])
+PSPP_CHECK_SCAN([-b])
+AT_CLEANUP