1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "language/lexer/segment.h"
21 #include "libpspp/str.h"
25 /* PSPP syntax scanning.
27 PSPP divides traditional "lexical analysis" or "tokenization" into two
28 phases: a lower-level phase called "segmentation" and a higher-level phase
29 called "scanning". segment.h provides declarations for the segmentation
30 phase. This header file contains declarations for the scanning phase.
32 Scanning accepts as input a stream of segments, which are UTF-8 strings each
33 labeled with a segment type. It outputs a stream of "scan tokens", which
34 are the same as the tokens used by the PSPP parser with a few additional
39 SCAN_TYPE(BAD_HEX_LENGTH) \
40 SCAN_TYPE(BAD_HEX_DIGIT) \
42 SCAN_TYPE(BAD_UNICODE_LENGTH) \
43 SCAN_TYPE(BAD_UNICODE_DIGIT) \
44 SCAN_TYPE(BAD_UNICODE_CODE_POINT) \
46 SCAN_TYPE(EXPECTED_QUOTE) \
47 SCAN_TYPE(EXPECTED_EXPONENT) \
48 SCAN_TYPE(UNEXPECTED_DOT) \
49 SCAN_TYPE(UNEXPECTED_CHAR) \
53 /* Types of scan tokens.
55 Scan token types are a superset of enum token_type. Only the additional
56 scan token types are defined here, so see the definition of enum token_type
/* Expands SCAN_TYPE(NAME) to the identifier SCAN_NAME followed by a comma,
   using token pasting.
   NOTE(review): presumably applied to the SCAN_TYPE(...) list earlier in this
   header to generate the enumerators of enum scan_type; the enclosing enum is
   not visible here -- confirm. */
60 #define SCAN_TYPE(TYPE) SCAN_##TYPE,
/* Returns a string for the given scan type.
   NOTE(review): presumably the name of the type; the lifetime and ownership
   of the returned string are not visible from this declaration -- confirm in
   the implementation. */
67 const char *scan_type_to_string (enum scan_type);
/* Predicate on a scan type value.
   NOTE(review): presumably returns true only for the additional scan token
   types defined in this header, as opposed to values that belong to enum
   token_type -- confirm in the implementation. */
68 bool is_scan_type (enum scan_type);
70 /* A scanner. Opaque. */
74 unsigned char substate;
77 /* scanner_push() return type. */
81 SCAN_DONE, /* Token successfully scanned. */
82 SCAN_MORE, /* More segments needed to scan token. */
84 /* Incomplete token. */
85 SCAN_BACK, /* Done, but go back to saved position too. */
86 SCAN_SAVE /* Need more segments, and save position. */
/* Takes a scanner and a token.
   NOTE(review): presumably initializes both so that segments can then be fed
   to the scanner with scanner_push() -- confirm in the implementation. */
89 void scanner_init (struct scanner *, struct token *);
/* Feeds one segment to the scanner, given as a segment type plus the
   segment's text as a substring, along with the token being scanned.

   The return value tells the caller how to proceed:

     SCAN_DONE: Token successfully scanned.
     SCAN_MORE: More segments needed to scan token.
     SCAN_BACK: Done, but go back to saved position too.
     SCAN_SAVE: Need more segments, and save position.

   NOTE(review): presumably the token argument receives the scanned token
   once SCAN_DONE or SCAN_BACK is returned -- confirm in the implementation. */
90 enum scan_result scanner_push (struct scanner *, enum segment_type,
91 struct substring, struct token *);
93 /* A simplified lexer for handling syntax in a string. */
100 struct segmenter segmenter;
/* Takes a string lexer, an INPUT string with its LENGTH, and a segmenter
   mode.
   NOTE(review): presumably sets up the lexer to read LENGTH bytes of syntax
   from INPUT using the given mode, and keeps a pointer to INPUT rather than
   copying it -- confirm in the implementation. */
103 void string_lexer_init (struct string_lexer *, const char *input,
104 size_t length, enum segmenter_mode);
/* Takes a string lexer and a token, and returns a bool.
   NOTE(review): presumably stores the next token from the lexer's input in
   the token argument and returns false once the input is exhausted --
   confirm in the implementation. */
105 bool string_lexer_next (struct string_lexer *, struct token *);