X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fscan.h;h=059d708175fef01bc1383b6d72a00057c8d674c2;hb=25a6f95a0cbe4a670b27943901b3af2cefc3e435;hp=fdb50801c44c34b8fdeae4181a705d3ad4ea5651;hpb=fe8dc2171009e90d2335f159d05f7e6660e24780;p=pspp diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h index fdb50801c4..059d708175 100644 --- a/src/language/lexer/scan.h +++ b/src/language/lexer/scan.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,59 +35,44 @@ struct token; types. */ -#define SCAN_TYPES \ - SCAN_TYPE(BAD_HEX_LENGTH) \ - SCAN_TYPE(BAD_HEX_DIGIT) \ - \ - SCAN_TYPE(BAD_UNICODE_LENGTH) \ - SCAN_TYPE(BAD_UNICODE_DIGIT) \ - SCAN_TYPE(BAD_UNICODE_CODE_POINT) \ - \ - SCAN_TYPE(EXPECTED_QUOTE) \ - SCAN_TYPE(EXPECTED_EXPONENT) \ - SCAN_TYPE(UNEXPECTED_DOT) \ - SCAN_TYPE(UNEXPECTED_CHAR) \ - \ - SCAN_TYPE(SKIP) - -/* Types of scan tokens. - - Scan token types are a superset of enum token_type. Only the additional - scan token types are defined here, so see the definition of enum token_type - for the others. */ -enum scan_type +enum tokenize_result { -#define SCAN_TYPE(TYPE) SCAN_##TYPE, - SCAN_FIRST = 255, - SCAN_TYPES - SCAN_LAST -#undef SCAN_TYPE + TOKENIZE_EMPTY, + TOKENIZE_TOKEN, + TOKENIZE_ERROR }; -const char *scan_type_to_string (enum scan_type); -bool is_scan_type (enum scan_type); +enum tokenize_result token_from_segment (enum segment_type, struct substring, + struct token *); -/* A scanner. Opaque. */ -struct scanner +struct merger { - unsigned char state; - unsigned char substate; + unsigned int state; }; +#define MERGER_INIT { 0 } -/* scanner_push() return type. */ -enum scan_result +int merger_add (struct merger *m, const struct token *in, struct token *out); + +/* A simplified lexer for handling syntax in a string. */ + +struct string_lexer { - /* Complete token. */ - SCAN_DONE, /* Token successfully scanned. */ - SCAN_MORE, /* More segments needed to scan token. */ + const char *input; + size_t length; + size_t offset; + struct segmenter segmenter; + }; - /* Incomplete token. */ - SCAN_BACK, /* Done, but go back to saved position too. */ - SCAN_SAVE /* Need more segments, and save position. */ +enum string_lexer_result + { + SLR_END, + SLR_TOKEN, + SLR_ERROR }; -void scanner_init (struct scanner *, struct token *); -enum scan_result scanner_push (struct scanner *, enum segment_type, - struct substring, struct token *); +void string_lexer_init (struct string_lexer *, const char *input, + size_t length, enum segmenter_mode, bool is_snippet); +enum string_lexer_result string_lexer_next (struct string_lexer *, + struct token *); #endif /* scan.h */