lexer: Reimplement for better testability and internationalization.
[pspp-builds.git] / src / data / identifier.h
1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.
3
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16
17 #ifndef DATA_IDENTIFIER_H
18 #define DATA_IDENTIFIER_H 1
19
20 #include <ctype.h>
21 #include <stdbool.h>
22 #include <unitypes.h>
23 #include "libpspp/str.h"
24
/* List of every token type, written as an X-macro.  Each entry has the
   form TOKEN_TYPE(NAME).  A user defines TOKEN_TYPE to the expansion it
   wants, expands TOKEN_TYPES, and then undefines TOKEN_TYPE again, as the
   definition of enum token_type in this header does.

   The order of the entries determines the numeric values of the
   enumerators generated from this list, so inserting or reordering entries
   changes the value of every entry that follows. */
25 #define TOKEN_TYPES                                                     \
26     TOKEN_TYPE(ID)                         /* Identifier. */            \
27     TOKEN_TYPE(POS_NUM)                    /* Positive number. */       \
28     TOKEN_TYPE(NEG_NUM)                    /* Negative number. */       \
29     TOKEN_TYPE(STRING)                     /* Quoted string. */         \
30     TOKEN_TYPE(STOP)                       /* End of input. */          \
31                                                                         \
32     TOKEN_TYPE(ENDCMD)                     /* . */                      \
33     TOKEN_TYPE(PLUS)                       /* + */                      \
34     TOKEN_TYPE(DASH)                       /* - */                      \
35     TOKEN_TYPE(ASTERISK)                   /* * */                      \
36     TOKEN_TYPE(SLASH)                      /* / */                      \
37     TOKEN_TYPE(EQUALS)                     /* = */                      \
38     TOKEN_TYPE(LPAREN)                     /* ( */                      \
39     TOKEN_TYPE(RPAREN)                     /* ) */                      \
40     TOKEN_TYPE(LBRACK)                     /* [ */                      \
41     TOKEN_TYPE(RBRACK)                     /* ] */                      \
42     TOKEN_TYPE(COMMA)                      /* , */                      \
43                                                                         \
44     TOKEN_TYPE(AND)                        /* AND */                    \
45     TOKEN_TYPE(OR)                         /* OR */                     \
46     TOKEN_TYPE(NOT)                        /* NOT */                    \
47                                                                         \
48     TOKEN_TYPE(EQ)                         /* EQ */                     \
49     TOKEN_TYPE(GE)                         /* GE or >= */               \
50     TOKEN_TYPE(GT)                         /* GT or > */                \
51     TOKEN_TYPE(LE)                         /* LE or <= */               \
52     TOKEN_TYPE(LT)                         /* LT or < */                \
53     TOKEN_TYPE(NE)                         /* NE or ~= */               \
54                                                                         \
55     TOKEN_TYPE(ALL)                        /* ALL */                    \
56     TOKEN_TYPE(BY)                         /* BY */                     \
57     TOKEN_TYPE(TO)                         /* TO */                     \
58     TOKEN_TYPE(WITH)                       /* WITH */                   \
59                                                                         \
60     TOKEN_TYPE(EXP)                        /* ** */
61
62 /* Token types.

      One enumerator named T_<NAME> is generated for each TOKEN_TYPE(NAME)
      entry in TOKEN_TYPES, in list order, with values counting up from 0
      (no enumerator is given an explicit value).  TOKEN_N_TYPES comes
      directly after the last generated enumerator, so its value equals the
      number of entries in TOKEN_TYPES.

      TOKEN_TYPE is defined only for the duration of this expansion and is
      undefined again before the enum is closed. */
63 enum token_type
64   {
65 #define TOKEN_TYPE(TYPE) T_##TYPE,
66     TOKEN_TYPES
67     TOKEN_N_TYPES
68 #undef TOKEN_TYPE
69   };
70
/* Conversions from a token type to a string.  Both functions take an
   enum token_type and return a pointer to constant characters; they are
   only declared here, and their definitions are not part of this header.

   NOTE(review): judging by the names, token_type_to_name presumably yields
   a symbolic name for the type and token_type_to_string the token's
   spelling in syntax.  Confirm against the definitions, including what is
   returned for types without a fixed spelling (such as T_ID) and whether
   the caller may keep the returned pointer. */
71 const char *token_type_to_name (enum token_type);
72 const char *token_type_to_string (enum token_type);
73
74 /* Tokens.

      lex_is_keyword takes a token type and returns a bool; it is only
      declared here.

      NOTE(review): presumably it reports whether the token type is one of
      the reserved words listed in TOKEN_TYPES (AND, OR, NOT, EQ ... WITH) --
      confirm the exact set against the definition. */
75 bool lex_is_keyword (enum token_type);
76
77 /* Validating identifiers.

      id_is_valid and id_is_plausible each take an identifier as a
      null-terminated string and return a bool.  Both are only declared
      here; their definitions are not part of this header.

      NOTE(review): ISSUE_ERROR presumably asks the function to report an
      error message when the check fails, and DICT_ENCODING presumably names
      the character encoding in which ID's length is compared against
      ID_MAX_LEN -- confirm both against the definitions.  How
      id_is_plausible differs from id_is_valid, apart from taking no
      encoding, cannot be determined from this header. */
78 #define ID_MAX_LEN 64          /* Maximum length of identifier, in bytes. */
79
80 bool id_is_valid (const char *id, const char *dict_encoding, bool issue_error);
81 bool id_is_plausible (const char *id, bool issue_error);
82
83 /* Recognizing identifiers.

      The lex_is_* predicates take a single `char'; the lex_uc_is_*
      predicates take a ucs4_t (presumably the Unicode code point type from
      <unitypes.h>, which this header includes).  lex_id_get_length takes a
      struct substring by value and returns a size_t.  All of these are only
      declared here.

      NOTE(review): by the usual naming, "id1" presumably means "may be the
      first character of an identifier" and "idn" "may be a later character
      of an identifier", and lex_id_get_length presumably returns the length
      of the identifier at the start of the substring -- confirm against the
      definitions, including the unit (bytes or characters) of the returned
      length. */
84 bool lex_is_id1 (char);
85 bool lex_is_idn (char);
86 bool lex_uc_is_id1 (ucs4_t);
87 bool lex_uc_is_idn (ucs4_t);
88 bool lex_uc_is_space (ucs4_t);
89 size_t lex_id_get_length (struct substring);
90
91 /* Comparing identifiers.

      lex_id_match and lex_id_match_n compare two substrings, KEYWORD and
      TOKEN, and return a bool; lex_id_match_n takes an additional count N.
      lex_id_to_token takes a substring and returns an int, not an
      enum token_type.  All three are only declared here.

      NOTE(review): presumably the match functions test whether TOKEN is an
      acceptable spelling (possibly an abbreviation) of KEYWORD, with N
      setting the minimum number of characters that must match, and
      lex_id_to_token presumably returns the T_* value for a reserved word
      or T_ID otherwise -- confirm against the definitions, including case
      sensitivity. */
92 bool lex_id_match (struct substring keyword, struct substring token);
93 bool lex_id_match_n (struct substring keyword, struct substring token,
94                      size_t n);
95 int lex_id_to_token (struct substring);
96
97 #endif /* !data/identifier.h */