lexer: Add tokens for '{', '}', ':', ';' for use in the matrix language.
[pspp] / src / data / identifier.h
1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.
3
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16
17 #ifndef DATA_IDENTIFIER_H
18 #define DATA_IDENTIFIER_H 1
19
20 #include <ctype.h>
21 #include <stdbool.h>
22 #include <unitypes.h>
23 #include "libpspp/str.h"
24 #include "gl/verify.h"
25
26 #define TOKEN_TYPES /* X-macro: one TOKEN_TYPE(X) per token type. */    \
27     TOKEN_TYPE(STOP)                /* End of input. */                 \
28                                                                         \
29     TOKEN_TYPE(ID)                  /* Identifier. */                   \
30     TOKEN_TYPE(POS_NUM)             /* Positive number. */              \
31     TOKEN_TYPE(NEG_NUM)             /* Negative number. */              \
32     TOKEN_TYPE(STRING)              /* Quoted string. */                \
33                                                                         \
34     TOKEN_TYPE(ENDCMD)              /* . */                             \
35     TOKEN_TYPE(PLUS)                /* + */                             \
36     TOKEN_TYPE(DASH)                /* - */                             \
37     TOKEN_TYPE(ASTERISK)            /* * */                             \
38     TOKEN_TYPE(SLASH)               /* / */                             \
39     TOKEN_TYPE(EQUALS)              /* = */                             \
40     TOKEN_TYPE(LPAREN)              /* ( */                             \
41     TOKEN_TYPE(RPAREN)              /* ) */                             \
42     TOKEN_TYPE(LBRACK)              /* [ */                             \
43     TOKEN_TYPE(RBRACK)              /* ] */                             \
44     TOKEN_TYPE(LCURLY)              /* { */                             \
45     TOKEN_TYPE(RCURLY)              /* } */                             \
46     TOKEN_TYPE(COMMA)               /* , */                             \
47     TOKEN_TYPE(SEMICOLON)           /* ; */                             \
48     TOKEN_TYPE(COLON)               /* : */                             \
49                                                                         \
50     TOKEN_TYPE(AND)                 /* AND */                           \
51     TOKEN_TYPE(OR)                  /* OR */                            \
52     TOKEN_TYPE(NOT)                 /* NOT */                           \
53                                                                         \
54     TOKEN_TYPE(EQ)                  /* EQ */                            \
55     TOKEN_TYPE(GE)                  /* GE or >= */                      \
56     TOKEN_TYPE(GT)                  /* GT or > */                       \
57     TOKEN_TYPE(LE)                  /* LE or <= */                      \
58     TOKEN_TYPE(LT)                  /* LT or < */                       \
59     TOKEN_TYPE(NE)                  /* NE or ~= */                      \
60                                                                         \
61     TOKEN_TYPE(ALL)                 /* ALL */                           \
62     TOKEN_TYPE(BY)                  /* BY */                            \
63     TOKEN_TYPE(TO)                  /* TO */                            \
64     TOKEN_TYPE(WITH)                /* WITH */                          \
65                                                                         \
66     TOKEN_TYPE(EXP)                 /* ** */                            \
67                                                                         \
68     TOKEN_TYPE(MACRO_ID)            /* Identifier starting with '!'. */ \
69     TOKEN_TYPE(MACRO_PUNCT)         /* Miscellaneous punctuator. */
70 /* Token types.  Each TOKEN_TYPE(X) entry in TOKEN_TYPES expands to an
   enumerator named T_X, so the enumerators follow the order of that list. */
71 enum token_type
72   {
73 #define TOKEN_TYPE(TYPE) T_##TYPE,
74     TOKEN_TYPES
75 #undef TOKEN_TYPE
76   };
77 verify(T_STOP == 0);  /* Compile-time check that STOP stays first in the list. */
78
/* Number of token types.  With TOKEN_TYPE(X) defined as "+ 1", TOKEN_TYPES
   expands to "+ 1 + 1 ...", one term per entry in the list. */
79 #define TOKEN_TYPE(TYPE) + 1
80 enum { TOKEN_N_TYPES = TOKEN_TYPES };
81 #undef TOKEN_TYPE
82
/* Converting a token type to text.  Both functions take an enum token_type
   and return a const char *.  NOTE(review): presumably one yields the
   symbolic type name and the other the token's syntax -- confirm against the
   definitions, which are not in this header. */
83 const char *token_type_to_name (enum token_type);
84 const char *token_type_to_string (enum token_type);
85
86 /* Tokens.  lex_is_keyword() takes a token type and returns a bool.
   NOTE(review): presumably true for the reserved-word types (AND, OR, ...,
   WITH) -- confirm against the definition. */
87 bool lex_is_keyword (enum token_type);
88
89 /* Validating identifiers.  Both checkers below return bool.
   NOTE(review): ISSUE_ERROR presumably selects whether a diagnostic is
   reported when the check fails -- confirm against the definitions. */
90 #define ID_MAX_LEN 64          /* Maximum length of identifier, in bytes. */
91
92 bool id_is_valid (const char *id, const char *dict_encoding, bool issue_error);
93 bool id_is_plausible (const char *id, bool issue_error);
94
95 /* Recognizing identifiers.  The lex_is_* predicates take a single char; the
   lex_uc_is_* predicates take a ucs4_t code point (type from <unitypes.h>).
   lex_id_get_length() takes a struct substring by value and returns a size_t.
   NOTE(review): "id1" and "idn" presumably mean first and subsequent
   identifier character -- confirm against the definitions. */
96 bool lex_is_id1 (char);
97 bool lex_is_idn (char);
98 bool lex_uc_is_id1 (ucs4_t);
99 bool lex_uc_is_idn (ucs4_t);
100 bool lex_uc_is_space (ucs4_t);
101 size_t lex_id_get_length (struct substring);
102
103 /* Comparing identifiers.  KEYWORD and TOKEN are struct substring values
   passed by value; the match functions return bool.
   NOTE(review): lex_id_to_token() is declared to return int rather than
   enum token_type; presumably the value is one of the T_* enumerators --
   confirm against the definition before relying on it. */
104 bool lex_id_match (struct substring keyword, struct substring token);
105 bool lex_id_match_n (struct substring keyword, struct substring token,
106                      size_t n);
107 int lex_id_to_token (struct substring);
108
109 #endif /* !data/identifier.h */