lexer: Add support for macro punctuation.
[pspp] / src / data / identifier.h
/* PSPP - a program for statistical analysis.
   Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

17 #ifndef DATA_IDENTIFIER_H
18 #define DATA_IDENTIFIER_H 1
19
20 #include <ctype.h>
21 #include <stdbool.h>
22 #include <unitypes.h>
23 #include "libpspp/str.h"
24
/* X-macro listing every token type, in enumeration order.

   To use it, define TOKEN_TYPE(TYPE) to whatever should be generated for
   each entry, expand TOKEN_TYPES, then #undef TOKEN_TYPE again.  The enum
   below is built this way, so adding an entry here automatically adds the
   matching T_* enumerator and bumps TOKEN_N_TYPES. */
#define TOKEN_TYPES                                                     \
    TOKEN_TYPE(ID)                  /* Identifier. */                   \
    TOKEN_TYPE(POS_NUM)             /* Positive number. */              \
    TOKEN_TYPE(NEG_NUM)             /* Negative number. */              \
    TOKEN_TYPE(STRING)              /* Quoted string. */                \
    TOKEN_TYPE(STOP)                /* End of input. */                 \
                                                                        \
    TOKEN_TYPE(ENDCMD)              /* . */                             \
    TOKEN_TYPE(PLUS)                /* + */                             \
    TOKEN_TYPE(DASH)                /* - */                             \
    TOKEN_TYPE(ASTERISK)            /* * */                             \
    TOKEN_TYPE(SLASH)               /* / */                             \
    TOKEN_TYPE(EQUALS)              /* = */                             \
    TOKEN_TYPE(LPAREN)              /* ( */                             \
    TOKEN_TYPE(RPAREN)              /* ) */                             \
    TOKEN_TYPE(LBRACK)              /* [ */                             \
    TOKEN_TYPE(RBRACK)              /* ] */                             \
    TOKEN_TYPE(COMMA)               /* , */                             \
                                                                        \
    TOKEN_TYPE(AND)                 /* AND */                           \
    TOKEN_TYPE(OR)                  /* OR */                            \
    TOKEN_TYPE(NOT)                 /* NOT */                           \
                                                                        \
    TOKEN_TYPE(EQ)                  /* EQ */                            \
    TOKEN_TYPE(GE)                  /* GE or >= */                      \
    TOKEN_TYPE(GT)                  /* GT or > */                       \
    TOKEN_TYPE(LE)                  /* LE or <= */                      \
    TOKEN_TYPE(LT)                  /* LT or < */                       \
    TOKEN_TYPE(NE)                  /* NE or ~= */                      \
                                                                        \
    TOKEN_TYPE(ALL)                 /* ALL */                           \
    TOKEN_TYPE(BY)                  /* BY */                            \
    TOKEN_TYPE(TO)                  /* TO */                            \
    TOKEN_TYPE(WITH)                /* WITH */                          \
                                                                        \
    TOKEN_TYPE(EXP)                 /* ** */                            \
                                                                        \
    TOKEN_TYPE(MACRO_ID)            /* Identifier starting with '!'. */ \
    TOKEN_TYPE(MACRO_PUNCT)         /* Miscellaneous punctuator. */

/* Token types.

   One T_<NAME> enumerator per TOKEN_TYPES entry, numbered from 0 in list
   order.  TOKEN_N_TYPES comes last and therefore equals the number of
   token types. */
enum token_type
  {
#define TOKEN_TYPE(TYPE) T_##TYPE,
    TOKEN_TYPES
    TOKEN_N_TYPES
#undef TOKEN_TYPE
  };

/* Describing token types.

   NOTE(review): only the prototypes are visible in this header.  Judging by
   the names, token_type_to_name presumably yields a name for TYPE and
   token_type_to_string its textual form -- confirm the exact strings, and
   whether either function may return a null pointer, against the
   definitions. */
const char *token_type_to_name (enum token_type type);
const char *token_type_to_string (enum token_type type);

/* Tokens. */

/* NOTE(review): presumably reports whether TOKEN is one of the reserved-word
   token types (such as T_AND or T_ALL) rather than punctuation or a literal.
   The definition is not in this header, so confirm there. */
bool lex_is_keyword (enum token_type token);

/* Validating identifiers. */

#define ID_MAX_LEN 64          /* Maximum length of identifier, in bytes. */

/* NOTE(review): the definitions are not in this header.  From the
   signatures, both functions test the identifier ID and take ISSUE_ERROR,
   presumably to choose whether a failed check is also reported to the user;
   id_is_valid additionally takes the name of the dictionary encoding.
   Confirm the precise rules against the definitions. */
bool id_is_valid (const char *id, const char *dict_encoding, bool issue_error);
bool id_is_plausible (const char *id, bool issue_error);

85 /* Recognizing identifiers. */
86 bool lex_is_id1 (char);
87 bool lex_is_idn (char);
88 bool lex_uc_is_id1 (ucs4_t);
89 bool lex_uc_is_idn (ucs4_t);
90 bool lex_uc_is_space (ucs4_t);
91 size_t lex_id_get_length (struct substring);
92
93 /* Comparing identifiers. */
94 bool lex_id_match (struct substring keyword, struct substring token);
95 bool lex_id_match_n (struct substring keyword, struct substring token,
96                      size_t n);
97 int lex_id_to_token (struct substring);
98
99 #endif /* !data/identifier.h */