1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2005 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* This file is concerned with the definition of the PSPP syntax, NOT the
19 action of scanning/parsing code. */
23 #include "identifier.h"
28 #include <libpspp/assertion.h>
30 /* Recognizing identifiers. */
32 /* Returns true if C may be the first character in an
33 identifier in the current locale. */
/* Accepts any character that isalpha() classifies as alphabetic,
   plus '@', '#' and '$'.
   NOTE(review): the declaration of C is not visible in this
   excerpt.  isalpha() requires an argument representable as
   unsigned char (or EOF), so confirm that C has been converted to
   unsigned char before this call. */
38 return isalpha (c) || c == '@' || c == '#' || c == '$';
42 /* Returns true if C may be a character in an identifier other
than the first. */
/* Accepts every character that lex_is_id1 accepts, plus any
   character isdigit() classifies as a digit, '.' and '_'.
   NOTE(review): the declaration of C is not visible in this
   excerpt; isdigit() has the same argument requirement as
   isalpha() (a value representable as unsigned char, or EOF) --
   confirm C satisfies it. */
48 return lex_is_id1 (c) || isdigit (c) || c == '.' || c == '_';
51 /* Returns the length of the longest prefix of STRING that forms
52 a valid identifier. Returns zero if STRING does not begin
53 with a valid identifier. */
55 lex_id_get_length (struct substring string)
/* An identifier can only start in a non-empty STRING whose first
   character passes lex_is_id1. */
58 if (!ss_is_empty (string) && lex_is_id1 (ss_first (string)))
/* Extend over the following characters for as long as they pass
   lex_is_idn.  The length test comes first, so ss_at is never
   asked for a position at or beyond the end of STRING. */
61 while (length < ss_length (string)
62 && lex_is_idn (ss_at (string, length)))
68 /* Comparing identifiers. */
70 /* Returns true if TOKEN is a case-insensitive match for KEYWORD.
72 Keywords match if one of the following is true: KEYWORD and
73 TOKEN are identical, or TOKEN is at least 3 characters long
74 and matches the leading characters of KEYWORD (that is, TOKEN
   is an abbreviation of KEYWORD). */
76 lex_id_match (struct substring keyword, struct substring token)
/* Delegates to lex_id_match_n, passing 3 as the minimum number of
   characters an abbreviated TOKEN must have. */
78 return lex_id_match_n (keyword, token, 3);
81 /* Returns true if TOKEN is a case-insensitive match for at least
82 the first N characters of KEYWORD. */
84 lex_id_match_n (struct substring keyword, struct substring token, size_t n)
86 size_t token_len = ss_length (token);
87 size_t keyword_len = ss_length (keyword);
/* TOKEN has at least N characters but is shorter than KEYWORD:
   treat it as an abbreviation and compare it with the leading
   TOKEN_LEN characters of KEYWORD (as obtained from ss_head). */
89 if (token_len >= n && token_len < keyword_len)
90 return ss_equals_case (ss_head (keyword, token_len), token);
/* Otherwise (TOKEN is shorter than N characters, or at least as
   long as KEYWORD) only a match against all of KEYWORD counts. */
92 return ss_equals_case (keyword, token);
95 /* Table of keywords. */
/* Spelling of the keyword. */
99 const struct substring identifier;
/* Pairs each reserved-word token type with its spelling.  Every
   spelling listed here is 2 to 4 characters long; lex_id_to_token
   only searches this table for identifiers in that length range,
   so its length check must be adjusted if a keyword outside the
   range is ever added. */
102 static const struct keyword keywords[] =
104 { T_AND, SS_LITERAL_INITIALIZER ("AND") },
105 { T_OR, SS_LITERAL_INITIALIZER ("OR") },
106 { T_NOT, SS_LITERAL_INITIALIZER ("NOT") },
107 { T_EQ, SS_LITERAL_INITIALIZER ("EQ") },
108 { T_GE, SS_LITERAL_INITIALIZER ("GE") },
109 { T_GT, SS_LITERAL_INITIALIZER ("GT") },
110 { T_LE, SS_LITERAL_INITIALIZER ("LE") },
111 { T_LT, SS_LITERAL_INITIALIZER ("LT") },
112 { T_NE, SS_LITERAL_INITIALIZER ("NE") },
113 { T_ALL, SS_LITERAL_INITIALIZER ("ALL") },
114 { T_BY, SS_LITERAL_INITIALIZER ("BY") },
115 { T_TO, SS_LITERAL_INITIALIZER ("TO") },
116 { T_WITH, SS_LITERAL_INITIALIZER ("WITH") },
/* Number of entries in keywords[]. */
118 static const size_t keyword_cnt = sizeof keywords / sizeof *keywords;
120 /* Returns true if TOKEN is representable as a keyword. */
122 lex_is_keyword (int token)
124 const struct keyword *kw;
/* Linear search of the keyword table for an entry whose token
   type equals TOKEN. */
125 for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
126 if (kw->token == token)
131 /* Returns the proper token type, either T_ID or a reserved
132 keyword enum, for ID. */
134 lex_id_to_token (struct substring id)
/* Every keyword in the table is 2 to 4 characters long, so an ID
   of any other length is never compared against the table. */
136 if (ss_length (id) >= 2 && ss_length (id) <= 4)
138 const struct keyword *kw;
/* Look for a keyword whose spelling matches all of ID under
   ss_equals_case; unlike lex_id_match, no abbreviation is
   accepted here. */
139 for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
140 if (ss_equals_case (kw->identifier, id))
147 /* Returns the name for the given keyword token type. */
149 lex_id_name (int token)
151 const struct keyword *kw;
/* Find the table entry whose token type equals TOKEN. */
153 for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
154 if (kw->token == token)
156 /* A "struct substring" is not guaranteed to be
157 null-terminated, as our caller expects, but in this
158 case it always will be. */
/* NOTE(review): presumably this holds because every entry in
   keywords[] is initialized from a string literal through
   SS_LITERAL_INITIALIZER -- confirm against that macro's
   definition.  What happens when TOKEN is not found in the table
   is not visible in this excerpt. */
159 return ss_data (kw->identifier);