X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fidentifier.c;h=7bd2261e60f15ac8f162996b7ee346c1b0eb5468;hb=b5c82cc9aabe7e641011130240ae1b2e84348e23;hp=4f38e84fc0b798c5794f8760bd5238bfd86d36f5;hpb=985c40f2a83588b25f0e6fe7f7d82863c5d34d43;p=pspp-builds.git diff --git a/src/data/identifier.c b/src/data/identifier.c index 4f38e84f..7bd2261e 100644 --- a/src/data/identifier.c +++ b/src/data/identifier.c @@ -1,24 +1,21 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2005 Free Software Foundation, Inc. - Written by John Darrington +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2005, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ -/* - This file is concerned with the definition of the PSPP syntax, NOT the +/* + This file is concerned with the definition of the PSPP syntax, NOT the action of scanning/parsing code . */ @@ -28,26 +25,17 @@ #include #include - - -/* Table of keywords. */ -const char *keywords[T_N_KEYWORDS + 1] = - { - "AND", "OR", "NOT", - "EQ", "GE", "GT", "LE", "LT", "NE", - "ALL", "BY", "TO", "WITH", - NULL, - }; +#include /* Recognizing identifiers. */ /* Returns true if C may be the first character in an identifier in the current locale. */ bool -lex_is_id1 (char c_) +lex_is_id1 (char c_) { unsigned char c = c_; - return isalpha (c) || c == '@' || c == '#' || c == '$'; + return isalpha (c) || c == '@' || c == '#' || c == '$' || c >= 128; } @@ -60,72 +48,115 @@ lex_is_idn (char c_) return lex_is_id1 (c) || isdigit (c) || c == '.' || c == '_'; } -/* If string S begins with an identifier, returns the first - character following it. Otherwise, returns S unchanged. */ -char * -lex_skip_identifier (const char *s) +/* Returns the length of the longest prefix of STRING that forms + a valid identifier. Returns zero if STRING does not begin + with a valid identifier. */ +size_t +lex_id_get_length (struct substring string) { - if (lex_is_id1 (*s)) + size_t length = 0; + if (!ss_is_empty (string) && lex_is_id1 (ss_first (string))) { - s++; - while (lex_is_idn (*s)) - s++; + length = 1; + while (length < ss_length (string) + && lex_is_idn (ss_at (string, length))) + length++; } - return (char *) s; + return length; } /* Comparing identifiers. */ -/* Keywords match if one of the following is true: KW and TOK are - identical (except for differences in case), or TOK is at least 3 - characters long and those characters are identical to KW. KW_LEN - is the length of KW, TOK_LEN is the length of TOK. */ +/* Returns true if TOKEN is a case-insensitive match for KEYWORD. + + Keywords match if one of the following is true: KEYWORD and + TOKEN are identical, or TOKEN is at least 3 characters long + and those characters are identical to KEYWORD. */ bool -lex_id_match_len (const char *kw, size_t kw_len, - const char *tok, size_t tok_len) +lex_id_match (struct substring keyword, struct substring token) { - size_t i = 0; - - assert (kw && tok); - for (;;) - { - if (i == kw_len && i == tok_len) - return true; - else if (i == tok_len) - return i >= 3; - else if (i == kw_len) - return false; - else if (toupper ((unsigned char) kw[i]) - != toupper ((unsigned char) tok[i])) - return false; - - i++; - } + return lex_id_match_n (keyword, token, 3); } -/* Same as lex_id_match_len() minus the need to pass in the lengths. */ +/* Returns true if TOKEN is a case-insensitive match for at least + the first N characters of KEYWORD. */ bool -lex_id_match (const char *kw, const char *tok) +lex_id_match_n (struct substring keyword, struct substring token, size_t n) { - return lex_id_match_len (kw, strlen (kw), tok, strlen (tok)); + size_t token_len = ss_length (token); + size_t keyword_len = ss_length (keyword); + + if (token_len >= n && token_len < keyword_len) + return ss_equals_case (ss_head (keyword, token_len), token); + else + return ss_equals_case (keyword, token); } + +/* Table of keywords. */ +struct keyword + { + int token; + const struct substring identifier; + }; +static const struct keyword keywords[] = + { + { T_AND, SS_LITERAL_INITIALIZER ("AND") }, + { T_OR, SS_LITERAL_INITIALIZER ("OR") }, + { T_NOT, SS_LITERAL_INITIALIZER ("NOT") }, + { T_EQ, SS_LITERAL_INITIALIZER ("EQ") }, + { T_GE, SS_LITERAL_INITIALIZER ("GE") }, + { T_GT, SS_LITERAL_INITIALIZER ("GT") }, + { T_LE, SS_LITERAL_INITIALIZER ("LE") }, + { T_LT, SS_LITERAL_INITIALIZER ("LT") }, + { T_NE, SS_LITERAL_INITIALIZER ("NE") }, + { T_ALL, SS_LITERAL_INITIALIZER ("ALL") }, + { T_BY, SS_LITERAL_INITIALIZER ("BY") }, + { T_TO, SS_LITERAL_INITIALIZER ("TO") }, + { T_WITH, SS_LITERAL_INITIALIZER ("WITH") }, + }; +static const size_t keyword_cnt = sizeof keywords / sizeof *keywords; +/* Returns true if TOKEN is representable as a keyword. */ +bool +lex_is_keyword (int token) +{ + const struct keyword *kw; + for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) + if (kw->token == token) + return true; + return false; +} -/* Returns the proper token type, either T_ID or a reserved keyword - enum, for ID[], which must contain LEN characters. */ +/* Returns the proper token type, either T_ID or a reserved + keyword enum, for ID. */ int -lex_id_to_token (const char *id, size_t len) +lex_id_to_token (struct substring id) { - const char **kwp; - - if (len < 2 || len > 4) - return T_ID; - - for (kwp = keywords; *kwp; kwp++) - if (!strcasecmp (*kwp, id)) - return T_FIRST_KEYWORD + (kwp - keywords); + if (ss_length (id) >= 2 && ss_length (id) <= 4) + { + const struct keyword *kw; + for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) + if (ss_equals_case (kw->identifier, id)) + return kw->token; + } return T_ID; } - + +/* Returns the name for the given keyword token type. */ +const char * +lex_id_name (int token) +{ + const struct keyword *kw; + + for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) + if (kw->token == token) + { + /* A "struct substring" is not guaranteed to be + null-terminated, as our caller expects, but in this + case it always will be. */ + return ss_data (kw->identifier); + } + NOT_REACHED (); +}