X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fidentifier.c;h=db20010464cab1e3f3a1f42cc1e7a4c012bf8c1b;hb=e4a4f41ed1c534c4f742ce95feeee515563b97dc;hp=498f5ea966987e38f75eba3dd105f6e112ed69f5;hpb=e7e2a9e79da2f6c9ae534c5ad067acf49d84a75b;p=pspp diff --git a/src/data/identifier.c b/src/data/identifier.c index 498f5ea966..db20010464 100644 --- a/src/data/identifier.c +++ b/src/data/identifier.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2005, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,18 +23,23 @@ #include "data/identifier.h" -#include <ctype.h> #include <string.h> +#include <unistr.h> #include <unictype.h> #include "libpspp/assertion.h" +#include "libpspp/cast.h" #include "gl/c-ctype.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) + /* Tokens. */ +/* Returns TYPE as a string, e.g. "ID" for T_ID. */ const char * -token_type_to_string (enum token_type type) +token_type_to_name (enum token_type type) { switch (type) { @@ -47,6 +52,104 @@ token_type_to_string (enum token_type type) } } +/* Returns an ASCII string that yields TOKEN if it appeared in a syntax file, + as a statically allocated constant string. This function returns NULL for + tokens that don't have any fixed string representation, such as identifier + and number tokens.
*/ +const char * +token_type_to_string (enum token_type token) +{ + switch (token) + { + case T_ID: + case T_POS_NUM: + case T_NEG_NUM: + case T_STRING: + case T_STOP: + return NULL; + + case T_ENDCMD: + return "."; + + case T_PLUS: + return "+"; + + case T_DASH: + return "-"; + + case T_ASTERISK: + return "*"; + + case T_SLASH: + return "/"; + + case T_EQUALS: + return "="; + + case T_LPAREN: + return "("; + + case T_RPAREN: + return ")"; + + case T_LBRACK: + return "["; + + case T_RBRACK: + return "]"; + + case T_COMMA: + return ","; + + case T_AND: + return "AND"; + + case T_OR: + return "OR"; + + case T_NOT: + return "NOT"; + + case T_EQ: + return "EQ"; + + case T_GE: + return ">="; + + case T_GT: + return ">"; + + case T_LE: + return "<="; + + case T_LT: + return "<"; + + case T_NE: + return "~="; + + case T_ALL: + return "ALL"; + + case T_BY: + return "BY"; + + case T_TO: + return "TO"; + + case T_WITH: + return "WITH"; + + case T_EXP: + return "**"; + + case TOKEN_N_TYPES: + NOT_REACHED (); + } + + NOT_REACHED (); +} + /* Recognizing identifiers. */ static bool @@ -87,7 +190,13 @@ lex_is_idn (char c) bool lex_uc_is_id1 (ucs4_t uc) { - return is_ascii_id1 (uc) || (uc >= 0x80 && uc_is_property_id_start (uc)); + return (uc < 0x80 + ? is_ascii_id1 (uc) + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC may be a character in an identifier @@ -95,8 +204,14 @@ lex_uc_is_id1 (ucs4_t uc) bool lex_uc_is_idn (ucs4_t uc) { - return (is_ascii_id1 (uc) || isdigit (uc) || uc == '.' || uc == '_' - || (uc >= 0x80 && uc_is_property_id_continue (uc))); + return (uc < 0x80 + ? is_ascii_id1 (uc) || isdigit (uc) || uc == '.' 
|| uc == '_' + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S | + UC_CATEGORY_MASK_N) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC is a space that separates tokens. */ @@ -119,15 +234,21 @@ lex_uc_is_space (ucs4_t uc) size_t lex_id_get_length (struct substring string) { - size_t length = 0; - if (!ss_is_empty (string) && lex_is_id1 (ss_first (string))) + const uint8_t *s = CHAR_CAST (const uint8_t *, string.string); + size_t len = string.length; + size_t ofs; + int mblen; + + for (ofs = 0; ofs < string.length; ofs += mblen) { - length = 1; - while (length < ss_length (string) - && lex_is_idn (ss_at (string, length))) - length++; + ucs4_t uc; + + mblen = u8_mbtouc (&uc, s + ofs, len - ofs); + if (!(ofs == 0 ? lex_uc_is_id1 (uc) : lex_uc_is_idn (uc))) + break; } - return length; + + return ofs; } /* Comparing identifiers. */ @@ -213,20 +334,3 @@ lex_id_to_token (struct substring id) return T_ID; } - -/* Returns the name for the given keyword token type. */ -const char * -lex_id_name (enum token_type token) -{ - const struct keyword *kw; - - for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) - if (kw->token == token) - { - /* A "struct substring" is not guaranteed to be - null-terminated, as our caller expects, but in this - case it always will be. */ - return ss_data (kw->identifier); - } - NOT_REACHED (); -}