X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fidentifier.c;h=db20010464cab1e3f3a1f42cc1e7a4c012bf8c1b;hb=3b6384c0f3f35fb0b280cb20ddaa01b50912f4b8;hp=4b613bb480edb5555cb1532176d745182345c0c4;hpb=1b3322acf30d531cefe3cdbf7287ec8cde601bcd;p=pspp diff --git a/src/data/identifier.c b/src/data/identifier.c index 4b613bb480..db20010464 100644 --- a/src/data/identifier.c +++ b/src/data/identifier.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2005, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,15 +23,12 @@ #include "data/identifier.h" -#include #include -#include #include +#include #include "libpspp/assertion.h" #include "libpspp/cast.h" -#include "libpspp/i18n.h" -#include "libpspp/message.h" #include "gl/c-ctype.h" @@ -193,7 +190,13 @@ lex_is_idn (char c) bool lex_uc_is_id1 (ucs4_t uc) { - return is_ascii_id1 (uc) || (uc >= 0x80 && uc_is_property_id_start (uc)); + return (uc < 0x80 + ? is_ascii_id1 (uc) + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC may be a character in an identifier @@ -201,8 +204,14 @@ lex_uc_is_id1 (ucs4_t uc) bool lex_uc_is_idn (ucs4_t uc) { - return (is_ascii_id1 (uc) || isdigit (uc) || uc == '.' || uc == '_' - || (uc >= 0x80 && uc_is_property_id_continue (uc))); + return (uc < 0x80 + ? is_ascii_id1 (uc) || isdigit (uc) || uc == '.' || uc == '_' + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S | + UC_CATEGORY_MASK_N) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC is a space that separates tokens. */ @@ -225,15 +234,21 @@ lex_uc_is_space (ucs4_t uc) size_t lex_id_get_length (struct substring string) { - size_t length = 0; - if (!ss_is_empty (string) && lex_is_id1 (ss_first (string))) + const uint8_t *s = CHAR_CAST (const uint8_t *, string.string); + size_t len = string.length; + size_t ofs; + int mblen; + + for (ofs = 0; ofs < string.length; ofs += mblen) { - length = 1; - while (length < ss_length (string) - && lex_is_idn (ss_at (string, length))) - length++; + ucs4_t uc; + + mblen = u8_mbtouc (&uc, s + ofs, len - ofs); + if (!(ofs == 0 ? lex_uc_is_id1 (uc) : lex_uc_is_idn (uc))) + break; } - return length; + + return ofs; } /* Comparing identifiers. */ @@ -319,20 +334,3 @@ lex_id_to_token (struct substring id) return T_ID; } - -/* Returns the name for the given keyword token type. */ -const char * -lex_id_name (enum token_type token) -{ - const struct keyword *kw; - - for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) - if (kw->token == token) - { - /* A "struct substring" is not guaranteed to be - null-terminated, as our caller expects, but in this - case it always will be. */ - return ss_data (kw->identifier); - } - NOT_REACHED (); -}