X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fidentifier.c;h=db20010464cab1e3f3a1f42cc1e7a4c012bf8c1b;hb=00aad3b983774328140a04436d7d6ae7925fec97;hp=a757b31e3a03a8a644a7520fccec4ec8e81e6c14;hpb=c785bf16095624e47d9af976aaa751295a66f3d5;p=pspp diff --git a/src/data/identifier.c b/src/data/identifier.c index a757b31e3a..db20010464 100644 --- a/src/data/identifier.c +++ b/src/data/identifier.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,9 +24,11 @@ #include "data/identifier.h" #include +#include #include #include "libpspp/assertion.h" +#include "libpspp/cast.h" #include "gl/c-ctype.h" @@ -188,7 +190,13 @@ lex_is_idn (char c) bool lex_uc_is_id1 (ucs4_t uc) { - return is_ascii_id1 (uc) || (uc >= 0x80 && uc_is_property_id_start (uc)); + return (uc < 0x80 + ? is_ascii_id1 (uc) + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC may be a character in an identifier @@ -198,7 +206,12 @@ lex_uc_is_idn (ucs4_t uc) { return (uc < 0x80 ? is_ascii_id1 (uc) || isdigit (uc) || uc == '.' || uc == '_' - : uc >= 0x80 && uc_is_property_id_continue (uc)); + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S | + UC_CATEGORY_MASK_N) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC is a space that separates tokens. */ @@ -221,15 +234,21 @@ lex_uc_is_space (ucs4_t uc) size_t lex_id_get_length (struct substring string) { - size_t length = 0; - if (!ss_is_empty (string) && lex_is_id1 (ss_first (string))) + const uint8_t *s = CHAR_CAST (const uint8_t *, string.string); + size_t len = string.length; + size_t ofs; + int mblen; + + for (ofs = 0; ofs < string.length; ofs += mblen) { - length = 1; - while (length < ss_length (string) - && lex_is_idn (ss_at (string, length))) - length++; + ucs4_t uc; + + mblen = u8_mbtouc (&uc, s + ofs, len - ofs); + if (!(ofs == 0 ? lex_uc_is_id1 (uc) : lex_uc_is_idn (uc))) + break; } - return length; + + return ofs; } /* Comparing identifiers. */