projects
/
pspp
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
category sort implicitly
[pspp]
/
src
/
data
/
identifier.c
diff --git a/src/data/identifier.c b/src/data/identifier.c
index f1c22ef1b567223579586ca8ac6f24fd3f4e3722..d9d9b2a6444c25a8f76fec3df66089e1306e6b40 100644
(file)
--- a/src/data/identifier.c
+++ b/src/data/identifier.c
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,9 +24,11 @@
#include "data/identifier.h"
#include <string.h>
#include "data/identifier.h"
#include <string.h>
+#include <unistr.h>
#include <unictype.h>
#include "libpspp/assertion.h"
#include <unictype.h>
#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
#include "gl/c-ctype.h"
#include "gl/c-ctype.h"
@@ -44,7 +46,6 @@ token_type_to_name (enum token_type type)
#define TOKEN_TYPE(TYPE) case T_##TYPE: return #TYPE;
TOKEN_TYPES
#undef TOKEN_TYPE
#define TOKEN_TYPE(TYPE) case T_##TYPE: return #TYPE;
TOKEN_TYPES
#undef TOKEN_TYPE
- case TOKEN_N_TYPES:
default:
return "unknown token type";
}
default:
return "unknown token type";
}
@@ -63,6 +64,8 @@ token_type_to_string (enum token_type token)
case T_POS_NUM:
case T_NEG_NUM:
case T_STRING:
case T_POS_NUM:
case T_NEG_NUM:
case T_STRING:
+ case T_MACRO_ID:
+ case T_MACRO_PUNCT:
case T_STOP:
return NULL;
case T_STOP:
return NULL;
@@ -96,9 +99,21 @@ token_type_to_string (enum token_type token)
case T_RBRACK:
return "]";
case T_RBRACK:
return "]";
+ case T_LCURLY:
+ return "{";
+
+ case T_RCURLY:
+ return "}";
+
case T_COMMA:
return ",";
case T_COMMA:
return ",";
+ case T_SEMICOLON:
+ return ";";
+
+ case T_COLON:
+ return ":";
+
case T_AND:
return "AND";
case T_AND:
return "AND";
@@ -140,9 +155,6 @@ token_type_to_string (enum token_type token)
case T_EXP:
return "**";
case T_EXP:
return "**";
-
- case TOKEN_N_TYPES:
- NOT_REACHED ();
}
NOT_REACHED ();
}
NOT_REACHED ();
@@ -188,7 +200,13 @@ lex_is_idn (char c)
bool
lex_uc_is_id1 (ucs4_t uc)
{
bool
lex_uc_is_id1 (ucs4_t uc)
{
- return is_ascii_id1 (uc) || (uc >= 0x80 && uc_is_property_id_start (uc));
+ return (uc < 0x80
+ ? is_ascii_id1 (uc)
+ : (uc_is_general_category_withtable (uc,
+ UC_CATEGORY_MASK_L |
+ UC_CATEGORY_MASK_M |
+ UC_CATEGORY_MASK_S)
+ && uc != 0xfffc && uc != 0xfffd));
}
/* Returns true if Unicode code point UC may be a character in an identifier
}
/* Returns true if Unicode code point UC may be a character in an identifier
@@ -196,8 +214,14 @@ lex_uc_is_id1 (ucs4_t uc)
bool
lex_uc_is_idn (ucs4_t uc)
{
bool
lex_uc_is_idn (ucs4_t uc)
{
- return (is_ascii_id1 (uc) || isdigit (uc) || uc == '.' || uc == '_'
- || (uc >= 0x80 && uc_is_property_id_continue (uc)));
+ return (uc < 0x80
+ ? is_ascii_id1 (uc) || isdigit (uc) || uc == '.' || uc == '_'
+ : (uc_is_general_category_withtable (uc,
+ UC_CATEGORY_MASK_L |
+ UC_CATEGORY_MASK_M |
+ UC_CATEGORY_MASK_S |
+ UC_CATEGORY_MASK_N)
+ && uc != 0xfffc && uc != 0xfffd));
}
/* Returns true if Unicode code point UC is a space that separates tokens. */
}
/* Returns true if Unicode code point UC is a space that separates tokens. */
@@ -220,15 +244,21 @@ lex_uc_is_space (ucs4_t uc)
size_t
lex_id_get_length (struct substring string)
{
size_t
lex_id_get_length (struct substring string)
{
- size_t length = 0;
- if (!ss_is_empty (string) && lex_is_id1 (ss_first (string)))
+ const uint8_t *s = CHAR_CAST (const uint8_t *, string.string);
+ size_t len = string.length;
+ size_t ofs;
+ int mblen;
+
+ for (ofs = 0; ofs < string.length; ofs += mblen)
{
{
- length = 1;
- while (length < ss_length (string)
- && lex_is_idn (ss_at (string, length)))
- length++;
+ ucs4_t uc;
+
+ mblen = u8_mbtouc (&uc, s + ofs, len - ofs);
+ if (!(ofs == 0 ? lex_uc_is_id1 (uc) : lex_uc_is_idn (uc)))
+ break;
}
}
- return length;
+
+ return ofs;
}
\f
/* Comparing identifiers. */
}
\f
/* Comparing identifiers. */
@@ -286,14 +316,14 @@ static const struct keyword keywords[] =
{ T_TO, SS_LITERAL_INITIALIZER ("TO") },
{ T_WITH, SS_LITERAL_INITIALIZER ("WITH") },
};
{ T_TO, SS_LITERAL_INITIALIZER ("TO") },
{ T_WITH, SS_LITERAL_INITIALIZER ("WITH") },
};
-static const size_t keyword_cnt = sizeof keywords / sizeof *keywords;
+static const size_t n_keywords = sizeof keywords / sizeof *keywords;
/* Returns true if TOKEN is representable as a keyword. */
bool
lex_is_keyword (enum token_type token)
{
const struct keyword *kw;
/* Returns true if TOKEN is representable as a keyword. */
bool
lex_is_keyword (enum token_type token)
{
const struct keyword *kw;
- for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
+ for (kw = keywords; kw < &keywords[n_keywords]; kw++)
if (kw->token == token)
return true;
return false;
if (kw->token == token)
return true;
return false;
@@ -307,7 +337,7 @@ lex_id_to_token (struct substring id)
if (ss_length (id) >= 2 && ss_length (id) <= 4)
{
const struct keyword *kw;
if (ss_length (id) >= 2 && ss_length (id) <= 4)
{
const struct keyword *kw;
- for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
+ for (kw = keywords; kw < &keywords[n_keywords]; kw++)
if (ss_equals_case (kw->identifier, id))
return kw->token;
}
if (ss_equals_case (kw->identifier, id))
return kw->token;
}