switch (token)
{
case T_ID:
- case T_MACRO_ID:
case T_POS_NUM:
case T_NEG_NUM:
case T_STRING:
+ case T_MACRO_ID:
+ case T_MACRO_PUNCT:
case T_STOP:
return NULL;
/* Lists token types, one TOKEN_TYPE(NAME) entry each.  This macro does not
   define TOKEN_TYPE itself, so what each entry expands to is decided by the
   TOKEN_TYPE definition in effect wherever TOKEN_TYPES is expanded. */
#define TOKEN_TYPES \
TOKEN_TYPE(ID) /* Identifier. */ \
- TOKEN_TYPE(MACRO_ID) /* Identifier starting with '!'. */ \
TOKEN_TYPE(POS_NUM) /* Positive number. */ \
TOKEN_TYPE(NEG_NUM) /* Negative number. */ \
TOKEN_TYPE(STRING) /* Quoted string. */ \
TOKEN_TYPE(SLASH) /* / */ \
TOKEN_TYPE(EQUALS) /* = */ \
TOKEN_TYPE(LPAREN) /* ( */ \
- TOKEN_TYPE(RPAREN) /*) */ \
+ TOKEN_TYPE(RPAREN) /* ) */ \
TOKEN_TYPE(LBRACK) /* [ */ \
TOKEN_TYPE(RBRACK) /* ] */ \
TOKEN_TYPE(COMMA) /* , */ \
TOKEN_TYPE(TO) /* TO */ \
TOKEN_TYPE(WITH) /* WITH */ \
\
- TOKEN_TYPE(EXP) /* ** */
-
+ TOKEN_TYPE(EXP) /* ** */ \
+ \
+ TOKEN_TYPE(MACRO_ID) /* Identifier starting with '!'. */ \
+ TOKEN_TYPE(MACRO_PUNCT) /* Miscellaneous punctuator. */
/* Token types. */
enum token_type
{
DEF_CMD (S_ANY, F_ENHANCED, "CLOSE FILE HANDLE", cmd_close_file_handle)
DEF_CMD (S_ANY, 0, "CACHE", cmd_cache)
DEF_CMD (S_ANY, 0, "CD", cmd_cd)
+//DEF_CMD (S_ANY, 0, "DEFINE", cmd_define)
DEF_CMD (S_ANY, 0, "DO REPEAT", cmd_do_repeat)
DEF_CMD (S_ANY, 0, "END REPEAT", cmd_end_repeat)
DEF_CMD (S_ANY, 0, "ECHO", cmd_echo)
UNIMPL_CMD ("CTABLES", "Display complex samples")
UNIMPL_CMD ("CURVEFIT", "Fit curve to line plot")
UNIMPL_CMD ("DATE", "Create time series data")
-UNIMPL_CMD ("DEFINE", "Syntax macros")
UNIMPL_CMD ("DETECTANOMALY", "Find unusual cases")
UNIMPL_CMD ("DISCRIMINANT", "Linear discriminant analysis")
UNIMPL_CMD ("EDIT", "obsolete")
case '<': return T_LT;
case '>': return T_GT;
case '~': return T_NOT;
+ default: return T_MACRO_PUNCT;
}
NOT_REACHED ();
else
{
token->type = scan_punct__ (s);
+ if (token->type == T_MACRO_PUNCT)
+ ss_alloc_substring (&token->string, s);
return SCAN_DONE;
}
}
else if (lex_uc_is_id1 (uc))
return segmenter_parse_id__ (s, input, n, eof, type);
+ else if (uc > 32 && uc < 127 && uc != '\\' && uc != '^')
+ {
+ *type = SEG_PUNCT;
+ s->substate = 0;
+ return 1;
+ }
else
{
*type = SEG_UNEXPECTED_CHAR;
case T_ID:
case T_MACRO_ID:
+ case T_MACRO_PUNCT:
return ss_xstrdup (token->string);
case T_STRING:
'foo
'very long unterminated string that be ellipsized in its error message
1e .x
-`
+^
�
])
AT_CHECK([pspp -O format=csv lexer.sps], [1], [dnl
lexer.sps:9: error: Unknown command `x'.
-lexer.sps:10.1: error: Syntax error at ``': Bad character ``' in input.
+lexer.sps:10.1: error: Syntax error at `^': Bad character `^' in input.
lexer.sps:11.1: error: Syntax error at `�': Bad character U+FFFD in input.
])
UNEXPECTED_DOT
ID "x"
SKIP
-UNEXPECTED_CHAR 95
+MACRO_PUNCT "_"
ID "z"
ENDCMD
SKIP
AT_DATA([input], [dnl
~ & | = >= > <= < ~= <> ( ) , - + * / [[ ]] **
~&|=>=><=<~=<>(),-+*/[[]]**
+% : ; ? _ ` { } ~
])
AT_DATA([expout-base], [dnl
NOT
LBRACK
RBRACK
EXP
+SKIP
+MACRO_PUNCT "%"
+SKIP
+MACRO_PUNCT ":"
+SKIP
+MACRO_PUNCT ";"
+SKIP
+MACRO_PUNCT "?"
+SKIP
+MACRO_PUNCT "_"
+SKIP
+MACRO_PUNCT "`"
+SKIP
+MACRO_PUNCT "{"
+SKIP
+MACRO_PUNCT "}"
+SKIP
+NOT
-SKIP
STOP
])
identifier x space
number 1
identifier y space
-unexpected_char \_
+punct \_
identifier z
-newline \n (later)
-
AT_DATA([input], [dnl
~ & | = >= > <= < ~= <> ( ) , - + * / [[ ]] **
~&|=>=><=<~=<>(),-+*/[[]]**
+% : ; ? _ ` { } ~
])
AT_DATA([expout-base], [dnl
punct ~ space
punct [[
punct ]]
punct **
+newline \n (later)
+
+punct % space
+punct : space
+punct ; space
+punct ? space
+punct \_ space
+punct ` space
+punct { space
+punct } space
+punct ~
-newline \n (later)
-
end