#include "language/lexer/token.h"
#include "libpspp/assertion.h"
#include "libpspp/cast.h"
+#include "libpspp/i18n.h"
#include "gl/c-ctype.h"
#include "gl/c-strtod.h"
#include "gl/xmemdup0.h"
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
enum
{
S_START,
case '<': return T_LT;
case '>': return T_GT;
case '~': return T_NOT;
+ default: return T_MACRO_PUNCT;
}
NOT_REACHED ();
return type > SCAN_FIRST && type < SCAN_LAST;
}
+/* If TOKEN has the type of a scan error (a subset of those identified by
+   is_scan_type()), returns an appropriate error message.  Otherwise, returns
+   NULL.
+
+   Every message is built with xasprintf(), so a non-null result is presumably
+   owned by the caller, who should free() it (confirm against xasprintf()'s
+   contract).
+
+   For the numeric error types the detail to report (a length, a character or
+   a code point) is read from TOKEN->number; for SCAN_EXPECTED_EXPONENT the
+   offending text is read from TOKEN->string. */
+char *
+scan_token_to_error (const struct token *token)
+{
+  switch (token->type)
+    {
+    case SCAN_BAD_HEX_LENGTH:
+      return xasprintf (_("String of hex digits has %d characters, which "
+                          "is not a multiple of 2."), (int) token->number);
+
+    case SCAN_BAD_HEX_DIGIT:
+    case SCAN_BAD_UNICODE_DIGIT:
+      return xasprintf (_("`%c' is not a valid hex digit."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_LENGTH:
+      return xasprintf (_("Unicode string contains %d bytes, which is "
+                          "not in the valid range of 1 to 8 bytes."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_CODE_POINT:
+      /* %X takes an unsigned int.  Converting to unsigned also keeps values
+         above INT_MAX well-defined, which an (int) conversion would not. */
+      return xasprintf (_("U+%04X is not a valid Unicode code point."),
+                        (unsigned int) token->number);
+
+    case SCAN_EXPECTED_QUOTE:
+      return xasprintf (_("Unterminated string constant."));
+
+    case SCAN_EXPECTED_EXPONENT:
+      return xasprintf (_("Missing exponent following `%s'."),
+                        token->string.string);
+
+    case SCAN_UNEXPECTED_CHAR:
+      {
+        /* A declaration may not directly follow a case label before C23, so
+           the scratch buffer for uc_name() gets its own block. */
+        char c_name[16];
+        return xasprintf (_("Bad character %s in input."),
+                          uc_name (token->number, c_name));
+      }
+
+    default:
+      /* Not a scan error type handled here. */
+      break;
+    }
+
+  return NULL;
+}
+
static enum scan_result
scan_start__ (struct scanner *scanner, enum segment_type type,
struct substring s, struct token *token)
case SEG_DO_REPEAT_COMMAND:
case SEG_INLINE_DATA:
case SEG_DOCUMENT:
+ case SEG_MACRO_BODY:
token->type = T_STRING;
ss_alloc_substring (&token->string, s);
return SCAN_DONE;
ss_alloc_substring (&token->string, s);
return SCAN_DONE;
+ case SEG_MACRO_ID:
+ token->type = T_MACRO_ID;
+ ss_alloc_substring (&token->string, s);
+ return SCAN_DONE;
+
case SEG_PUNCT:
if (s.length == 1 && s.string[0] == '-')
{
else
{
token->type = scan_punct__ (s);
+ if (token->type == T_MACRO_PUNCT)
+ ss_alloc_substring (&token->string, s);
return SCAN_DONE;
}
ss_alloc_substring (&token->string, s);
return SCAN_DONE;
- case SEG_UNEXPECTED_DOT:
- token->type = SCAN_UNEXPECTED_DOT;
- return SCAN_DONE;
-
case SEG_UNEXPECTED_CHAR:
return scan_unexpected_char (&s, token);
}
NOT_REACHED ();
}
\f
-/* Initializes SLEX for parsing INPUT in the specified MODE.
+/* Initializes SLEX for parsing INPUT, which is LENGTH bytes long, in the
+ specified MODE.
+ IS_SNIPPET is forwarded unchanged, together with MODE, to segmenter_init(),
+ whose result becomes SLEX's segmenter. SLEX's offset starts at 0.
SLEX has no internal state to free, but it retains a reference to INPUT, so
INPUT must not be modified or freed while SLEX is still in use. */
void
-string_lexer_init (struct string_lexer *slex, const char *input,
- enum segmenter_mode mode)
+string_lexer_init (struct string_lexer *slex, const char *input, size_t length,
+ enum segmenter_mode mode, bool is_snippet)
{
- slex->input = input;
- slex->length = strlen (input) + 1;
- slex->offset = 0;
- segmenter_init (&slex->segmenter, mode);
+ *slex = (struct string_lexer) {
+ .input = input,
+ .length = length,
+ .offset = 0,
+ .segmenter = segmenter_init (mode, is_snippet),
+ };
}
/* */
enum segment_type type;
int n;
- n = segmenter_push (&slex->segmenter, s, left, &type);
+ n = segmenter_push (&slex->segmenter, s, left, true, &type);
assert (n >= 0);
slex->offset += n;