/* PSPP - a program for statistical analysis.
- Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "language/lexer/token.h"
#include "libpspp/assertion.h"
#include "libpspp/cast.h"
+#include "libpspp/i18n.h"
#include "gl/c-ctype.h"
#include "gl/c-strtod.h"
#include "gl/xmemdup0.h"
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
enum
{
S_START,
case '<': return T_LT;
case '>': return T_GT;
case '~': return T_NOT;
+ default: return T_MACRO_PUNCT;
}
NOT_REACHED ();
#undef SCAN_TYPE
default:
- return token_type_to_name (type);
+ return token_type_to_name ((enum token_type) type);
}
}
return type > SCAN_FIRST && type < SCAN_LAST;
}
+/* If TOKEN has the type of a scan error (a subset of those identified by
+   is_scan_type()), returns an appropriate error message built with
+   xasprintf().  Otherwise, returns NULL.
+
+   For most error types the offending value (a length, a character, or a
+   code point) is read from TOKEN->number; for SCAN_EXPECTED_EXPONENT the
+   text is read from TOKEN->string.
+
+   NOTE(review): the returned string presumably must be freed by the caller;
+   confirm against callers. */
+char *
+scan_token_to_error (const struct token *token)
+{
+  switch (token->type)
+    {
+    case SCAN_BAD_HEX_LENGTH:
+      return xasprintf (_("String of hex digits has %d characters, which "
+                          "is not a multiple of 2."), (int) token->number);
+
+    case SCAN_BAD_HEX_DIGIT:
+    case SCAN_BAD_UNICODE_DIGIT:
+      return xasprintf (_("`%c' is not a valid hex digit."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_LENGTH:
+      return xasprintf (_("Unicode string contains %d bytes, which is "
+                          "not in the valid range of 1 to 8 bytes."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_CODE_POINT:
+      /* %X takes an unsigned int, so convert to that type rather than to
+         int. */
+      return xasprintf (_("U+%04X is not a valid Unicode code point."),
+                        (unsigned int) token->number);
+
+    case SCAN_EXPECTED_QUOTE:
+      return xasprintf (_("Unterminated string constant."));
+
+    case SCAN_EXPECTED_EXPONENT:
+      return xasprintf (_("Missing exponent following `%s'."),
+                        token->string.string);
+
+    case SCAN_UNEXPECTED_CHAR:
+      {
+        /* The braces are required: before C23 a case label must be followed
+           by a statement, and a declaration is not a statement. */
+        char c_name[16];
+        return xasprintf (_("Bad character %s in input."),
+                          uc_name (token->number, c_name));
+      }
+    }
+
+  /* TOKEN is not a scan error. */
+  return NULL;
+}
+
static enum scan_result
scan_start__ (struct scanner *scanner, enum segment_type type,
struct substring s, struct token *token)
case SEG_DO_REPEAT_COMMAND:
case SEG_INLINE_DATA:
case SEG_DOCUMENT:
+ case SEG_MACRO_BODY:
token->type = T_STRING;
ss_alloc_substring (&token->string, s);
return SCAN_DONE;
ss_alloc_substring (&token->string, s);
return SCAN_DONE;
+ case SEG_MACRO_ID:
+ token->type = T_MACRO_ID;
+ ss_alloc_substring (&token->string, s);
+ return SCAN_DONE;
+
case SEG_PUNCT:
if (s.length == 1 && s.string[0] == '-')
{
else
{
token->type = scan_punct__ (s);
+ if (token->type == T_MACRO_PUNCT)
+ ss_alloc_substring (&token->string, s);
return SCAN_DONE;
}
ss_alloc_substring (&token->string, s);
return SCAN_DONE;
- case SEG_UNEXPECTED_DOT:
- token->type = SCAN_UNEXPECTED_DOT;
- return SCAN_DONE;
-
case SEG_UNEXPECTED_CHAR:
return scan_unexpected_char (&s, token);
-
- case SEG_N_TYPES:
- NOT_REACHED ();
}
NOT_REACHED ();
NOT_REACHED ();
}
+\f
+/* Prepares SLEX to tokenize the LENGTH bytes starting at INPUT.  MODE and
+   IS_SNIPPET are handed to segmenter_init() to set up SLEX's segmenter, and
+   reading starts at offset 0.
+
+   SLEX has no internal state to free, but it retains a reference to INPUT
+   rather than a copy, so INPUT must not be modified or freed while SLEX is
+   still in use. */
+void
+string_lexer_init (struct string_lexer *slex, const char *input, size_t length,
+                   enum segmenter_mode mode, bool is_snippet)
+{
+  /* Build the complete initial state first; any member not named here is
+     zero-initialized. */
+  struct string_lexer fresh = {
+    .input = input,
+    .length = length,
+    .offset = 0,
+    .segmenter = segmenter_init (mode, is_snippet),
+  };
+
+  *slex = fresh;
+}
+
+/* Scans the next token from SLEX into TOKEN, advancing SLEX's offset past the
+   input consumed.  Returns true unless the token's type is T_STOP, in which
+   case it returns false.
+
+   Segments are pulled from SLEX's segmenter one at a time and fed to a
+   scanner until the scanner reports that TOKEN is complete.  When the scanner
+   asks to back up (SCAN_BACK), SLEX is rewound to the position recorded at
+   the most recent SCAN_SAVE.
+
+   NOTE(review): TOKEN is handed to scanner_init(), which presumably
+   initializes it; whether the caller must later destroy TOKEN is not visible
+   here. */
+bool
+string_lexer_next (struct string_lexer *slex, struct token *token)
+{
+  /* Rewind point for SCAN_BACK.  It starts out as the lexer's state on entry
+     so that a SCAN_BACK that is not preceded by a SCAN_SAVE restores a
+     coherent state instead of reading an uninitialized segmenter. */
+  struct segmenter saved_segmenter = slex->segmenter;
+  size_t saved_offset = slex->offset;
+
+  struct scanner scanner;
+
+  scanner_init (&scanner, token);
+  for (;;)
+    {
+      const char *s = slex->input + slex->offset;
+      size_t left = slex->length - slex->offset;
+      enum segment_type type;
+      int n;
+
+      /* NOTE(review): the `true' argument presumably tells the segmenter
+         that no input follows these LEFT bytes; confirm against
+         segmenter_push().  A negative result is treated as impossible. */
+      n = segmenter_push (&slex->segmenter, s, left, true, &type);
+      assert (n >= 0);
+
+      slex->offset += n;
+      switch (scanner_push (&scanner, type, ss_buffer (s, n), token))
+        {
+        case SCAN_BACK:
+          /* The scanner consumed too much: return to the saved position. */
+          slex->segmenter = saved_segmenter;
+          slex->offset = saved_offset;
+          /* Fall through. */
+        case SCAN_DONE:
+          return token->type != T_STOP;
+
+        case SCAN_MORE:
+          break;
+
+        case SCAN_SAVE:
+          /* Remember this position in case a later segment forces a
+             SCAN_BACK. */
+          saved_segmenter = slex->segmenter;
+          saved_offset = slex->offset;
+          break;
+        }
+    }
+}