/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
ll_push_tail (&lexer->sources, &lex_source_create (reader)->ll);
}
\f
-/* Advacning. */
+/* Advancing. */
static struct lex_token *
lex_push_token__ (struct lex_source *src)
}
else
{
- char *s = xasprintf ("`%s'", token_type_to_string (type));
- lex_error_expecting (lexer, s, NULL_SENTINEL);
- free (s);
+ const char *type_string = token_type_to_string (type);
+ if (type_string)
+ {
+ char *s = xasprintf ("`%s'", type_string);
+ lex_error_expecting (lexer, s, NULL_SENTINEL);
+ free (s);
+ }
+ else
+ lex_error_expecting (lexer, token_type_to_name (type), NULL_SENTINEL);
+
return false;
}
}
/* Returns true if the current token is an identifier (T_ID); otherwise
   delegates to lex_force_string(), returning whatever it returns.
   NOTE(review): this hunk fixes what looks like a copy-paste bug — the old
   code tested lex_is_integer() in a function named ..._string_or_id. */
bool
lex_force_string_or_id (struct lexer *lexer)
{
-  return lex_is_integer (lexer) || lex_force_string (lexer);
+  return lex_token (lexer) == T_ID || lex_force_string (lexer);
}
/* If the current token is an integer, does nothing and returns true.
{
do
{
- size_t head_ofs;
- size_t space;
- size_t n;
-
lex_source_expand__ (src);
- head_ofs = src->head - src->tail;
- space = src->allocated - head_ofs;
- n = src->reader->class->read (src->reader, &src->buffer[head_ofs],
- space,
- segmenter_get_prompt (&src->segmenter));
+ size_t head_ofs = src->head - src->tail;
+ size_t space = src->allocated - head_ofs;
+ enum prompt_style prompt = segmenter_get_prompt (&src->segmenter);
+ size_t n = src->reader->class->read (src->reader, &src->buffer[head_ofs],
+ space, prompt);
assert (n <= space);
+ for (char *p = &src->buffer[head_ofs]; p < &src->buffer[head_ofs + n];
+ p++)
+ if (*p == '\0')
+ {
+ struct msg m;
+ m.category = MSG_C_SYNTAX;
+ m.severity = MSG_S_ERROR;
+ m.file_name = src->reader->file_name;
+ m.first_line = 0;
+ m.last_line = 0;
+ m.first_column = 0;
+ m.last_column = 0;
+ m.text = xstrdup ("Bad character U+0000 in input.");
+ msg_emit (&m);
+
+ *p = ' ';
+ }
+
if (n == 0)
{
/* End of input.
va_end (args);
}
+/* Attempts to append an additional token into SRC's deque, reading more from
+   the underlying lex_reader if necessary.  Returns true if successful, false
+   if the deque already represents (a suffix of) the whole lex_reader's
+   contents. */
static bool
lex_source_get__ (const struct lex_source *src_)
{
struct lex_source *src = CONST_CAST (struct lex_source *, src_);
+ if (src->eof)
+ return false;
+ /* State maintained while scanning tokens. Usually we only need a single
+ state, but scanner_push() can return SCAN_SAVE to indicate that the state
+ needs to be saved and possibly restored later with SCAN_BACK. */
struct state
{
struct segmenter segmenter;
enum segment_type last_segment;
- int newlines;
+ int newlines; /* Number of newlines encountered so far. */
+ /* Maintained here so we can update lex_source's similar members when we
+ finish. */
size_t line_pos;
size_t seg_pos;
};
- struct state state, saved;
- enum scan_result result;
- struct scanner scanner;
- struct lex_token *token;
- int n_lines;
- int i;
-
- if (src->eof)
- return false;
-
- state.segmenter = src->segmenter;
- state.newlines = 0;
- state.seg_pos = src->seg_pos;
- state.line_pos = src->line_pos;
- saved = state;
+ /* Initialize state. */
+ struct state state =
+ {
+ .segmenter = src->segmenter,
+ .newlines = 0,
+ .seg_pos = src->seg_pos,
+ .line_pos = src->line_pos,
+ };
+ struct state saved = state;
- token = lex_push_token__ (src);
+ /* Append a new token to SRC and initialize it. */
+ struct lex_token *token = lex_push_token__ (src);
+ struct scanner scanner;
scanner_init (&scanner, &token->token);
token->line_pos = src->line_pos;
token->token_pos = src->seg_pos;
else
token->first_line = 0;
+ /* Extract segments and pass them through the scanner until we obtain a
+ token. */
for (;;)
{
+ /* Extract a segment. */
+ const char *segment = &src->buffer[state.seg_pos - src->tail];
+ size_t seg_maxlen = src->head - state.seg_pos;
enum segment_type type;
- const char *segment;
- size_t seg_maxlen;
- int seg_len;
-
- segment = &src->buffer[state.seg_pos - src->tail];
- seg_maxlen = src->head - state.seg_pos;
- seg_len = segmenter_push (&state.segmenter, segment, seg_maxlen, &type);
+ int seg_len = segmenter_push (&state.segmenter, segment, seg_maxlen,
+ &type);
if (seg_len < 0)
{
+ /* The segmenter needs more input to produce a segment. */
lex_source_read__ (src);
continue;
}
+ /* Update state based on the segment. */
state.last_segment = type;
state.seg_pos += seg_len;
if (type == SEG_NEWLINE)
state.line_pos = state.seg_pos;
}
- result = scanner_push (&scanner, type, ss_buffer (segment, seg_len),
- &token->token);
+ /* Pass the segment into the scanner and try to get a token out. */
+ enum scan_result result = scanner_push (&scanner, type,
+ ss_buffer (segment, seg_len),
+ &token->token);
if (result == SCAN_SAVE)
saved = state;
else if (result == SCAN_BACK)
break;
}
- n_lines = state.newlines;
+ /* If we've reached the end of a line, or the end of a command, then pass
+ the line to the output engine as a syntax text item. */
+ int n_lines = state.newlines;
if (state.last_segment == SEG_END_COMMAND && !src->suppress_next_newline)
{
n_lines++;
n_lines--;
src->suppress_next_newline = false;
}
- for (i = 0; i < n_lines; i++)
+ for (int i = 0; i < n_lines; i++)
{
- const char *newline;
- const char *line;
- size_t line_len;
- char *syntax;
-
- line = &src->buffer[src->journal_pos - src->tail];
- newline = rawmemchr (line, '\n');
- line_len = newline - line;
+ const char *line = &src->buffer[src->journal_pos - src->tail];
+ const char *newline = rawmemchr (line, '\n');
+ size_t line_len = newline - line;
if (line_len > 0 && line[line_len - 1] == '\r')
line_len--;
- syntax = malloc (line_len + 2);
+ char *syntax = malloc (line_len + 2);
memcpy (syntax, line, line_len);
syntax[line_len] = '\n';
syntax[line_len + 1] = '\0';