int n_newlines; /* Number of new-lines up to seg_pos. */
bool suppress_next_newline;
- /* Tokens. */
- struct deque deque; /* Indexes into 'tokens'. */
- struct lex_token *tokens; /* Lookahead tokens for parser. */
+ /* Tokens.
+
+ This is mostly like a deque, with the conceptual invariant that back <=
+ middle <= front (modulo SIZE_MAX+1). The tokens available for parsing
+ lie between 'back' and 'middle': the token at 'back' is the current
+ token, the token at 'back + 1' is the next token, and so on. There are
+ usually no tokens between 'middle' and 'front'; if there are, then they
+ need to go through macro expansion and are not yet available for
+ parsing.
+
+ 'capacity' is the current number of elements in 'tokens'. It is always
+ a power of 2. 'front', 'middle', and 'back' refer to indexes in
+ 'tokens' modulo 'capacity'. */
+ size_t front;
+ size_t middle;
+ size_t back;
+ size_t capacity;
+ struct lex_token *tokens;
};
static struct lex_source *lex_source_create (struct lexer *,
static const struct lex_token *lex_next__ (const struct lexer *, int n);
static void lex_source_push_endcmd__ (struct lex_source *);
-static void lex_source_pop__ (struct lex_source *);
+static void lex_source_pop_back (struct lex_source *);
static bool lex_source_get (const struct lex_source *);
static void lex_source_error_valist (struct lex_source *, int n0, int n1,
const char *format, va_list)
\f
/* Advancing. */
+/* Adds a new token at the front of SRC and returns a pointer to it. The
+ caller should initialize it. Does not advance the middle pointer, so the
+ token isn't immediately available to the parser. */
static struct lex_token *
lex_push_token__ (struct lex_source *src)
{
- struct lex_token *token;
-
- if (deque_is_full (&src->deque))
- src->tokens = deque_expand (&src->deque, src->tokens, sizeof *src->tokens);
+ if (src->front - src->back >= src->capacity)
+ {
+ /* Expansion works just like a deque, so we reuse the code. */
+ struct deque deque = {
+ .capacity = src->capacity,
+ .front = src->front,
+ .back = src->back,
+ };
+ src->tokens = deque_expand (&deque, src->tokens, sizeof *src->tokens);
+ src->capacity = deque.capacity;
+ }
- token = &src->tokens[deque_push_front (&src->deque)];
+ struct lex_token *token = &src->tokens[src->front++ & (src->capacity - 1)];
token->token = (struct token) { .type = T_STOP };
token->macro_rep = NULL;
token->ref_cnt = NULL;
return token;
}
+/* Removes the current token from SRC and uninitializes it. */
static void
-lex_source_pop__ (struct lex_source *src)
+lex_source_pop_back (struct lex_source *src)
{
- lex_token_uninit (&src->tokens[deque_pop_back (&src->deque)]);
+ assert (src->middle - src->back > 0);
+ lex_token_uninit (&src->tokens[src->back++ & (src->capacity - 1)]);
}
+/* Removes the token at the front of SRC (the one with the greatest lookahead,
+   which might not yet be available for parsing) and uninitializes it. */
static void
lex_source_pop_front (struct lex_source *src)
{
- lex_token_uninit (&src->tokens[deque_pop_front (&src->deque)]);
+ assert (src->front - src->middle > 0);
+ lex_token_uninit (&src->tokens[--src->front & (src->capacity - 1)]);
}
/* Advances LEXER to the next token, consuming the current token. */
if (src == NULL)
return;
- if (!deque_is_empty (&src->deque))
- lex_source_pop__ (src);
+ if (src->middle - src->back > 0)
+ lex_source_pop_back (src);
- while (deque_is_empty (&src->deque))
+ while (src->back == src->middle)
if (!lex_source_get (src))
{
lex_source_destroy (src);
}
}
+/* Returns the token in SRC with the greatest lookahead that is available for
+   parsing (the one just before 'middle'). */
static const struct lex_token *
-lex_source_front (const struct lex_source *src)
+lex_source_middle (const struct lex_source *src)
{
- return &src->tokens[deque_front (&src->deque, 0)];
+ assert (src->middle - src->back > 0);
+ return &src->tokens[(src->middle - 1) & (src->capacity - 1)];
}
static const struct lex_token *
lex_source_next__ (const struct lex_source *src, int n)
{
- while (deque_count (&src->deque) <= n)
+ while (src->middle - src->back <= n)
{
- if (!deque_is_empty (&src->deque))
+ if (src->middle - src->back > 0)
{
- const struct lex_token *front = lex_source_front (src);
- if (front->token.type == T_STOP || front->token.type == T_ENDCMD)
- return front;
+ const struct lex_token *middle = lex_source_middle (src);
+ if (middle->token.type == T_STOP || middle->token.type == T_ENDCMD)
+ return middle;
}
lex_source_get (src);
}
- return &src->tokens[deque_back (&src->deque, n)];
+ return &src->tokens[(src->back + n) & (src->capacity - 1)];
}
/* Returns the "struct token" of the token N after the current one in LEXER.
src->suppress_next_newline = false;
src->segmenter = segmenter_init (segmenter_get_mode (&src->segmenter),
false);
- while (!deque_is_empty (&src->deque))
- lex_source_pop__ (src);
+ while (src->middle - src->back > 0)
+ lex_source_pop_back (src);
+ while (src->front - src->middle > 0)
+ lex_source_pop_front (src);
lex_source_push_endcmd__ (src);
}
}
if (src != NULL)
{
- while (!deque_is_empty (&src->deque))
- lex_source_pop__ (src);
+ while (src->middle - src->back > 0)
+ lex_source_pop_back (src);
for (; src != NULL && src->reader->error != LEX_ERROR_TERMINAL;
src = lex_source__ (lexer))
/* Use the oldest token also. (We know that src->deque cannot be empty
because we are in the process of adding a new token, which is already
initialized enough to use here.) */
- token = &src->tokens[deque_back (&src->deque, 0)];
+ token = &src->tokens[src->back & (src->capacity - 1)];
assert (token->token_pos >= token->line_pos);
max_tail = MIN (max_tail, token->line_pos);
lex_get_error (struct lex_source *src, const char *format, ...)
{
va_list args;
- int n;
-
va_start (args, format);
- n = deque_count (&src->deque) - 1;
+ size_t old_middle = src->middle;
+ src->middle = src->front;
+ size_t n = src->front - src->back - 1;
lex_source_error_valist (src, n, n, format, args);
+ src->middle = old_middle;
+
lex_source_pop_front (src);
va_end (args);
}
-/* Attempts to append an additional token into SRC's deque, reading more from
- the underlying lex_reader if necessary. Returns true if a new token was
- added to SRC's deque, false otherwise. */
+/* Attempts to append an additional token at the front of SRC, reading more
+   from the underlying lex_reader if necessary.  Returns true if a new token
+   was added to SRC, false otherwise.  The caller should retry failures
+ unless SRC's 'eof' marker was set to true indicating that there will be no
+ more tokens from this source.
+
+ Does not make the new token available for lookahead yet; the caller must
+ adjust SRC's 'middle' pointer to do so. */
static bool
-lex_source_try_get (struct lex_source *src)
+lex_source_try_get__ (struct lex_source *src)
{
/* State maintained while scanning tokens. Usually we only need a single
state, but scanner_push() can return SCAN_SAVE to indicate that the state
NOT_REACHED ();
}
+/* Attempts to add a new token at the front of SRC. Returns true if
+ successful, false on failure. On failure, the end of SRC has been reached
+ and no more tokens will be forthcoming from it.
+
+ Does not make the new token available for lookahead yet; the caller must
+ adjust SRC's 'middle' pointer to do so. */
static bool
lex_source_get__ (struct lex_source *src)
{
- for (;;)
- {
- if (src->eof)
- return false;
- else if (lex_source_try_get (src))
- return true;
- }
+ while (!src->eof)
+ if (lex_source_try_get__ (src))
+ return true;
+ return false;
}
static bool
{
struct lex_source *src = CONST_CAST (struct lex_source *, src_);
- size_t old_count = deque_count (&src->deque);
- if (!lex_source_get__ (src))
- return false;
+ if (src->front - src->middle == 0)
+ {
+ if (!lex_source_get__ (src))
+ return false;
+ }
if (!settings_get_mexpand ())
- return true;
+ {
+ src->middle++;
+ return true;
+ }
struct macro_expander *me;
- int retval = macro_expander_create (src->lexer->macros,
- &lex_source_front (src)->token,
- &me);
- while (!retval)
+ int n_call = macro_expander_create (
+ src->lexer->macros, &src->tokens[src->middle & (src->capacity - 1)].token,
+ &me);
+ for (int middle_ofs = 1; !n_call; middle_ofs++)
{
- if (!lex_source_get__ (src))
+ if (src->front - src->middle <= middle_ofs && !lex_source_get__ (src))
{
/* This should not be reachable because we always get a T_ENDCMD at
the end of an input file (transformed from T_STOP by
- lex_source_try_get()) and the macro_expander should always
+ lex_source_try_get__()) and the macro_expander should always
terminate expansion on T_ENDCMD. */
NOT_REACHED ();
}
- const struct lex_token *front = lex_source_front (src);
- size_t start = front->token_pos;
- size_t end = front->token_pos + front->token_len;
+ const struct lex_token *t = &src->tokens[(src->middle + middle_ofs)
+ & (src->capacity - 1)];
+ size_t start = t->token_pos;
+ size_t end = t->token_pos + t->token_len;
const struct macro_token mt = {
- .token = front->token,
+ .token = t->token,
.representation = ss_buffer (&src->buffer[start - src->tail],
end - start),
};
- retval = macro_expander_add (me, &mt);
+ n_call = macro_expander_add (me, &mt);
}
- if (retval < 0)
+ if (n_call < 0)
{
- /* XXX handle case where there's a macro invocation starting from some
- later token we've already obtained */
+ /* False alarm: no macro expansion after all. Use first token as
+ lookahead. We'll retry macro expansion from the second token next
+ time around. */
macro_expander_destroy (me);
+ src->middle++;
return true;
}
- /* XXX handle case where the macro invocation doesn't use all the tokens */
- const struct lex_token *call_first = lex_source_next__ (src, old_count);
- const struct lex_token *call_last = lex_source_front (src);
+ /* The first 'n_call' tokens starting at 'middle' will be replaced by a
+ macro expansion. There might be more tokens after that, up to 'front'.
+
+ Figure out the boundary of the macro call in the syntax, to go into the
+ lex_tokens for the expansion so that later error messages can report what
+ macro was called. */
+  const struct lex_token *call_first
+    = &src->tokens[src->middle & (src->capacity - 1)];
+  const struct lex_token *call_last
+    = &src->tokens[(src->middle + n_call - 1) & (src->capacity - 1)];
size_t call_pos = call_first->token_pos;
size_t call_len = (call_last->token_pos + call_last->token_len) - call_pos;
size_t line_pos = call_first->line_pos;
int first_line = call_first->first_line;
- while (deque_count (&src->deque) > old_count)
- lex_source_pop_front (src);
+ /* Destroy the tokens for the call, and save any tokens following the call so
+ we can add them back later. */
+ for (size_t i = src->middle; i != src->middle + n_call; i++)
+ lex_token_uninit (&src->tokens[i & (src->capacity - 1)]);
+ size_t n_save = src->front - (src->middle + n_call);
+ struct lex_token *save_tokens = xnmalloc (n_save, sizeof *save_tokens);
+ for (size_t i = 0; i < n_save; i++)
+ save_tokens[i] = src->tokens[(src->middle + n_call + i)
+ & (src->capacity - 1)];
+ src->front = src->middle;
+
+ /* Now expand the macro. */
struct macro_tokens expansion = { .n = 0 };
macro_expander_get_expansion (me, &expansion);
macro_expander_destroy (me);
+ /* Convert the macro expansion into syntax for possible error messages later. */
size_t *ofs = xnmalloc (expansion.n, sizeof *ofs);
size_t *len = xnmalloc (expansion.n, sizeof *len);
struct string s = DS_EMPTY_INITIALIZER;
output_item_submit (text_item_create (TEXT_ITEM_LOG, ds_cstr (&s),
_("Macro Expansion")));
+ /* Append the macro expansion tokens to the lookahead. */
char *macro_rep = ds_steal_cstr (&s);
size_t *ref_cnt = xmalloc (sizeof *ref_cnt);
*ref_cnt = expansion.n;
.len = len[i],
.ref_cnt = ref_cnt,
};
+ src->middle++;
ss_dealloc (&expansion.mts[i].representation);
}
free (ofs);
free (len);
+ /* Finally, put the saved tokens back. */
+ for (size_t i = 0; i < n_save; i++)
+ *lex_push_token__ (src) = save_tokens[i];
+ free (save_tokens);
+
return true;
}
\f
static void
lex_source_push_endcmd__ (struct lex_source *src)
{
- *lex_push_token__ (src) = (struct lex_token) { .token = { .type = T_ENDCMD } };
+ assert (src->back == src->middle && src->middle == src->front);
+ *lex_push_token__ (src) = (struct lex_token) {
+ .token = { .type = T_ENDCMD } };
+ src->middle++;
}
static struct lex_source *
lex_source_create (struct lexer *lexer, struct lex_reader *reader)
{
- struct lex_source *src;
-
- src = xzalloc (sizeof *src);
- src->reader = reader;
- src->segmenter = segmenter_init (reader->syntax, false);
- src->lexer = lexer;
- src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens);
+ struct lex_source *src = xmalloc (sizeof *src);
+ *src = (struct lex_source) {
+ .reader = reader,
+ .segmenter = segmenter_init (reader->syntax, false),
+ .lexer = lexer,
+ };
lex_source_push_endcmd__ (src);
free (file_name);
free (encoding);
free (src->buffer);
- while (!deque_is_empty (&src->deque))
- lex_source_pop__ (src);
+ while (src->middle - src->back > 0)
+ lex_source_pop_back (src);
+ while (src->front - src->middle > 0)
+ lex_source_pop_front (src);
free (src->tokens);
ll_remove (&src->ll);
free (src);