+ struct segmenter segmenter;
+ enum segment_type last_segment; /* Type of the most recently extracted segment. */
+ int newlines; /* Number of newlines encountered so far. */
+ /* Maintained here so we can update lex_source's similar members when we
+ finish. */
+ size_t line_pos;
+ size_t seg_pos;
+ };
+
+ /* Initialize state. */
+ struct state state =
+ {
+ .segmenter = src->segmenter,
+ .newlines = 0,
+ .seg_pos = src->seg_pos,
+ .line_pos = src->line_pos,
+ };
+ struct state saved = state; /* Checkpoint; restored when the scanner reports SCAN_BACK. */
+
+ /* Append a new token to SRC and initialize it. */
+ struct lex_token *token = lex_push_token__ (src);
+ struct scanner scanner;
+ scanner_init (&scanner, &token->token);
+ token->line_pos = src->line_pos;
+ token->token_pos = src->seg_pos;
+ if (src->reader->line_number > 0)
+ token->first_line = src->reader->line_number + src->n_newlines;
+ else
+ token->first_line = 0; /* Reader does not supply line numbers. */
+
+ /* Extract segments and pass them through the scanner until we obtain a
+ token. */
+ for (;;)
+ {
+ /* Extract a segment. */
+ const char *segment = &src->buffer[state.seg_pos - src->tail];
+ size_t seg_maxlen = src->head - state.seg_pos;
+ enum segment_type type;
+ int seg_len = segmenter_push (&state.segmenter, segment, seg_maxlen,
+ src->reader->eof, &type);
+ if (seg_len < 0)
+ {
+ /* The segmenter needs more input to produce a segment. */
+ assert (!src->reader->eof);
+ lex_source_read__ (src);
+ continue;
+ }
+
+ /* Update state based on the segment. */
+ state.last_segment = type;
+ state.seg_pos += seg_len;
+ if (type == SEG_NEWLINE)
+ {
+ state.newlines++;
+ state.line_pos = state.seg_pos; /* Line starts just past the newline. */
+ }
+
+ /* Pass the segment into the scanner and try to get a token out. */
+ enum scan_result result = scanner_push (&scanner, type,
+ ss_buffer (segment, seg_len),
+ &token->token);
+ if (result == SCAN_SAVE)
+ saved = state; /* Scanner may need to rewind to this point later. */
+ else if (result == SCAN_BACK)
+ {
+ state = saved; /* Rewind to the last SCAN_SAVE checkpoint. */
+ break;
+ }
+ else if (result == SCAN_DONE)
+ break;
+ }
+
+ /* If we've reached the end of a line, or the end of a command, then pass
+ the line to the output engine as a syntax text item. */
+ int n_lines = state.newlines;
+ if (state.last_segment == SEG_END_COMMAND && !src->suppress_next_newline)
+ {
+ n_lines++; /* Emit the command's final (possibly partial) line now... */
+ src->suppress_next_newline = true; /* ...and don't re-emit it for its newline. */
+ }
+ else if (n_lines > 0 && src->suppress_next_newline)
+ {
+ n_lines--;
+ src->suppress_next_newline = false;
+ }
+ for (int i = 0; i < n_lines; i++)
+ {
+ /* Beginning of line. */
+ const char *line = &src->buffer[src->journal_pos - src->tail];
+
+ /* Calculate line length, including \n or \r\n end-of-line if present.
+
+ We use src->head even though that may be beyond what we've actually
+ converted to tokens (which is only through state.line_pos). That's
+ because, if we're emitting the line due to SEG_END_COMMAND, we want to
+ take the whole line through the newline, not just through the '.'. */
+ size_t max_len = src->head - src->journal_pos;
+ const char *newline = memchr (line, '\n', max_len);
+ size_t line_len = newline ? newline - line + 1 : max_len;
+
+ /* Calculate line length excluding end-of-line. */
+ size_t copy_len = line_len;
+ if (copy_len > 0 && line[copy_len - 1] == '\n')
+ copy_len--;
+ if (copy_len > 0 && line[copy_len - 1] == '\r')
+ copy_len--;
+
+ /* Submit the line as syntax. */
+ text_item_submit (text_item_create_nocopy (TEXT_ITEM_SYNTAX,
+ xmemdup0 (line, copy_len)));
+
+ src->journal_pos += line_len;
+ }
+
+ token->token_len = state.seg_pos - src->seg_pos; /* Token length in bytes of source. */
+
+ src->segmenter = state.segmenter; /* Commit accumulated scan state back to SRC. */
+ src->seg_pos = state.seg_pos;
+ src->line_pos = state.line_pos;
+ src->n_newlines += state.newlines;
+
+ switch (token->token.type)
+ {
+ default: