+/* Creates a newly allocated struct msg_location covering the tokens whose
+   offsets within LEXER's current command run from OFS0 through OFS1,
+   inclusive.  The comment on lex_ofs() explains token offsets.
+
+   The returned object belongs to the caller, who must eventually free it. */
+struct msg_location *
+lex_ofs_location (const struct lexer *lexer, int ofs0, int ofs1)
+{
+  /* Rebase OFS0 and OFS1 by the value that lex_ofs() reports before handing
+     them to lex_get_location(). */
+  int base = lex_ofs (lexer);
+  int n0 = ofs0 - base;
+  int n1 = ofs1 - base;
+  return lex_get_location (lexer, n0, n1);
+}
+
+/* Returns the msg_point of the first character of the token at offset OFS.
+   Offsets are absolute within the command currently being parsed: 0 names
+   the command's first token, 1 its second, and so on, regardless of which
+   token is currently being parsed.
+
+   Yields a zeroed point for a T_STOP token, and also when lex_source__()
+   reports no source for LEXER. */
+struct msg_point
+lex_ofs_start_point (const struct lexer *lexer, int ofs)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (!src)
+    return (struct msg_point) { 0, 0 };
+
+  return lex_token_start_point (src, lex_source_ofs__ (src, ofs));
+}
+
+/* Returns the msg_point of the last character (inclusive) of the token at
+   offset OFS.  Offsets are absolute within the command currently being
+   parsed: 0 names the command's first token, 1 its second, and so on,
+   regardless of which token is currently being parsed.
+
+   Yields a zeroed point for a T_STOP token, and also when lex_source__()
+   reports no source for LEXER.
+
+   A token usually sits entirely on one line of syntax, in which case the
+   start and end points for an offset share a line number.  The two
+   exceptions are a T_STRING token assembled from segments on adjacent lines
+   joined by "+" punctuators, and a T_NEG_NUM token whose "-" is on one line
+   and whose number is on the next. */
+struct msg_point
+lex_ofs_end_point (const struct lexer *lexer, int ofs)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (!src)
+    return (struct msg_point) { 0, 0 };
+
+  return lex_token_end_point (src, lex_source_ofs__ (src, ofs));
+}
+
+/* Returns the syntax text for the tokens from N0 ahead of the current token
+   through N1 ahead of it, inclusive.  (Passing zero for both N0 and N1 asks
+   for the syntax of the current token alone.)
+
+   The result is a string that the caller must eventually release with
+   free().  It is UTF-8 text in the original form given to the lexer, so a
+   range spanning several tokens may contain comments, spaces, and new-lines.
+   Macro expansion, however, has already been applied.
+
+   When lex_source__() reports no source for LEXER, the result is an empty
+   string. */
+char *
+lex_next_representation (const struct lexer *lexer, int n0, int n1)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (!src)
+    return xstrdup ("");
+
+  /* N0 and N1 are shifted by the source's parse offset before they are
+     passed to lex_source_syntax__(). */
+  return lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs);
+}
+
+
+/* Returns the syntax text for the tokens at offsets OFS0 through OFS1,
+   inclusive.  (Passing zero for both OFS0 and OFS1 asks for the syntax of
+   the first token in the current command.)
+
+   The result is a string that the caller must eventually release with
+   free().  It is UTF-8 text in the original form given to the lexer, so a
+   range spanning several tokens may contain comments, spaces, and new-lines.
+   Macro expansion, however, has already been applied.
+
+   When lex_source__() reports no source for LEXER, the result is an empty
+   string. */
+char *
+lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (!src)
+    return xstrdup ("");
+
+  return lex_source_syntax__ (src, ofs0, ofs1);
+}
+
+/* Reports whether the token N ahead of the current one came from macro
+   expansion: true if the token has a non-null macro_rep, false otherwise. */
+bool
+lex_next_is_from_macro (const struct lexer *lexer, int n)
+{
+  if (lex_next__ (lexer, n)->macro_rep)
+    return true;
+  return false;
+}
+
+/* Returns true if token ACTUAL matches token EXPECTED, false otherwise.
+
+   Tokens of different types never match.  For tokens of the same type:
+
+   - T_POS_NUM and T_NEG_NUM match when their numbers are equal.
+
+   - T_ID matches according to lex_id_match(), which is given EXPECTED's
+     string first and ACTUAL's string second.
+
+   - T_STRING matches when both strings have the same length and the same
+     bytes.
+
+   - Every other type matches on type alone. */
+static bool
+lex_tokens_match (const struct token *actual, const struct token *expected)
+{
+  if (actual->type != expected->type)
+    return false;
+
+  if (actual->type == T_POS_NUM || actual->type == T_NEG_NUM)
+    return actual->number == expected->number;
+  else if (actual->type == T_ID)
+    return lex_id_match (expected->string, actual->string);
+  else if (actual->type == T_STRING)
+    {
+      /* Compare bytes only once the lengths are known to agree, so that
+         memcmp() never reads past the end of the shorter string. */
+      bool same_length = actual->string.length == expected->string.length;
+      return same_length && !memcmp (actual->string.string,
+                                     expected->string.string,
+                                     actual->string.length);
+    }
+  else
+    return true;
+}
+
+/* Tokenizes S and compares each resulting token, in order, against the
+   tokens in LEXER starting from the current one, using lex_tokens_match().
+
+   Returns the number of tokens obtained from S if all of them match, or 0
+   as soon as one does not.  (The result is also 0 if S yields no tokens.) */
+static size_t
+lex_at_phrase__ (struct lexer *lexer, const char *s)
+{
+  struct string_lexer slex;
+  string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true);
+
+  size_t n_matched;
+  struct token expected;
+  for (n_matched = 0; string_lexer_next (&slex, &expected); n_matched++)
+    {
+      bool ok = lex_tokens_match (lex_next (lexer, n_matched), &expected);
+
+      /* EXPECTED is released on every iteration, match or not. */
+      token_uninit (&expected);
+      if (!ok)
+        return 0;
+    }
+  return n_matched;
+}
+
+/* Returns true if the tokens at LEXER's current position are the sequence
+   of tokens that S parses into, false otherwise.  LEXER's position is left
+   unchanged by this function itself.
+
+   S may hold any sequence of tokens, such as "KRUSKAL-WALLIS", "2SLS", or
+   "END INPUT PROGRAM".  Identifiers may be abbreviated to their first three
+   letters. */
+bool
+lex_at_phrase (struct lexer *lexer, const char *s)
+{
+  size_t n_tokens = lex_at_phrase__ (lexer, s);
+  return n_tokens != 0;
+}
+
+/* Checks whether the tokens at LEXER's current position are the sequence of
+   tokens that S parses into.  If so, skips past them with lex_get_n() and
+   returns true.  If not, returns false without calling lex_get_n().
+
+   S may hold any sequence of tokens, such as "KRUSKAL-WALLIS", "2SLS", or
+   "END INPUT PROGRAM".  Identifiers may be abbreviated to their first three
+   letters. */
+bool
+lex_match_phrase (struct lexer *lexer, const char *s)
+{
+  size_t n_tokens = lex_at_phrase__ (lexer, s);
+  if (n_tokens == 0)
+    return false;
+
+  lex_get_n (lexer, n_tokens);
+  return true;
+}
+
+/* Returns the 1-based line number of the source text at byte OFFSET in SRC.
+
+   This is a binary search over SRC->lines, which the search treats as an
+   ascending array of byte offsets: the result is K when
+   SRC->lines[K - 1] <= OFFSET < SRC->lines[K].  Whenever the probe reaches
+   the last array element, the result is SRC->n_lines. */
+static int
+lex_source_ofs_to_line_number (const struct lex_source *src, size_t offset)
+{
+  size_t low = 0;
+  size_t high = src->n_lines;
+  while (true)
+    {
+      size_t probe = (low + high) / 2;
+
+      /* There is no element after the probe to compare against, so the
+         answer is the final line. */
+      if (probe + 1 >= src->n_lines)
+        return src->n_lines;
+
+      if (offset >= src->lines[probe + 1])
+        low = probe;            /* OFFSET is on a later line. */
+      else if (offset < src->lines[probe])
+        high = probe;           /* OFFSET is on an earlier line. */
+      else
+        return probe + 1;       /* OFFSET is on this line. */
+    }
+}
+
+/* Returns the 1-based column number of the source text at byte OFFSET in
+   SRC.
+
+   The column is counted with utf8_count_columns() over the bytes between
+   the start of the line and OFFSET, where the line starts just after the
+   last new-line found before OFFSET in SRC->buffer, or at the start of the
+   buffer if there is none. */
+static int
+lex_source_ofs_to_column_number (const struct lex_source *src, size_t offset)
+{
+  size_t line_start = 0;
+  const char *last_nl = memrchr (src->buffer, '\n', offset);
+  if (last_nl)
+    line_start = last_nl - src->buffer + 1;
+
+  return 1 + utf8_count_columns (src->buffer + line_start,
+                                 offset - line_start);
+}
+
+/* Returns a msg_point whose line and column are the 1-based line and column
+   numbers of the source text at byte OFFSET in SRC. */
+static struct msg_point
+lex_source_ofs_to_point__ (const struct lex_source *src, size_t offset)
+{
+  struct msg_point point = {
+    .line = lex_source_ofs_to_line_number (src, offset),
+    .column = lex_source_ofs_to_column_number (src, offset),
+  };
+  return point;
+}
+
+/* Returns the msg_point in SRC that corresponds to TOKEN's token_pos, the
+   position at which TOKEN starts. */
+static struct msg_point
+lex_token_start_point (const struct lex_source *src,
+                       const struct lex_token *token)
+{
+  struct msg_point start = lex_source_ofs_to_point__ (src, token->token_pos);
+  return start;
+}
+
+/* Returns the msg_point in SRC that corresponds to the position that
+   lex_token_end() reports for TOKEN. */
+static struct msg_point
+lex_token_end_point (const struct lex_source *src,
+                     const struct lex_token *token)
+{
+  struct msg_point end = lex_source_ofs_to_point__ (src,
+                                                    lex_token_end (token));
+  return end;
+}
+
+/* Returns, by value, a msg_location that runs from the start point of token
+   T0 to the end point of token T1, both within SRC.  Its file name is the
+   result of passing the file name of SRC's reader to
+   intern_new_if_nonnull(). */
+static struct msg_location
+lex_token_location (const struct lex_source *src,
+                    const struct lex_token *t0,
+                    const struct lex_token *t1)
+{
+  struct msg_location loc = {
+    .file_name = intern_new_if_nonnull (src->reader->file_name),
+    .start = lex_token_start_point (src, t0),
+    .end = lex_token_end_point (src, t1),
+  };
+  return loc;
+}
+
+static struct msg_location *
+lex_token_location_rw (const struct lex_source *src,
+ const struct lex_token *t0,
+ const struct lex_token *t1)