+/* Returns the 1-based column at which TOKEN starts on its line in SRC.
+
+   TOKEN's positions are offset by SRC's tail to index SRC's buffer.  The
+   column is whatever utf8_count_columns() reports for the bytes between the
+   start of the line and the start of the token, plus 1. */
+static int
+lex_token_get_first_column (const struct lex_source *src,
+                            const struct lex_token *token)
+{
+  const char *line_start = &src->buffer[token->line_pos - src->tail];
+
+  return 1 + utf8_count_columns (line_start,
+                                 token->token_pos - token->line_pos);
+}
+
+/* Returns 1 plus the column count that utf8_count_columns() reports for the
+   text leading up to the end of TOKEN in SRC.
+
+   The measured text normally begins at the start of TOKEN's line.  If a
+   newline occurs between that point and the end of the token, measurement
+   begins just after the last such newline instead. */
+static int
+lex_token_get_last_column (const struct lex_source *src,
+                           const struct lex_token *token)
+{
+  char *line = &src->buffer[token->line_pos - src->tail];
+  char *token_end = &src->buffer[(token->token_pos + token->token_len)
+                                 - src->tail];
+
+  /* Search backward from the token's end for the final newline. */
+  char *last_nl = memrchr (line, '\n', token_end - line);
+  char *col_start = last_nl != NULL ? last_nl + 1 : line;
+
+  return utf8_count_columns (col_start, token_end - col_start) + 1;
+}
+
+/* Returns, by value, a msg_location spanning tokens T0 through T1 in SRC.
+
+   The file name is SRC's reader's file name (the pointer is shared, not
+   copied).  The first line and first column come from T0; the last line and
+   last column come from T1.  Members not listed are zero-initialized. */
+static struct msg_location
+lex_token_location (const struct lex_source *src,
+                    const struct lex_token *t0,
+                    const struct lex_token *t1)
+{
+  struct msg_location span = {
+    .file_name = src->reader->file_name,
+    .first_line = t0->first_line,
+    .last_line = lex_token_get_last_line_number (src, t1),
+    .first_column = lex_token_get_first_column (src, t0),
+    .last_column = lex_token_get_last_column (src, t1),
+  };
+
+  return span;
+}
+
+/* Like lex_token_location(), but returns a pointer obtained by passing the
+   location for T0 through T1 in SRC to msg_location_dup().
+
+   NOTE(review): presumably the caller owns the returned object and must
+   release it -- confirm against msg_location_dup(). */
+static struct msg_location *
+lex_token_location_rw (const struct lex_source *src,
+                       const struct lex_token *t0,
+                       const struct lex_token *t1)
+{
+  struct msg_location span = lex_token_location (src, t0, t1);
+
+  return msg_location_dup (&span);
+}
+
+static struct msg_location *
+lex_source_get_location (const struct lex_source *src, int n0, int n1)
+{ /* Returns the result of lex_token_location_rw() for the span that runs
+     from the token lex_source_next__() yields for N0 through the token it
+     yields for N1, both looked up in SRC.
+
+     NOTE(review): the two lookups are function arguments, so C leaves their
+     relative order unspecified.  If lex_source_next__() can invalidate a
+     pointer it returned earlier, this call needs care -- TODO confirm. */
+ return lex_token_location_rw (src,
+ lex_source_next__ (src, n0),
+ lex_source_next__ (src, n1));
+}
+
+/* Returns the 1-based number of the line on which the syntax for the token N
+   positions past the current one in LEXER begins, taken from that token's
+   first_line member.
+
+   Returns 0 if LEXER has no source to draw from.  The documented contract
+   also gives 0 for a T_STOP token or for a token drawn from a source that
+   does not have line numbers. */
+int
+lex_get_first_line_number (const struct lexer *lexer, int n)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    return 0;
+
+  return lex_source_next__ (src, n)->first_line;
+}
+
+/* Returns the 1-based number of the line on which the syntax for the token N
+   positions past the current one in LEXER ends, plus 1, as computed by
+   lex_token_get_last_line_number().
+
+   Returns 0 if LEXER has no source to draw from.  The documented contract
+   also gives 0 for a T_STOP token or for a token drawn from a source that
+   does not have line numbers.
+
+   A token usually sits entirely on one line of syntax.  The two exceptions
+   are a T_STRING token built from segments on adjacent lines joined by "+"
+   punctuators, and a T_NEG_NUM token whose "-" is on one line and whose
+   number is on the next. */
+int
+lex_get_last_line_number (const struct lexer *lexer, int n)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    return 0;
+
+  return lex_token_get_last_line_number (src, lex_source_next__ (src, n));
+}
+
+/* Returns the 1-based column at which the syntax for the token N positions
+   past the current one in LEXER begins.
+
+   Returns 0 if LEXER has no source to draw from.  The documented contract
+   also gives 0 for a T_STOP token.
+
+   Columns are counted by character width as displayed in a typical
+   fixed-width font: CJK characters count as 2 columns and combining
+   characters as 0. */
+int
+lex_get_first_column (const struct lexer *lexer, int n)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    return 0;
+
+  return lex_token_get_first_column (src, lex_source_next__ (src, n));
+}
+
+/* Returns the 1-based column at which the syntax for the token N positions
+   past the current one in LEXER ends, plus 1.
+
+   Returns 0 if LEXER has no source to draw from.  The documented contract
+   also gives 0 for a T_STOP token.
+
+   Columns are counted by character width as displayed in a typical
+   fixed-width font: CJK characters count as 2 columns and combining
+   characters as 0. */
+int
+lex_get_last_column (const struct lexer *lexer, int n)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    return 0;
+
+  return lex_token_get_last_column (src, lex_source_next__ (src, n));
+}
+
+/* Returns the file name recorded by the reader of LEXER's current source,
+   that is, the syntax file the current command comes from.
+
+   Returns NULL if LEXER has no source to draw from.  The documented contract
+   also gives NULL for a T_STOP token or when the command's source does not
+   have line numbers.
+
+   This function takes no N argument: lookahead stops at the end of a
+   command, and a command never spans more than one syntax file. */
+const char *
+lex_get_file_name (const struct lexer *lexer)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+
+  return src != NULL ? src->reader->file_name : NULL;
+}
+
+/* Returns a newly allocated msg_location covering the syntax of the tokens at
+   0-based offsets N0 through N1, inclusive, counted from the current token in
+   LEXER.
+
+   The file name and line range come from lex_get_lines().  The first column
+   is that of token N0 and the last column is that of token N1.
+
+   The caller owns the result and must eventually free it with
+   msg_location_destroy(). */
+struct msg_location *
+lex_get_location (const struct lexer *lexer, int n0, int n1)
+{
+  struct msg_location *location = lex_get_lines (lexer, n0, n1);
+
+  /* Keep the lookup order: lines, then first column, then last column. */
+  int first_column = lex_get_first_column (lexer, n0);
+  int last_column = lex_get_last_column (lexer, n1);
+
+  location->first_column = first_column;
+  location->last_column = last_column;
+  return location;
+}
+
+/* Returns a newly allocated msg_location for the syntax that represents tokens
+ with 0-based offsets N0...N1, inclusive, from the current token. The
+ location only covers the tokens' lines, not the columns. The caller must
+ eventually free the location (with msg_location_destroy()). */
+struct msg_location *
+lex_get_lines (const struct lexer *lexer, int n0, int n1)
+{
+ struct msg_location *loc = xmalloc (sizeof *loc);
+ *loc = (struct msg_location) {
+ .file_name = xstrdup_if_nonnull (lex_get_file_name (lexer)),
+ .first_line = lex_get_first_line_number (lexer, n0),
+ .last_line = lex_get_last_line_number (lexer, n1),
+ };
+ return loc;