+ Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
+ tokens this function will always return zero. */
+double
+lex_next_tokval (const struct lexer *lexer, int n)
+{
+  /* lex_next() locates the token N positions past the current one; its
+     'number' member is handed back unchanged. */
+  return lex_next (lexer, n)->number;
+}
+
+/* Returns, as a null-terminated C string in UTF-8 encoding, the string held
+   by the token N after the current one.
+
+   This is a thin wrapper: it returns the 'string' member of the substring
+   obtained from lex_next_tokss().
+
+   Only T_ID and T_STRING tokens have meaningful strings.  For other tokens
+   this function will always return NULL.
+
+   The UTF-8 encoding of the returned string is correct for variable names and
+   other identifiers.  Use filename_to_utf8() to use it as a filename.  Use
+   data_in() to use it in a "union value". */
+const char *
+lex_next_tokcstr (const struct lexer *lexer, int n)
+{
+  struct substring ss = lex_next_tokss (lexer, n);
+  return ss.string;
+}
+
+/* Returns, as a substring in UTF-8 encoding, the string held by the token N
+   after the current one.  The string is null-terminated, but the null
+   terminator is not counted in the returned substring's 'length'.
+
+   Only T_ID, T_MACRO_ID, and T_STRING tokens have meaningful strings.  For
+   other tokens this function will always return NULL.
+
+   The UTF-8 encoding of the returned string is correct for variable names and
+   other identifiers.  Use filename_to_utf8() to use it as a filename.  Use
+   data_in() to use it in a "union value". */
+struct substring
+lex_next_tokss (const struct lexer *lexer, int n)
+{
+  const struct token *token = lex_next (lexer, n);
+  return token->string;
+}
+
+/* Returns true if token ACTUAL matches token EXPECTED, false otherwise.
+
+   Tokens of different types never match.  For tokens of the same type:
+
+     - T_POS_NUM and T_NEG_NUM match when their numbers compare equal.
+
+     - T_ID matches according to lex_id_match(), with EXPECTED's string passed
+       as the first argument and ACTUAL's as the second.
+
+     - T_STRING matches when the two strings have the same length and the
+       same bytes.
+
+     - Any other type matches on type alone. */
+static bool
+lex_tokens_match (const struct token *actual, const struct token *expected)
+{
+  bool same;
+
+  if (actual->type != expected->type)
+    return false;
+
+  if (actual->type == T_POS_NUM || actual->type == T_NEG_NUM)
+    same = actual->number == expected->number;
+  else if (actual->type == T_ID)
+    same = lex_id_match (expected->string, actual->string);
+  else if (actual->type == T_STRING)
+    {
+      /* Compare lengths first so that memcmp() only ever sees two buffers
+         of equal size. */
+      same = (actual->string.length == expected->string.length
+              && memcmp (actual->string.string, expected->string.string,
+                         actual->string.length) == 0);
+    }
+  else
+    same = true;
+
+  return same;
+}
+
+/* Tries to match the upcoming tokens in LEXER against the tokens that S
+   scans into.  On a complete match, consumes the matched tokens from LEXER
+   and returns true.  On the first mismatch, returns false without consuming
+   anything.
+
+   S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
+   "2SLS", or "END INPUT PROGRAM".  Identifiers may be abbreviated to their
+   first three letters. */
+bool
+lex_match_phrase (struct lexer *lexer, const char *s)
+{
+  struct string_lexer slex;
+  struct token token;
+  int n_matched = 0;
+
+  string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE);
+  while (string_lexer_next (&slex, &token))
+    {
+      bool match;
+
+      /* Skipped tokens do not take part in the comparison and do not
+         advance the lookahead position. */
+      if (token.type == SCAN_SKIP)
+        continue;
+
+      match = lex_tokens_match (lex_next (lexer, n_matched), &token);
+      n_matched++;
+
+      /* Release the scanned token before a possible early return. */
+      token_uninit (&token);
+      if (!match)
+        return false;
+    }
+
+  /* Every token of S matched: consume the same number of tokens. */
+  for (; n_matched > 0; n_matched--)
+    lex_get (lexer);
+  return true;
+}
+
+/* Returns the 'first_line' member of the token that lex_source_next__()
+   yields for SRC and N. */
+static int
+lex_source_get_first_line_number (const struct lex_source *src, int n)
+{
+  const struct lex_token *token = lex_source_next__ (src, n);
+  return token->first_line;
+}
+
+/* Returns the number of '\n' bytes among the LENGTH bytes starting at S.
+
+   S does not need to be null-terminated; exactly LENGTH bytes are examined.
+   The buffer is only read, never modified. */
+static int
+count_newlines (const char *s, size_t length)
+{
+  const char *end = s + length;
+  int n_newlines = 0;
+
+  for (;;)
+    {
+      /* Search only the part of the buffer not yet scanned. */
+      const char *newline = memchr (s, '\n', end - s);
+      if (newline == NULL)
+        break;
+
+      n_newlines++;
+      s = newline + 1;          /* Resume just past the newline found. */
+    }
+
+  return n_newlines;
+}
+
+/* Computes a last-line number for the token that lex_source_next__() yields
+   for SRC and N.
+
+   Returns 0 if the token's 'first_line' is 0.  Otherwise returns
+   'first_line', plus the number of newlines within the token's text in
+   SRC's buffer, plus 1.
+
+   NOTE(review): the trailing "+ 1" suggests the result is one past the last
+   line the token touches -- confirm against callers. */
+static int
+lex_source_get_last_line_number (const struct lex_source *src, int n)
+{
+  const struct lex_token *token = lex_source_next__ (src, n);
+  char *text;
+
+  if (token->first_line == 0)
+    return 0;
+
+  /* The token's text sits in the buffer at its position relative to
+     'tail'. */
+  text = &src->buffer[token->token_pos - src->tail];
+  return token->first_line + count_newlines (text, token->token_len) + 1;
+}
+
+static int
+count_columns (const char *s_, size_t length)
+{
+ const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
+ int columns;
+ size_t ofs;
+ int mblen;
+
+ columns = 0;
+ for (ofs = 0; ofs < length; ofs += mblen)
+ {
+ ucs4_t uc;
+
+ mblen = u8_mbtouc (&uc, s + ofs, length - ofs);
+ if (uc != '\t')
+ {
+ int width = uc_width (uc, "UTF-8");
+ if (width > 0)
+ columns += width;