+ else if (min == max)
+ {
+ if (name)
+ lex_error (lexer, _("Expected %ld for %s."), min, name);
+ else
+ lex_error (lexer, _("Expected %ld."), min);
+ }
+ else if (min + 1 == max)
+ {
+ if (name)
+ lex_error (lexer, _("Expected %ld or %ld for %s."), min, min + 1, name);
+ else
+ lex_error (lexer, _("Expected %ld or %ld."), min, min + 1);
+ }
+ else
+ {
+ bool report_lower_bound = (min > INT_MIN / 2) || too_small;
+ bool report_upper_bound = (max < INT_MAX / 2) || too_big;
+
+ if (report_lower_bound && report_upper_bound)
+ {
+ if (name)
+ lex_error (lexer,
+ _("Expected integer between %ld and %ld for %s."),
+ min, max, name);
+ else
+ lex_error (lexer, _("Expected integer between %ld and %ld."),
+ min, max);
+ }
+ else if (report_lower_bound)
+ {
+ if (min == 0)
+ {
+ if (name)
+ lex_error (lexer, _("Expected non-negative integer for %s."),
+ name);
+ else
+ lex_error (lexer, _("Expected non-negative integer."));
+ }
+ else if (min == 1)
+ {
+ if (name)
+ lex_error (lexer, _("Expected positive integer for %s."),
+ name);
+ else
+ lex_error (lexer, _("Expected positive integer."));
+ }
+ else
+ {
+ if (name)
+ lex_error (lexer, _("Expected integer %ld or greater for %s."),
+ min, name);
+ else
+ lex_error (lexer, _("Expected integer %ld or greater."), min);
+ }
+ }
+ else if (report_upper_bound)
+ {
+ if (name)
+ lex_error (lexer,
+ _("Expected integer less than or equal to %ld for %s."),
+ max, name);
+ else
+ lex_error (lexer, _("Expected integer less than or equal to %ld."),
+ max);
+ }
+ else
+ {
+ if (name)
+ lex_error (lexer, _("Integer expected for %s."), name);
+ else
+ lex_error (lexer, _("Integer expected."));
+ }
+ }
+ return false;
+}
+
+ /* Requires the current token in LEXER to be a number.  Returns true if it is.
+    Otherwise, reports an error with lex_error() and returns false. */
+ bool
+ lex_force_num (struct lexer *lexer)
+ {
+   bool ok = lex_is_number (lexer);
+   if (!ok)
+     lex_error (lexer, _("expecting number"));
+   return ok;
+ }
+
+ /* Requires the current token in LEXER to be a number in the closed range
+    [MIN,MAX].  Returns true if it is.  Otherwise, reports an error, which
+    mentions NAME if NAME is nonnull, and returns false. */
+ bool
+ lex_force_num_range_closed (struct lexer *lexer, const char *name,
+                             double min, double max)
+ {
+   bool too_small = false;
+   bool too_big = false;
+   if (lex_is_number (lexer))
+     {
+       too_small = lex_number (lexer) < min;
+       too_big = lex_number (lexer) > max;
+       if (!too_small && !too_big)
+         return true;
+     }
+
+   if (min > max)
+     {
+       /* Empty range, maybe a bug in the caller.  Just report that a number
+          was needed. */
+       if (name)
+         lex_error (lexer, _("Number expected for %s."), name);
+       else
+         lex_error (lexer, _("Number expected."));
+       return false;
+     }
+
+   if (min == max)
+     {
+       /* Only a single value is acceptable. */
+       if (name)
+         lex_error (lexer, _("Expected %g for %s."), min, name);
+       else
+         lex_error (lexer, _("Expected %g."), min);
+       return false;
+     }
+
+   /* A bound is mentioned if it differs from the +/-DBL_MAX extreme or if the
+      token actually violated it. */
+   bool lower = min > -DBL_MAX || too_small;
+   bool upper = max < DBL_MAX || too_big;
+
+   if (!lower && !upper)
+     {
+       if (name)
+         lex_error (lexer, _("Number expected for %s."), name);
+       else
+         lex_error (lexer, _("Number expected."));
+     }
+   else if (!upper)
+     {
+       /* Lower bound only. */
+       if (min == 0)
+         {
+           if (name)
+             lex_error (lexer, _("Expected non-negative number for %s."),
+                        name);
+           else
+             lex_error (lexer, _("Expected non-negative number."));
+         }
+       else
+         {
+           if (name)
+             lex_error (lexer, _("Expected number %g or greater for %s."),
+                        min, name);
+           else
+             lex_error (lexer, _("Expected number %g or greater."), min);
+         }
+     }
+   else if (!lower)
+     {
+       /* Upper bound only. */
+       if (name)
+         lex_error (lexer,
+                    _("Expected number less than or equal to %g for %s."),
+                    max, name);
+       else
+         lex_error (lexer, _("Expected number less than or equal to %g."),
+                    max);
+     }
+   else
+     {
+       /* Both bounds. */
+       if (name)
+         lex_error (lexer,
+                    _("Expected number between %g and %g for %s."),
+                    min, max, name);
+       else
+         lex_error (lexer, _("Expected number between %g and %g."),
+                    min, max);
+     }
+   return false;
+ }
+
+ /* Requires the current token in LEXER to be a number in the half-open range
+    [MIN,MAX).  Returns true if it is.  Otherwise, reports an error, which
+    mentions NAME if NAME is nonnull, and returns false. */
+ bool
+ lex_force_num_range_halfopen (struct lexer *lexer, const char *name,
+                               double min, double max)
+ {
+   bool too_small = false;
+   bool too_big = false;
+   if (lex_is_number (lexer))
+     {
+       too_small = lex_number (lexer) < min;
+       too_big = lex_number (lexer) >= max;
+       if (!too_small && !too_big)
+         return true;
+     }
+
+   if (min >= max)
+     {
+       /* Empty range, maybe a bug in the caller.  Just report that a number
+          was needed. */
+       if (name)
+         lex_error (lexer, _("Number expected for %s."), name);
+       else
+         lex_error (lexer, _("Number expected."));
+       return false;
+     }
+
+   /* A bound is mentioned if it differs from the +/-DBL_MAX extreme or if the
+      token actually violated it. */
+   bool lower = min > -DBL_MAX || too_small;
+   bool upper = max < DBL_MAX || too_big;
+
+   if (!lower && !upper)
+     {
+       if (name)
+         lex_error (lexer, _("Number expected for %s."), name);
+       else
+         lex_error (lexer, _("Number expected."));
+     }
+   else if (!upper)
+     {
+       /* Lower bound only. */
+       if (min == 0)
+         {
+           if (name)
+             lex_error (lexer, _("Expected non-negative number for %s."),
+                        name);
+           else
+             lex_error (lexer, _("Expected non-negative number."));
+         }
+       else
+         {
+           if (name)
+             lex_error (lexer, _("Expected number %g or greater for %s."),
+                        min, name);
+           else
+             lex_error (lexer, _("Expected number %g or greater."), min);
+         }
+     }
+   else if (!lower)
+     {
+       /* Upper bound only. */
+       if (name)
+         lex_error (lexer,
+                    _("Expected number less than %g for %s."), max, name);
+       else
+         lex_error (lexer, _("Expected number less than %g."), max);
+     }
+   else
+     {
+       /* Both bounds. */
+       if (name)
+         lex_error (lexer, _("Expected number in [%g,%g) for %s."),
+                    min, max, name);
+       else
+         lex_error (lexer, _("Expected number in [%g,%g)."),
+                    min, max);
+     }
+   return false;
+ }
+
+ /* Requires the current token in LEXER to be a number in the open range
+    (MIN,MAX), that is, strictly greater than MIN and strictly less than MAX.
+    Returns true if it is.  Otherwise, reports an error, which mentions NAME if
+    NAME is nonnull, and returns false. */
+ bool
+ lex_force_num_range_open (struct lexer *lexer, const char *name,
+                           double min, double max)
+ {
+   bool too_small = false;
+   bool too_big = false;
+   if (lex_is_number (lexer))
+     {
+       too_small = lex_number (lexer) <= min;
+       too_big = lex_number (lexer) >= max;
+       if (!too_small && !too_big)
+         return true;
+     }
+
+   if (min >= max)
+     {
+       /* Empty range, maybe a bug in the caller.  Just report that a number
+          was needed. */
+       if (name)
+         lex_error (lexer, _("Number expected for %s."), name);
+       else
+         lex_error (lexer, _("Number expected."));
+       return false;
+     }
+
+   /* A bound is mentioned if it differs from the +/-DBL_MAX extreme or if the
+      token actually violated it. */
+   bool lower = min > -DBL_MAX || too_small;
+   bool upper = max < DBL_MAX || too_big;
+
+   if (!lower && !upper)
+     {
+       if (name)
+         lex_error (lexer, _("Number expected for %s."), name);
+       else
+         lex_error (lexer, _("Number expected."));
+     }
+   else if (!upper)
+     {
+       /* Lower bound only. */
+       if (min == 0)
+         {
+           if (name)
+             lex_error (lexer, _("Expected positive number for %s."), name);
+           else
+             lex_error (lexer, _("Expected positive number."));
+         }
+       else
+         {
+           if (name)
+             lex_error (lexer, _("Expected number greater than %g for %s."),
+                        min, name);
+           else
+             lex_error (lexer, _("Expected number greater than %g."), min);
+         }
+     }
+   else if (!lower)
+     {
+       /* Upper bound only. */
+       if (name)
+         lex_error (lexer, _("Expected number less than %g for %s."),
+                    max, name);
+       else
+         lex_error (lexer, _("Expected number less than %g."), max);
+     }
+   else
+     {
+       /* Both bounds. */
+       if (name)
+         lex_error (lexer, _("Expected number in (%g,%g) for %s."),
+                    min, max, name);
+       else
+         lex_error (lexer, _("Expected number in (%g,%g)."), min, max);
+     }
+   return false;
+ }
+
+ /* Requires the current token in LEXER to be an identifier (T_ID).  Returns
+    true if it is.  Otherwise, reports an error with lex_error() and returns
+    false. */
+ bool
+ lex_force_id (struct lexer *lexer)
+ {
+   bool ok = lex_token (lexer) == T_ID;
+   if (!ok)
+     lex_error (lexer, _("expecting identifier"));
+   return ok;
+ }
+\f
+/* Token accessors. */
+
+ /* Returns the type of the current token in LEXER, that is, the token at
+    lookahead distance 0. */
+ enum token_type
+ lex_token (const struct lexer *lexer)
+ {
+   enum token_type type = lex_next_token (lexer, 0);
+   return type;
+ }
+
+ /* Returns the numeric value of the current token in LEXER.
+
+    Only T_NEG_NUM and T_POS_NUM tokens have meaningful values.  For other
+    tokens this function will always return zero. */
+ double
+ lex_tokval (const struct lexer *lexer)
+ {
+   double value = lex_next_tokval (lexer, 0);
+   return value;
+ }
+
+ /* Returns the null-terminated, UTF-8 encoded string in the current token in
+    LEXER.
+
+    Only T_ID and T_STRING tokens have meaningful strings.  For other tokens
+    this function will always return NULL.
+
+    The UTF-8 encoding of the returned string is correct for variable names and
+    other identifiers.  Use filename_to_utf8() to use it as a filename.  Use
+    data_in() to use it in a "union value". */
+ const char *
+ lex_tokcstr (const struct lexer *lexer)
+ {
+   const char *s = lex_next_tokcstr (lexer, 0);
+   return s;
+ }
+
+ /* Returns the UTF-8 encoded string in the current token in LEXER, as a
+    substring.  The string is null-terminated (but the null terminator is not
+    included in the returned substring's 'length').
+
+    Only T_ID and T_STRING tokens have meaningful strings.  For other tokens
+    this function will always return NULL.
+
+    The UTF-8 encoding of the returned string is correct for variable names and
+    other identifiers.  Use filename_to_utf8() to use it as a filename.  Use
+    data_in() to use it in a "union value". */
+ struct substring
+ lex_tokss (const struct lexer *lexer)
+ {
+   struct substring ss = lex_next_tokss (lexer, 0);
+   return ss;
+ }
+\f
+/* Looking ahead.
+
+ A value of 0 for N as an argument to any of these functions refers to the
+ current token. Lookahead is limited to the current command. Any N greater
+ than the number of tokens remaining in the current command will be treated
+ as referring to a T_ENDCMD token. */
+
+ /* Returns the lex_token N tokens ahead of the current one in LEXER_ (N == 0
+    is the current token).  If lex_source__() returns NULL for LEXER_, returns
+    a pointer to a static T_STOP token instead. */
+ static const struct lex_token *
+ lex_next__ (const struct lexer *lexer_, int n)
+ {
+   static const struct lex_token stop_token = { .token = { .type = T_STOP } };
+
+   struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
+   struct lex_source *src = lex_source__ (lexer);
+   if (src == NULL)
+     return &stop_token;
+
+   return lex_source_next__ (src, n);
+ }
+
+ /* Returns the token at absolute offset OFS within SRC_'s parse array, calling
+    lex_source_get_parse() as often as needed to make that many tokens
+    available.
+
+    A negative OFS yields a static T_ENDCMD token.  If the last available token
+    is T_STOP or T_ENDCMD and OFS lies beyond it, that last token is returned
+    rather than fetching any further. */
+ static const struct lex_token *
+ lex_source_ofs__ (const struct lex_source *src_, int ofs)
+ {
+   static const struct lex_token endcmd_token
+     = { .token = { .type = T_ENDCMD } };
+
+   if (ofs < 0)
+     return &endcmd_token;
+
+   struct lex_source *src = CONST_CAST (struct lex_source *, src_);
+   for (;;)
+     {
+       if (ofs < src->n_parse)
+         return src->parse[ofs];
+
+       if (src->n_parse > 0)
+         {
+           const struct lex_token *last = src->parse[src->n_parse - 1];
+           bool at_end = (last->token.type == T_STOP
+                          || last->token.type == T_ENDCMD);
+           if (at_end)
+             return last;
+         }
+
+       lex_source_get_parse (src);
+     }
+ }
+
+ /* Returns the token N positions after SRC's current parse offset, by
+    converting N to an absolute offset and looking it up with
+    lex_source_ofs__(). */
+ static const struct lex_token *
+ lex_source_next__ (const struct lex_source *src, int n)
+ {
+   int ofs = n + src->parse_ofs;
+   return lex_source_ofs__ (src, ofs);
+ }
+
+ /* Returns the "struct token" for the token N after the current one in LEXER.
+    Pretty much any succeeding call into the lexer can invalidate the returned
+    pointer, although the string pointer within the returned token is only
+    invalidated by consuming the token (e.g. with lex_get()). */
+ const struct token *
+ lex_next (const struct lexer *lexer, int n)
+ {
+   const struct lex_token *t = lex_next__ (lexer, n);
+   return &t->token;
+ }
+
+ /* Returns the token type of the token N after the current one in LEXER. */
+ enum token_type
+ lex_next_token (const struct lexer *lexer, int n)
+ {
+   const struct token *token = lex_next (lexer, n);
+   return token->type;
+ }
+
+ /* Returns the number in the token N after the current one in LEXER.
+
+    Only T_NEG_NUM and T_POS_NUM tokens have meaningful values.  For other
+    tokens this function will always return zero. */
+ double
+ lex_next_tokval (const struct lexer *lexer, int n)
+ {
+   const struct token *token = lex_next (lexer, n);
+   return token_number (token);
+ }
+
+ /* Returns the null-terminated, UTF-8 encoded string in the token N after the
+    current one in LEXER.
+
+    Only T_ID and T_STRING tokens have meaningful strings.  For other tokens
+    this function will always return NULL.
+
+    The UTF-8 encoding of the returned string is correct for variable names and
+    other identifiers.  Use filename_to_utf8() to use it as a filename.  Use
+    data_in() to use it in a "union value". */
+ const char *
+ lex_next_tokcstr (const struct lexer *lexer, int n)
+ {
+   struct substring ss = lex_next_tokss (lexer, n);
+   return ss.string;
+ }
+
+ /* Returns the UTF-8 encoded string in the token N after the current one in
+    LEXER, as a substring.  The string is null-terminated (but the null
+    terminator is not included in the returned substring's 'length').
+
+    Only T_ID, T_MACRO_ID, T_STRING tokens have meaningful strings.  For other
+    tokens this function will always return NULL.
+
+    The UTF-8 encoding of the returned string is correct for variable names and
+    other identifiers.  Use filename_to_utf8() to use it as a filename.  Use
+    data_in() to use it in a "union value". */
+ struct substring
+ lex_next_tokss (const struct lexer *lexer, int n)
+ {
+   const struct token *token = lex_next (lexer, n);
+   return token->string;
+ }
+
+ /* Returns the offset of the current token within the command being parsed in
+    LEXER: 0 for the first token in a command, 1 for the second, and so on.
+    Returns 0 if lex_source__() yields no source for LEXER.  The return value
+    is useful later for referring to this token in calls to lex_ofs_*(). */
+ int
+ lex_ofs (const struct lexer *lexer)
+ {
+   struct lex_source *src = lex_source__ (lexer);
+   if (!src)
+     return 0;
+   return src->parse_ofs;
+ }
+
+ /* Returns the token within LEXER_'s current command with offset OFS.  Use
+    lex_ofs() to find out the offset of the current token.
+
+    If lex_source__() yields no source for LEXER_, returns a pointer to a
+    static T_STOP token. */
+ const struct token *
+ lex_ofs_token (const struct lexer *lexer_, int ofs)
+ {
+   static const struct token stop_token = { .type = T_STOP };
+
+   struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
+   const struct lex_source *src = lex_source__ (lexer);
+   if (src == NULL)
+     return &stop_token;
+
+   /* OFS is already an absolute offset, so look it up directly, the same way
+      lex_ofs_start_point() and lex_ofs_end_point() do.  Going through
+      lex_source_next__() would subtract the source's parse offset here only
+      for that function to add it back. */
+   return &lex_source_ofs__ (src, ofs)->token;
+ }
+
+ /* Allocates and returns a new struct msg_location that spans the tokens with
+    offsets OFS0 through OFS1, inclusive, within the current command in LEXER.
+    See lex_ofs() for an explanation of token offsets.
+
+    The caller owns and must eventually free the returned object. */
+ struct msg_location *
+ lex_ofs_location (const struct lexer *lexer, int ofs0, int ofs1)
+ {
+   /* lex_get_location() takes offsets relative to the current token. */
+   int current = lex_ofs (lexer);
+   int n0 = ofs0 - current;
+   int n1 = ofs1 - current;
+   return lex_get_location (lexer, n0, n1);
+ }
+
+ /* Returns a msg_point for the first character in the token with offset OFS,
+    where offset 0 is the first token in the command currently being parsed, 1
+    the second token, and so on.  These are absolute offsets, not relative to
+    the token currently being parsed within the command.
+
+    Returns zeros for a T_STOP token, and also when lex_source__() yields no
+    source for LEXER. */
+ struct msg_point
+ lex_ofs_start_point (const struct lexer *lexer, int ofs)
+ {
+   const struct lex_source *src = lex_source__ (lexer);
+   if (!src)
+     return (struct msg_point) { 0, 0 };
+
+   const struct lex_token *token = lex_source_ofs__ (src, ofs);
+   return lex_token_start_point (src, token);
+ }
+
+ /* Returns a msg_point for the last character, inclusive, in the token with
+    offset OFS, where offset 0 is the first token in the command currently
+    being parsed, 1 the second token, and so on.  These are absolute offsets,
+    not relative to the token currently being parsed within the command.
+
+    Returns zeros for a T_STOP token, and also when lex_source__() yields no
+    source for LEXER.
+
+    Most of the time, a single token is wholly within a single line of syntax,
+    so that the start and end point for a given offset have the same line
+    number.  There are two exceptions: a T_STRING token can be made up of
+    multiple segments on adjacent lines connected with "+" punctuators, and a
+    T_NEG_NUM token can consist of a "-" on one line followed by the number on
+    the next. */
+ struct msg_point
+ lex_ofs_end_point (const struct lexer *lexer, int ofs)
+ {
+   const struct lex_source *src = lex_source__ (lexer);
+   if (!src)
+     return (struct msg_point) { 0, 0 };
+
+   const struct lex_token *token = lex_source_ofs__ (src, ofs);
+   return lex_token_end_point (src, token);
+ }
+
+ /* Returns the syntax text for the tokens from N0 ahead of the current one
+    through N1 ahead of the current one, inclusive.  (For example, if N0 and N1
+    are both zero, this requests the syntax for the current token.)  Returns an
+    empty string if lex_source__() yields no source for LEXER.
+
+    The caller must eventually free the returned string (with free()).  The
+    syntax is encoded in UTF-8 and in the original form supplied to the lexer
+    so that, for example, it may include comments, spaces, and new-lines if it
+    spans multiple tokens.  Macro expansion, however, has already been
+    performed. */
+ char *
+ lex_next_representation (const struct lexer *lexer, int n0, int n1)
+ {
+   const struct lex_source *src = lex_source__ (lexer);
+   if (!src)
+     return xstrdup ("");
+
+   /* lex_source_syntax__() takes absolute offsets. */
+   return lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs);
+ }
+
+
+ /* Returns the syntax text for the tokens with offsets OFS0 through OFS1,
+    inclusive.  (For example, if OFS0 and OFS1 are both zero, this requests the
+    syntax for the first token in the current command.)  Returns an empty
+    string if lex_source__() yields no source for LEXER.
+
+    The caller must eventually free the returned string (with free()).  The
+    syntax is encoded in UTF-8 and in the original form supplied to the lexer
+    so that, for example, it may include comments, spaces, and new-lines if it
+    spans multiple tokens.  Macro expansion, however, has already been
+    performed. */
+ char *
+ lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1)
+ {
+   const struct lex_source *src = lex_source__ (lexer);
+   if (!src)
+     return xstrdup ("");
+
+   return lex_source_syntax__ (src, ofs0, ofs1);
+ }
+
+ /* Returns true if the token N ahead of the current one was produced by macro
+    expansion (its 'macro_rep' member is nonnull), false otherwise. */
+ bool
+ lex_next_is_from_macro (const struct lexer *lexer, int n)
+ {
+   const struct lex_token *t = lex_next__ (lexer, n);
+   return t->macro_rep != NULL;
+ }
+
+ /* Returns true if token ACTUAL matches token EXPECTED, false otherwise.
+
+    The token types must be equal.  In addition, numbers must have equal
+    values, identifiers must satisfy lex_id_match() with EXPECTED as the first
+    argument, and strings must be byte-for-byte identical.  Any other token
+    type matches on type alone. */
+ static bool
+ lex_tokens_match (const struct token *actual, const struct token *expected)
+ {
+   if (actual->type != expected->type)
+     return false;
+
+   if (actual->type == T_POS_NUM || actual->type == T_NEG_NUM)
+     return actual->number == expected->number;
+   else if (actual->type == T_ID)
+     return lex_id_match (expected->string, actual->string);
+   else if (actual->type == T_STRING)
+     {
+       if (actual->string.length != expected->string.length)
+         return false;
+       return !memcmp (actual->string.string, expected->string.string,
+                       actual->string.length);
+     }
+   else
+     return true;
+ }
+
+ /* Tokenizes S and compares its tokens, in order, against the tokens in LEXER
+    starting from the current one.  Returns the number of tokens in S if every
+    one of them matches, otherwise 0.  (Also returns 0 if S yields no tokens.)
+    Does not advance LEXER itself. */
+ static size_t
+ lex_at_phrase__ (struct lexer *lexer, const char *s)
+ {
+   struct string_lexer slex;
+   string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true);
+
+   size_t n_matched = 0;
+   struct token expected;
+   while (string_lexer_next (&slex, &expected))
+     {
+       const struct token *actual = lex_next (lexer, n_matched);
+       n_matched++;
+
+       bool match = lex_tokens_match (actual, &expected);
+       token_uninit (&expected);
+       if (!match)
+         return 0;
+     }
+   return n_matched;
+ }
+
+ /* Returns true if LEXER is positioned at the sequence of tokens that may be
+    parsed from S, false otherwise.  Does not skip the tokens.
+
+    S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
+    "2SLS", or "END INPUT PROGRAM".  Identifiers may be abbreviated to their
+    first three letters. */
+ bool
+ lex_at_phrase (struct lexer *lexer, const char *s)
+ {
+   size_t n_tokens = lex_at_phrase__ (lexer, s);
+   return n_tokens != 0;
+ }
+
+ /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
+    skips those tokens and returns true.  Otherwise, leaves LEXER where it is
+    and returns false.
+
+    S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
+    "2SLS", or "END INPUT PROGRAM".  Identifiers may be abbreviated to their
+    first three letters. */
+ bool
+ lex_match_phrase (struct lexer *lexer, const char *s)
+ {
+   size_t n_tokens = lex_at_phrase__ (lexer, s);
+   if (n_tokens == 0)
+     return false;
+
+   lex_get_n (lexer, n_tokens);
+   return true;
+ }
+
+ /* Returns the 1-based line number of the source text at the byte OFFSET in
+ SRC.
+
+ This is a binary search over the SRC->n_lines entries of SRC->lines[]
+ (presumably the byte offset at which each line starts, in increasing
+ order -- TODO confirm against the code that fills it in).  The result
+ is MID + 1 for the index MID such that
+ lines[MID] <= OFFSET < lines[MID + 1].  If the search reaches the last
+ entry, or if there are no entries at all, the result is SRC->n_lines. */
+ static int
+ lex_source_ofs_to_line_number (const struct lex_source *src, size_t offset)
+ {
+ size_t lo = 0;
+ size_t hi = src->n_lines;
+ for (;;)
+ {
+ size_t mid = (lo + hi) / 2;
+ if (mid + 1 >= src->n_lines) /* No entry after MID to compare against. */
+ return src->n_lines;
+ else if (offset >= src->lines[mid + 1]) /* OFFSET is past entry MID. */
+ lo = mid;
+ else if (offset < src->lines[mid]) /* OFFSET is before entry MID. */
+ hi = mid;
+ else /* lines[mid] <= offset < lines[mid + 1]. */
+ return mid + 1;
+ }
+ }
+
+ /* Returns the 1-based column number of the source text at the byte OFFSET in
+    SRC.  The column is the utf8_count_columns() width of the text between the
+    start of the line (just past the last new-line before OFFSET, or the start
+    of the buffer if there is none) and OFFSET, plus 1. */
+ static int
+ lex_source_ofs_to_column_number (const struct lex_source *src, size_t offset)
+ {
+   size_t line_start = 0;
+   const char *last_nl = memrchr (src->buffer, '\n', offset);
+   if (last_nl != NULL)
+     line_start = last_nl - src->buffer + 1;
+
+   size_t n_bytes = offset - line_start;
+   return utf8_count_columns (&src->buffer[line_start], n_bytes) + 1;
+ }
+
+ /* Returns the msg_point (1-based line and column) for the byte OFFSET in
+    SRC. */
+ static struct msg_point
+ lex_source_ofs_to_point__ (const struct lex_source *src, size_t offset)
+ {
+   int line = lex_source_ofs_to_line_number (src, offset);
+   int column = lex_source_ofs_to_column_number (src, offset);
+   return (struct msg_point) { .line = line, .column = column };
+ }
+
+ /* Returns the msg_point for the position TOKEN->token_pos within SRC, which
+    callers use as the location of TOKEN's first character. */
+ static struct msg_point
+ lex_token_start_point (const struct lex_source *src,
+                        const struct lex_token *token)
+ {
+   size_t start = token->token_pos;
+   return lex_source_ofs_to_point__ (src, start);
+ }
+
+static struct msg_point
+lex_token_end_point (const struct lex_source *src,
+ const struct lex_token *token)
+{
+ return lex_source_ofs_to_point__ (src, lex_token_end (token));