X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Flexer.c;h=35a9afa381f038968fafb27c05a87d57e408b176;hb=23265697ad1bf77242d28c2ab7a5983a264f6aa4;hp=329003406bf0281b0d01f99db81372bd4b5649b0;hpb=510cc9dd9baf3108ba55cfb7893384517c9930b4;p=pspp diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 329003406b..35a9afa381 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -85,6 +85,9 @@ static struct msg_point lex_token_start_point (const struct lex_source *, static struct msg_point lex_token_end_point (const struct lex_source *, const struct lex_token *); +static bool lex_ofs_at_phrase__ (struct lexer *, int ofs, const char *s, + size_t *n_matchedp); + /* Source offset of the last byte in TOKEN. */ static size_t lex_token_end (const struct lex_token *token) @@ -282,17 +285,18 @@ struct lexer }; static struct lex_source *lex_source__ (const struct lexer *); -static char *lex_source_get_syntax__ (const struct lex_source *, - int n0, int n1); +static char *lex_source_syntax__ (const struct lex_source *, + int ofs0, int ofs1); static const struct lex_token *lex_next__ (const struct lexer *, int n); static void lex_source_push_endcmd__ (struct lex_source *); static void lex_source_push_parse (struct lex_source *, struct lex_token *); static void lex_source_clear_parse (struct lex_source *); static bool lex_source_get_parse (struct lex_source *); -static void lex_source_error_valist (struct lex_source *, int n0, int n1, - const char *format, va_list) - PRINTF_FORMAT (4, 0); +static void lex_source_msg_valist (struct lex_source *, enum msg_class, + int ofs0, int ofs1, + const char *format, va_list) + PRINTF_FORMAT (5, 0); static const struct lex_token *lex_source_next__ (const struct lex_source *, int n); @@ -425,27 +429,77 @@ lex_error (struct lexer *lexer, const char *format, ...) va_list args; va_start (args, format); - lex_next_error_valist (lexer, 0, 0, format, args); + lex_ofs_msg_valist (lexer, SE, lex_ofs (lexer), lex_ofs (lexer), + format, args); va_end (args); } -/* Prints a syntax error message containing the current token and - given message MESSAGE (if non-null). */ +/* Prints a syntax error message for the span of tokens N0 through N1, + inclusive, from the current token in LEXER, adding message MESSAGE (if + non-null). */ void -lex_error_valist (struct lexer *lexer, const char *format, va_list args) +lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...) { - lex_next_error_valist (lexer, 0, 0, format, args); + va_list args; + + va_start (args, format); + int ofs = lex_ofs (lexer); + lex_ofs_msg_valist (lexer, SE, n0 + ofs, n1 + ofs, format, args); + va_end (args); } -/* Prints a syntax error message containing the current token and - given message MESSAGE (if non-null). */ +/* Prints a syntax error message for the span of tokens with offsets OFS0 + through OFS1, inclusive, within the current command in LEXER, adding message + MESSAGE (if non-null). */ void -lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...) +lex_ofs_error (struct lexer *lexer, int ofs0, int ofs1, const char *format, ...) +{ + va_list args; + + va_start (args, format); + lex_ofs_msg_valist (lexer, SE, ofs0, ofs1, format, args); + va_end (args); +} + +/* Prints a message of the given CLASS containing the current token and given + message MESSAGE (if non-null). */ +void +lex_msg (struct lexer *lexer, enum msg_class class, const char *format, ...) +{ + va_list args; + + va_start (args, format); + lex_ofs_msg_valist (lexer, class, lex_ofs (lexer), lex_ofs (lexer), + format, args); + va_end (args); +} + +/* Prints a syntax error message for the span of tokens N0 through N1, + inclusive, from the current token in LEXER, adding message MESSAGE (if + non-null). */ +void +lex_next_msg (struct lexer *lexer, enum msg_class class, int n0, int n1, + const char *format, ...) +{ + va_list args; + + va_start (args, format); + int ofs = lex_ofs (lexer); + lex_ofs_msg_valist (lexer, class, n0 + ofs, n1 + ofs, format, args); + va_end (args); +} + +/* Prints a message of the given CLASS for the span of tokens with offsets OFS0 + through OFS1, inclusive, within the current command in LEXER, adding message + MESSAGE (if non-null). */ +void +lex_ofs_msg (struct lexer *lexer, enum msg_class class, int ofs0, int ofs1, + const char *format, ...) { va_list args; va_start (args, format); - lex_next_error_valist (lexer, n0, n1, format, args); + lex_ofs_msg_valist (lexer, class, ofs0, ofs1, format, args); va_end (args); } @@ -466,18 +520,22 @@ void void lex_error_expecting_valist (struct lexer *lexer, va_list args) { - enum { MAX_OPTIONS = 9 }; - const char *options[MAX_OPTIONS]; - int n = 0; - while (n < MAX_OPTIONS) + const char **options = NULL; + size_t allocated = 0; + size_t n = 0; + + for (;;) { const char *option = va_arg (args, const char *); if (!option) break; + if (n >= allocated) + options = x2nrealloc (options, &allocated, sizeof *options); options[n++] = option; } lex_error_expecting_array (lexer, options, n); + free (options); } void @@ -490,62 +548,82 @@ lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n) break; case 1: - lex_error (lexer, _("expecting %s"), options[0]); + lex_error (lexer, _("Syntax error expecting %s."), options[0]); break; case 2: - lex_error (lexer, _("expecting %s or %s"), options[0], options[1]); + lex_error (lexer, _("Syntax error expecting %s or %s."), + options[0], options[1]); break; case 3: - lex_error (lexer, _("expecting %s, %s, or %s"), options[0], options[1], - options[2]); + lex_error (lexer, _("Syntax error expecting %s, %s, or %s."), + options[0], options[1], options[2]); break; case 4: - lex_error (lexer, _("expecting %s, %s, %s, or %s"), + lex_error (lexer, _("Syntax error expecting %s, %s, %s, or %s."), options[0], options[1], options[2], options[3]); break; case 5: - lex_error (lexer, _("expecting %s, %s, %s, %s, or %s"), + lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, or %s."), options[0], options[1], options[2], options[3], options[4]); break; case 6: - lex_error (lexer, _("expecting %s, %s, %s, %s, %s, or %s"), + lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, or %s."), options[0], options[1], options[2], options[3], options[4], options[5]); break; case 7: - lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, or %s"), + lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, %s, " + "or %s."), options[0], options[1], options[2], options[3], options[4], options[5], options[6]); break; case 8: - lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, %s, or %s"), + lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, %s, %s, " + "or %s."), options[0], options[1], options[2], options[3], options[4], options[5], options[6], options[7]); break; default: - lex_error (lexer, NULL); + { + struct string s = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < n; i++) + { + if (i > 0) + ds_put_cstr (&s, ", "); + ds_put_cstr (&s, options[i]); + } + lex_error (lexer, _("Syntax error expecting one of the following: %s."), + ds_cstr (&s)); + ds_destroy (&s); + } + break; } } /* Reports an error to the effect that subcommand SBC may only be specified - once. - - This function does not take a lexer as an argument or use lex_error(), - because the result would ordinarily just be redundant: "Syntax error at - SUBCOMMAND: Subcommand SUBCOMMAND may only be specified once.", which does - not help the user find the error. */ + once. */ void -lex_sbc_only_once (const char *sbc) +lex_sbc_only_once (struct lexer *lexer, const char *sbc) { - msg (SE, _("Subcommand %s may only be specified once."), sbc); + int ofs = lex_ofs (lexer) - 1; + if (lex_ofs_token (lexer, ofs)->type == T_EQUALS) + ofs--; + + /* lex_ofs_at_phrase__() handles subcommand names that are keywords, such as + BY. */ + if (lex_ofs_at_phrase__ (lexer, ofs, sbc, NULL)) + lex_ofs_error (lexer, ofs, ofs, + _("Subcommand %s may only be specified once."), sbc); + else + msg (SE, _("Subcommand %s may only be specified once."), sbc); } /* Reports an error to the effect that subcommand SBC is missing. @@ -555,9 +633,10 @@ lex_sbc_only_once (const char *sbc) command has been parsed, and so lex_error() would always report "Syntax error at end of command", which does not help the user find the error. */ void -lex_sbc_missing (const char *sbc) +lex_sbc_missing (struct lexer *lexer, const char *sbc) { - msg (SE, _("Required subcommand %s was not specified."), sbc); + lex_ofs_error (lexer, 0, lex_max_ofs (lexer), + _("Required subcommand %s was not specified."), sbc); } /* Reports an error to the effect that specification SPEC may only be specified @@ -565,7 +644,7 @@ lex_sbc_missing (const char *sbc) void lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec) { - lex_error (lexer, _("%s may only be specified once within subcommand %s"), + lex_error (lexer, _("%s may only be specified once within subcommand %s."), spec, sbc); } @@ -574,36 +653,18 @@ lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec) void lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec) { - lex_error (lexer, _("Required %s specification missing from %s subcommand"), - sbc, spec); + lex_error (lexer, _("Required %s specification missing from %s subcommand."), + spec, sbc); } -/* Prints a syntax error message containing the current token and - given message MESSAGE (if non-null). */ +/* Prints a syntax error message for the span of tokens with offsets OFS0 + through OFS1, inclusive, within the current command in LEXER, adding message + MESSAGE (if non-null) with the given ARGS. */ void -lex_next_error_valist (struct lexer *lexer, int n0, int n1, - const char *format, va_list args) +lex_ofs_msg_valist (struct lexer *lexer, enum msg_class class, + int ofs0, int ofs1, const char *format, va_list args) { - struct lex_source *src = lex_source__ (lexer); - - if (src != NULL) - lex_source_error_valist (src, n0, n1, format, args); - else - { - struct string s; - - ds_init_empty (&s); - ds_put_format (&s, _("Syntax error at end of input")); - if (format != NULL) - { - ds_put_cstr (&s, ": "); - ds_put_vformat (&s, format, args); - } - if (ds_last (&s) != '.') - ds_put_byte (&s, '.'); - msg (SE, "%s", ds_cstr (&s)); - ds_destroy (&s); - } + lex_source_msg_valist (lex_source__ (lexer), class, ofs0, ofs1, format, args); } /* Checks that we're at end of command. @@ -615,7 +676,7 @@ lex_end_of_command (struct lexer *lexer) { if (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_STOP) { - lex_error (lexer, _("expecting end of command")); + lex_error (lexer, _("Syntax error expecting end of command.")); return CMD_FAILURE; } else @@ -817,7 +878,7 @@ lex_force_string (struct lexer *lexer) return true; else { - lex_error (lexer, _("expecting string")); + lex_error (lexer, _("Syntax error expecting string.")); return false; } } @@ -846,7 +907,7 @@ lex_force_int (struct lexer *lexer) return true; else { - lex_error (lexer, _("expecting integer")); + lex_error (lexer, _("Syntax error expecting integer.")); return false; } } @@ -873,23 +934,25 @@ lex_force_int_range (struct lexer *lexer, const char *name, long min, long max) /* Weird, maybe a bug in the caller. Just report that we needed an integer. */ if (name) - lex_error (lexer, _("Integer expected for %s."), name); + lex_error (lexer, _("Syntax error expecting integer for %s."), name); else - lex_error (lexer, _("Integer expected.")); + lex_error (lexer, _("Syntax error expecting integer.")); } else if (min == max) { if (name) - lex_error (lexer, _("Expected %ld for %s."), min, name); + lex_error (lexer, _("Syntax error expecting %ld for %s."), min, name); else - lex_error (lexer, _("Expected %ld."), min); + lex_error (lexer, _("Syntax error expecting %ld."), min); } else if (min + 1 == max) { if (name) - lex_error (lexer, _("Expected %ld or %ld for %s."), min, min + 1, name); + lex_error (lexer, _("Syntax error expecting %ld or %ld for %s."), + min, min + 1, name); else - lex_error (lexer, _("Expected %ld or %ld."), min, min + 1); + lex_error (lexer, _("Syntax error expecting %ld or %ld."), + min, min + 1); } else { @@ -900,10 +963,12 @@ lex_force_int_range (struct lexer *lexer, const char *name, long min, long max) { if (name) lex_error (lexer, - _("Expected integer between %ld and %ld for %s."), + _("Syntax error expecting integer " + "between %ld and %ld for %s."), min, max, name); else - lex_error (lexer, _("Expected integer between %ld and %ld."), + lex_error (lexer, _("Syntax error expecting integer " + "between %ld and %ld."), min, max); } else if (report_lower_bound) @@ -911,44 +976,53 @@ lex_force_int_range (struct lexer *lexer, const char *name, long min, long max) if (min == 0) { if (name) - lex_error (lexer, _("Expected non-negative integer for %s."), + lex_error (lexer, _("Syntax error expecting " + "non-negative integer for %s."), name); else - lex_error (lexer, _("Expected non-negative integer.")); + lex_error (lexer, _("Syntax error expecting " + "non-negative integer.")); } else if (min == 1) { if (name) - lex_error (lexer, _("Expected positive integer for %s."), + lex_error (lexer, _("Syntax error expecting " + "positive integer for %s."), name); else - lex_error (lexer, _("Expected positive integer.")); + lex_error (lexer, _("Syntax error expecting " + "positive integer.")); } else { if (name) - lex_error (lexer, _("Expected integer %ld or greater for %s."), + lex_error (lexer, _("Syntax error expecting " + "integer %ld or greater for %s."), min, name); else - lex_error (lexer, _("Expected integer %ld or greater."), min); + lex_error (lexer, _("Syntax error expecting " + "integer %ld or greater."), min); } } else if (report_upper_bound) { if (name) lex_error (lexer, - _("Expected integer less than or equal to %ld for %s."), + _("Syntax error expecting integer less than or equal " + "to %ld for %s."), max, name); else - lex_error (lexer, _("Expected integer less than or equal to %ld."), + lex_error (lexer, _("Syntax error expecting integer less than or " + "equal to %ld."), max); } else { if (name) - lex_error (lexer, _("Integer expected for %s."), name); + lex_error (lexer, _("Syntax error expecting integer for %s."), + name); else - lex_error (lexer, _("Integer expected.")); + lex_error (lexer, _("Syntax error expecting integer.")); } } return false; @@ -962,7 +1036,262 @@ lex_force_num (struct lexer *lexer) if (lex_is_number (lexer)) return true; - lex_error (lexer, _("expecting number")); + lex_error (lexer, _("Syntax error expecting number.")); + return false; +} + +/* If the current token is an number in the closed range [MIN,MAX], does + nothing and returns true. Otherwise, reports an error and returns false. + If NAME is nonnull, then it is used in the error message. */ +bool +lex_force_num_range_closed (struct lexer *lexer, const char *name, + double min, double max) +{ + bool is_number = lex_is_number (lexer); + bool too_small = is_number && lex_number (lexer) < min; + bool too_big = is_number && lex_number (lexer) > max; + if (is_number && !too_small && !too_big) + return true; + + if (min > max) + { + /* Weird, maybe a bug in the caller. Just report that we needed an + number. */ + if (name) + lex_error (lexer, _("Syntax error expecting number for %s."), name); + else + lex_error (lexer, _("Syntax error expecting number.")); + } + else if (min == max) + { + if (name) + lex_error (lexer, _("Syntax error expecting number %g for %s."), + min, name); + else + lex_error (lexer, _("Syntax error expecting number %g."), min); + } + else + { + bool report_lower_bound = min > -DBL_MAX || too_small; + bool report_upper_bound = max < DBL_MAX || too_big; + + if (report_lower_bound && report_upper_bound) + { + if (name) + lex_error (lexer, + _("Syntax error expecting number " + "between %g and %g for %s."), + min, max, name); + else + lex_error (lexer, _("Syntax error expecting number " + "between %g and %g."), + min, max); + } + else if (report_lower_bound) + { + if (min == 0) + { + if (name) + lex_error (lexer, _("Syntax error expecting " + "non-negative number for %s."), + name); + else + lex_error (lexer, _("Syntax error expecting " + "non-negative number.")); + } + else + { + if (name) + lex_error (lexer, _("Syntax error expecting number " + "%g or greater for %s."), + min, name); + else + lex_error (lexer, _("Syntax error expecting number " + "%g or greater."), min); + } + } + else if (report_upper_bound) + { + if (name) + lex_error (lexer, + _("Syntax error expecting number " + "less than or equal to %g for %s."), + max, name); + else + lex_error (lexer, _("Syntax error expecting number " + "less than or equal to %g."), + max); + } + else + { + if (name) + lex_error (lexer, _("Syntax error expecting number for %s."), name); + else + lex_error (lexer, _("Syntax error expecting number.")); + } + } + return false; +} + +/* If the current token is an number in the half-open range [MIN,MAX), does + nothing and returns true. Otherwise, reports an error and returns false. + If NAME is nonnull, then it is used in the error message. */ +bool +lex_force_num_range_halfopen (struct lexer *lexer, const char *name, + double min, double max) +{ + bool is_number = lex_is_number (lexer); + bool too_small = is_number && lex_number (lexer) < min; + bool too_big = is_number && lex_number (lexer) >= max; + if (is_number && !too_small && !too_big) + return true; + + if (min >= max) + { + /* Weird, maybe a bug in the caller. Just report that we needed an + number. */ + if (name) + lex_error (lexer, _("Syntax error expecting number for %s."), name); + else + lex_error (lexer, _("Syntax error expecting number.")); + } + else + { + bool report_lower_bound = min > -DBL_MAX || too_small; + bool report_upper_bound = max < DBL_MAX || too_big; + + if (report_lower_bound && report_upper_bound) + { + if (name) + lex_error (lexer, _("Syntax error expecting number " + "in [%g,%g) for %s."), + min, max, name); + else + lex_error (lexer, _("Syntax error expecting number in [%g,%g)."), + min, max); + } + else if (report_lower_bound) + { + if (min == 0) + { + if (name) + lex_error (lexer, _("Syntax error expecting " + "non-negative number for %s."), + name); + else + lex_error (lexer, _("Syntax error expecting " + "non-negative number.")); + } + else + { + if (name) + lex_error (lexer, _("Syntax error expecting " + "number %g or greater for %s."), + min, name); + else + lex_error (lexer, _("Syntax error expecting " + "number %g or greater."), min); + } + } + else if (report_upper_bound) + { + if (name) + lex_error (lexer, + _("Syntax error expecting " + "number less than %g for %s."), max, name); + else + lex_error (lexer, _("Syntax error expecting " + "number less than %g."), max); + } + else + { + if (name) + lex_error (lexer, _("Syntax error expecting number for %s."), name); + else + lex_error (lexer, _("Syntax error expecting number.")); + } + } + return false; +} + +/* If the current token is an number in the open range (MIN,MAX), does + nothing and returns true. Otherwise, reports an error and returns false. + If NAME is nonnull, then it is used in the error message. */ +bool +lex_force_num_range_open (struct lexer *lexer, const char *name, + double min, double max) +{ + bool is_number = lex_is_number (lexer); + bool too_small = is_number && lex_number (lexer) <= min; + bool too_big = is_number && lex_number (lexer) >= max; + if (is_number && !too_small && !too_big) + return true; + + if (min >= max) + { + /* Weird, maybe a bug in the caller. Just report that we needed an + number. */ + if (name) + lex_error (lexer, _("Syntax error expecting number for %s."), name); + else + lex_error (lexer, _("Syntax error expecting number.")); + } + else + { + bool report_lower_bound = min > -DBL_MAX || too_small; + bool report_upper_bound = max < DBL_MAX || too_big; + + if (report_lower_bound && report_upper_bound) + { + if (name) + lex_error (lexer, _("Syntax error expecting number " + "in (%g,%g) for %s."), + min, max, name); + else + lex_error (lexer, _("Syntax error expecting number " + "in (%g,%g)."), min, max); + } + else if (report_lower_bound) + { + if (min == 0) + { + if (name) + lex_error (lexer, _("Syntax error expecting " + "positive number for %s."), name); + else + lex_error (lexer, _("Syntax error expecting " + "positive number.")); + } + else + { + if (name) + lex_error (lexer, _("Syntax error expecting number " + "greater than %g for %s."), + min, name); + else + lex_error (lexer, _("Syntax error expecting number " + "greater than %g."), min); + } + } + else if (report_upper_bound) + { + if (name) + lex_error (lexer, _("Syntax error expecting number " + "less than %g for %s."), + max, name); + else + lex_error (lexer, _("Syntax error expecting number " + "less than %g."), max); + } + else + { + if (name) + lex_error (lexer, _("Syntax error expecting number " + "for %s."), name); + else + lex_error (lexer, _("Syntax error expecting number.")); + } + } return false; } @@ -974,7 +1303,7 @@ lex_force_id (struct lexer *lexer) if (lex_token (lexer) == T_ID) return true; - lex_error (lexer, _("expecting identifier")); + lex_error (lexer, _("Syntax error expecting identifier.")); return false; } @@ -1151,6 +1480,25 @@ lex_ofs (const struct lexer *lexer) return src ? src->parse_ofs : 0; } +/* Returns the offset of the last token in the current command. */ +int +lex_max_ofs (const struct lexer *lexer) +{ + struct lex_source *src = lex_source__ (lexer); + if (!src) + return 0; + + int ofs = MAX (1, src->n_parse) - 1; + for (;;) + { + enum token_type type = lex_source_ofs__ (src, ofs)->token.type; + if (type == T_ENDCMD || type == T_STOP) + return ofs; + + ofs++; + } +} + /* Returns the token within LEXER's current command with offset OFS. Use lex_ofs() to find out the offset of the current token. */ const struct token * @@ -1221,15 +1569,37 @@ lex_ofs_end_point (const struct lexer *lexer, int ofs) /* Returns the text of the syntax in tokens N0 ahead of the current one, through N1 ahead of the current one, inclusive. (For example, if N0 and N1 - are both zero, this requests the syntax for the current token.) The caller - must eventually free the returned string (with free()). The syntax is - encoded in UTF-8 and in the original form supplied to the lexer so that, for - example, it may include comments, spaces, and new-lines if it spans multiple - tokens. Macro expansion, however, has already been performed. */ + are both zero, this requests the syntax for the current token.) + + The caller must eventually free the returned string (with free()). The + syntax is encoded in UTF-8 and in the original form supplied to the lexer so + that, for example, it may include comments, spaces, and new-lines if it + spans multiple tokens. Macro expansion, however, has already been + performed. */ char * lex_next_representation (const struct lexer *lexer, int n0, int n1) { - return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1); + const struct lex_source *src = lex_source__ (lexer); + return (src + ? lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs) + : xstrdup ("")); +} + + +/* Returns the text of the syntax in tokens with offsets OFS0 to OFS1, + inclusive. (For example, if OFS0 and OFS1 are both zero, this requests the + syntax for the first token in the current command.) + + The caller must eventually free the returned string (with free()). The + syntax is encoded in UTF-8 and in the original form supplied to the lexer so + that, for example, it may include comments, spaces, and new-lines if it + spans multiple tokens. Macro expansion, however, has already been + performed. */ +char * +lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1) +{ + const struct lex_source *src = lex_source__ (lexer); + return src ? lex_source_syntax__ (src, ofs0, ofs1) : xstrdup (""); } /* Returns true if the token N ahead of the current one was produced by macro @@ -1265,22 +1635,31 @@ lex_tokens_match (const struct token *actual, const struct token *expected) } } -static size_t -lex_at_phrase__ (struct lexer *lexer, const char *s) +static bool +lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s, + size_t *n_matchedp) { struct string_lexer slex; struct token token; - size_t i = 0; + size_t n_matched = 0; + bool all_matched = true; string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true); while (string_lexer_next (&slex, &token)) { - bool match = lex_tokens_match (lex_next (lexer, i++), &token); + bool match = lex_tokens_match (lex_ofs_token (lexer, ofs + n_matched), + &token); token_uninit (&token); if (!match) - return 0; + { + all_matched = false; + break; + } + n_matched++; } - return i; + if (n_matchedp) + *n_matchedp = n_matched; + return all_matched; } /* If LEXER is positioned at the sequence of tokens that may be parsed from S, @@ -1292,7 +1671,7 @@ lex_at_phrase__ (struct lexer *lexer, const char *s) bool lex_at_phrase (struct lexer *lexer, const char *s) { - return lex_at_phrase__ (lexer, s) > 0; + return lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, NULL); } /* If LEXER is positioned at the sequence of tokens that may be parsed from S, @@ -1304,10 +1683,29 @@ lex_at_phrase (struct lexer *lexer, const char *s) bool lex_match_phrase (struct lexer *lexer, const char *s) { - size_t n = lex_at_phrase__ (lexer, s); - if (n > 0) - lex_get_n (lexer, n); - return n > 0; + size_t n_matched; + if (!lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched)) + return false; + lex_get_n (lexer, n_matched); + return true; +} + +/* If LEXER is positioned at the sequence of tokens that may be parsed from S, + skips it and returns true. Otherwise, issues an error and returns false. + + S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS", + "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their + first three letters. */ +bool +lex_force_match_phrase (struct lexer *lexer, const char *s) +{ + size_t n_matched; + bool ok = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched); + if (ok) + lex_get_n (lexer, n_matched); + else + lex_next_error (lexer, 0, n_matched, _("Syntax error expecting `%s'."), s); + return ok; } /* Returns the 1-based line number of the source text at the byte OFFSET in @@ -1373,6 +1771,7 @@ lex_token_location (const struct lex_source *src, .file_name = intern_new_if_nonnull (src->reader->file_name), .start = lex_token_start_point (src, t0), .end = lex_token_end_point (src, t1), + .src = CONST_CAST (struct lex_source *, src), }; } @@ -1386,11 +1785,11 @@ lex_token_location_rw (const struct lex_source *src, } static struct msg_location * -lex_source_get_location (const struct lex_source *src, int n0, int n1) +lex_source_get_location (const struct lex_source *src, int ofs0, int ofs1) { return lex_token_location_rw (src, - lex_source_next__ (src, n0), - lex_source_next__ (src, n1)); + lex_source_ofs__ (src, ofs0), + lex_source_ofs__ (src, ofs1)); } /* Returns the name of the syntax file from which the current command is drawn. @@ -1500,9 +1899,11 @@ void lex_discard_noninteractive (struct lexer *lexer) { struct lex_source *src = lex_source__ (lexer); - if (src != NULL) { + if (src->reader->error == LEX_ERROR_IGNORE) + return; + lex_stage_clear (&src->pp); lex_stage_clear (&src->merge); lex_source_clear_parse (src); @@ -1557,32 +1958,39 @@ lex_source__ (const struct lexer *lexer) : ll_data (ll_head (&lexer->sources), struct lex_source, ll)); } -/* Returns the text of the syntax in SRC for tokens N0 ahead of the current - one, through N1 ahead of the current one, inclusive. (For example, if N0 - and N1 are both zero, this requests the syntax for the current token.) The - caller must eventually free the returned string (with free()). The syntax - is encoded in UTF-8 and in the original form supplied to the lexer so that, - for example, it may include comments, spaces, and new-lines if it spans - multiple tokens. Macro expansion, however, has already been performed. */ +const struct lex_source * +lex_source (const struct lexer *lexer) +{ + return lex_source__ (lexer); +} + +/* Returns the text of the syntax in SRC for tokens with offsets OFS0 through + OFS1 in the current command, inclusive. (For example, if OFS0 and OFS1 are + both zero, this requests the syntax for the first token in the current + command.) The caller must eventually free the returned string (with + free()). The syntax is encoded in UTF-8 and in the original form supplied + to the lexer so that, for example, it may include comments, spaces, and + new-lines if it spans multiple tokens. Macro expansion, however, has + already been performed. */ static char * -lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1) +lex_source_syntax__ (const struct lex_source *src, int ofs0, int ofs1) { struct string s = DS_EMPTY_INITIALIZER; - for (size_t i = n0; i <= n1; ) + for (size_t i = ofs0; i <= ofs1; ) { /* Find [I,J) as the longest sequence of tokens not produced by macro expansion, or otherwise the longest sequence expanded from a single macro call. */ - const struct lex_token *first = lex_source_next__ (src, i); + const struct lex_token *first = lex_source_ofs__ (src, i); size_t j; - for (j = i + 1; j <= n1; j++) + for (j = i + 1; j <= ofs1; j++) { - const struct lex_token *cur = lex_source_next__ (src, j); + const struct lex_token *cur = lex_source_ofs__ (src, j); if ((first->macro_rep != NULL) != (cur->macro_rep != NULL) || first->macro_rep != cur->macro_rep) break; } - const struct lex_token *last = lex_source_next__ (src, j - 1); + const struct lex_token *last = lex_source_ofs__ (src, j - 1); /* Now add the syntax for this sequence of tokens to SRC. */ if (!ds_is_empty (&s)) @@ -1607,10 +2015,10 @@ lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1) } static bool -lex_source_contains_macro_call (struct lex_source *src, int n0, int n1) +lex_source_contains_macro_call (struct lex_source *src, int ofs0, int ofs1) { - for (size_t i = n0; i <= n1; i++) - if (lex_source_next__ (src, i)->macro_rep) + for (int i = ofs0; i <= ofs1; i++) + if (lex_source_ofs__ (src, i)->macro_rep) return true; return false; } @@ -1625,13 +2033,13 @@ lex_source_contains_macro_call (struct lex_source *src, int n0, int n1) The caller must not modify or free the returned string. */ static struct substring -lex_source_get_macro_call (struct lex_source *src, int n0, int n1) +lex_source_get_macro_call (struct lex_source *src, int ofs0, int ofs1) { - if (!lex_source_contains_macro_call (src, n0, n1)) + if (!lex_source_contains_macro_call (src, ofs0, ofs1)) return ss_empty (); - const struct lex_token *token0 = lex_source_next__ (src, n0); - const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1)); + const struct lex_token *token0 = lex_source_ofs__ (src, ofs0); + const struct lex_token *token1 = lex_source_ofs__ (src, MAX (ofs0, ofs1)); size_t start = token0->token_pos; size_t end = token1->token_pos + token1->token_len; @@ -1639,63 +2047,39 @@ lex_source_get_macro_call (struct lex_source *src, int n0, int n1) } static void -lex_source_error_valist (struct lex_source *src, int n0, int n1, - const char *format, va_list args) +lex_source_msg_valist (struct lex_source *src, enum msg_class class, + int ofs0, int ofs1, const char *format, va_list args) { - const struct lex_token *token; - struct string s; - - ds_init_empty (&s); + struct string s = DS_EMPTY_INITIALIZER; - token = lex_source_next__ (src, n0); - if (token->token.type == T_ENDCMD) - ds_put_cstr (&s, _("Syntax error at end of command")); - else + if (src) { - /* Get the syntax that caused the error. */ - char *raw_syntax = lex_source_get_syntax__ (src, n0, n1); - char syntax[64]; - str_ellipsize (ss_cstr (raw_syntax), syntax, sizeof syntax); - free (raw_syntax); - /* Get the macro call(s) that expanded to the syntax that caused the error. */ char call[64]; - str_ellipsize (lex_source_get_macro_call (src, n0, n1), + str_ellipsize (lex_source_get_macro_call (src, ofs0, ofs1), call, sizeof call); - - if (syntax[0]) - { - if (call[0]) - ds_put_format (&s, - _("Syntax error at `%s' (in expansion of `%s')"), - syntax, call); - else - ds_put_format (&s, _("Syntax error at `%s'"), syntax); - } - else - { - if (call[0]) - ds_put_format (&s, _("Syntax error in syntax expanded from `%s'"), - call); - else - ds_put_cstr (&s, _("Syntax error")); - } + if (call[0]) + ds_put_format (&s, _("In syntax expanded from `%s'"), call); } + else + ds_put_cstr (&s, _("At end of input")); + if (!ds_is_empty (&s)) + ds_put_cstr (&s, ": "); if (format) - { - ds_put_cstr (&s, ": "); - ds_put_vformat (&s, format, args); - } + ds_put_vformat (&s, format, args); + else + ds_put_cstr (&s, _("Syntax error.")); + if (ds_last (&s) != '.') ds_put_byte (&s, '.'); struct msg *m = xmalloc (sizeof *m); *m = (struct msg) { - .category = MSG_C_SYNTAX, - .severity = MSG_S_ERROR, - .location = lex_source_get_location (src, n0, n1), + .category = msg_class_to_category (class), + .severity = msg_class_to_severity (class), + .location = src ? lex_source_get_location (src, ofs0, ofs1) : NULL, .text = ds_steal_cstr (&s), }; msg_emit (m); @@ -1709,8 +2093,7 @@ lex_get_error (struct lex_source *src, const struct lex_token *token) syntax, sizeof syntax); struct string s = DS_EMPTY_INITIALIZER; - ds_put_format (&s, _("Syntax error at `%s'"), syntax); - ds_put_format (&s, ": %s", token->token.string.string); + ds_put_cstr (&s, token->token.string.string); struct msg *m = xmalloc (sizeof *m); *m = (struct msg) { @@ -2093,7 +2476,7 @@ lex_set_message_handler (struct lexer *lexer, msg_set_handler (&msg_handler); } -void +struct lex_source * lex_source_ref (const struct lex_source *src_) { struct lex_source *src = CONST_CAST (struct lex_source *, src_); @@ -2102,6 +2485,7 @@ lex_source_ref (const struct lex_source *src_) assert (src->n_refs > 0); src->n_refs++; } + return src; } void @@ -2247,9 +2631,7 @@ lex_reader_for_substring_nocopy (struct substring s, const char *encoding) struct lex_reader * lex_reader_for_string (const char *s, const char *encoding) { - struct substring ss; - ss_alloc_substring (&ss, ss_cstr (s)); - return lex_reader_for_substring_nocopy (ss, encoding); + return lex_reader_for_substring_nocopy (ss_clone (ss_cstr (s)), encoding); } /* Formats FORMAT as a printf()-like format string and creates and returns a @@ -2309,6 +2691,13 @@ lex_source_get_line (const struct lex_source *src, int line) return ss_empty (); size_t ofs = src->lines[line - 1]; - size_t end = line >= src->n_lines ? src->length : src->lines[line]; + size_t end; + if (line < src->n_lines) + end = src->lines[line]; + else + { + const char *newline = memchr (src->buffer + ofs, '\n', src->length - ofs); + end = newline ? newline - src->buffer : src->length; + } return ss_buffer (&src->buffer[ofs], end - ofs); }