1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/lexer.h"
32 #include "language/command.h"
33 #include "language/lexer/macro.h"
34 #include "language/lexer/scan.h"
35 #include "language/lexer/segment.h"
36 #include "language/lexer/token.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/cast.h"
39 #include "libpspp/deque.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/intern.h"
42 #include "libpspp/ll.h"
43 #include "libpspp/message.h"
44 #include "libpspp/misc.h"
45 #include "libpspp/str.h"
46 #include "libpspp/u8-istream.h"
47 #include "output/journal.h"
48 #include "output/output-item.h"
50 #include "gl/c-ctype.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
53 #include "gl/xmemdup0.h"
56 #define _(msgid) gettext (msgid)
57 #define N_(msgid) msgid
59 /* A token within a lex_source. */
62 /* The regular token information. */
65 /* For a token obtained through the lexer in an ordinary way, this is the
66 location of the token in terms of the lex_source's buffer.
68 For a token produced through macro expansion, this is the entire macro
70 size_t token_pos; /* Offset into src->buffer of token start. */
71 size_t token_len; /* Length of source for token in bytes. */
73 /* For a token obtained through macro expansion, this is just this token.
75 For a token obtained through the lexer in an ordinary way, these are
77 char *macro_rep; /* The whole macro expansion. */
78 size_t ofs; /* Offset of this token in macro_rep. */
79 size_t len; /* Length of this token in macro_rep. */
80 size_t *ref_cnt; /* Number of lex_tokens that refer to macro_rep. */
83 static struct msg_point lex_token_start_point (const struct lex_source *,
84 const struct lex_token *);
85 static struct msg_point lex_token_end_point (const struct lex_source *,
86 const struct lex_token *);
88 static bool lex_ofs_at_phrase__ (struct lexer *, int ofs, const char *s,
91 /* Returns the source offset of the last byte in TOKEN, that is, TOKEN's
   start offset plus its length, minus one.

   A zero-length token is treated as if it were one byte long (see the MAX
   below), so the result for such a token is its start offset rather than the
   offset of the byte before it. */
93 lex_token_end (const struct lex_token *token)
95 return token->token_pos + MAX (token->token_len, 1) - 1;
99 lex_token_destroy (struct lex_token *t)
101 token_uninit (&t->token);
104 assert (*t->ref_cnt > 0);
114 /* A deque of lex_tokens that comprises one stage in the token pipeline in a
119 struct lex_token **tokens;
122 static void lex_stage_clear (struct lex_stage *);
123 static void lex_stage_uninit (struct lex_stage *);
125 static size_t lex_stage_count (const struct lex_stage *);
126 static bool lex_stage_is_empty (const struct lex_stage *);
128 static struct lex_token *lex_stage_first (struct lex_stage *);
129 static struct lex_token *lex_stage_nth (struct lex_stage *, size_t ofs);
131 static void lex_stage_push_last (struct lex_stage *, struct lex_token *);
132 static void lex_stage_pop_first (struct lex_stage *);
134 static void lex_stage_shift (struct lex_stage *dst, struct lex_stage *src,
137 /* Deletes all the tokens from STAGE.

   Tokens are removed one at a time with lex_stage_pop_first(), which also
   destroys each of them.  The array that backs the deque is not freed here;
   lex_stage_uninit() does that. */
139 lex_stage_clear (struct lex_stage *stage)
141 while (!deque_is_empty (&stage->deque))
142 lex_stage_pop_first (stage);
145 /* Deletes all the tokens from STAGE and frees storage for the deque.

   The tokens are destroyed via lex_stage_clear() and then the token array is
   freed.  STAGE itself is not freed. */
147 lex_stage_uninit (struct lex_stage *stage)
149 lex_stage_clear (stage);
150 free (stage->tokens);
153 /* Returns true if STAGE contains no tokens, otherwise false.

   This simply asks STAGE's deque whether it is empty. */
155 lex_stage_is_empty (const struct lex_stage *stage)
157 return deque_is_empty (&stage->deque);
160 /* Returns the number of tokens in STAGE, as reported by deque_count() on
   STAGE's deque. */
162 lex_stage_count (const struct lex_stage *stage)
164 return deque_count (&stage->deque);
167 /* Returns the first token in STAGE, which must be nonempty.
168 The first token is the one accessed with the least lookahead.

   Equivalent to lex_stage_nth (STAGE, 0). */
169 static struct lex_token *
170 lex_stage_first (struct lex_stage *stage)
172 return lex_stage_nth (stage, 0);
175 /* Returns the token at the given INDEX in STAGE. The first token (with the
176 least lookahead) is 0, the second token is 1, and so on. There must be at
177 least INDEX + 1 tokens in STAGE.

   The token is only looked up, not removed from STAGE. */
178 static struct lex_token *
179 lex_stage_nth (struct lex_stage *stage, size_t index)
181 return stage->tokens[deque_back (&stage->deque, index)];
184 /* Adds TOKEN so that it becomes the last token in STAGE.

   If the deque is full, the token array is expanded first.  Note the
   orientation: the "last" token of a stage is pushed at the front of the
   underlying deque, while the "first" token is looked up and removed at the
   back (see lex_stage_nth() and lex_stage_take_first()). */
186 lex_stage_push_last (struct lex_stage *stage, struct lex_token *token)
188 if (deque_is_full (&stage->deque))
189 stage->tokens = deque_expand (&stage->deque, stage->tokens,
190 sizeof *stage->tokens);
191 stage->tokens[deque_push_front (&stage->deque)] = token;
194 /* Removes and returns the first token from STAGE.

   The token is not destroyed here; what happens to it is up to the caller
   (lex_stage_pop_first() destroys it, lex_stage_shift() moves it into another
   stage).  Presumably STAGE must be nonempty, as for lex_stage_first(); this
   function does not check. */
195 static struct lex_token *
196 lex_stage_take_first (struct lex_stage *stage)
198 return stage->tokens[deque_pop_back (&stage->deque)];
201 /* Removes the first token from STAGE and uninitializes it, by passing the
   token returned by lex_stage_take_first() to lex_token_destroy(). */
203 lex_stage_pop_first (struct lex_stage *stage)
205 lex_token_destroy (lex_stage_take_first (stage));
208 /* Removes the first N tokens from SRC, appending them to DST as the last
   N tokens, in the same order.

   The tokens are moved, not copied or destroyed.  Presumably SRC must hold at
   least N tokens; nothing here checks. */
211 lex_stage_shift (struct lex_stage *dst, struct lex_stage *src, size_t n)
213 for (size_t i = 0; i < n; i++)
214 lex_stage_push_last (dst, lex_stage_take_first (src));
217 /* A source of tokens, corresponding to a syntax file.
219 This is conceptually a lex_reader wrapped with everything needed to convert
220 its UTF-8 bytes into tokens. */
223 struct ll ll; /* In lexer's list of sources. */
227 - One for struct lexer.
229 - One for each struct msg_location that references this source. */
232 struct lex_reader *reader;
234 struct segmenter segmenter;
235 bool eof; /* True if T_STOP was read from 'reader'. */
237 /* Buffer of UTF-8 bytes. */
238 char *buffer; /* Source file contents. */
239 size_t length; /* Number of bytes filled. */
240 size_t allocated; /* Number of bytes allocated. */
242 /* Offsets into 'buffer'. */
243 size_t journal_pos; /* First byte not yet output to journal. */
244 size_t seg_pos; /* First byte not yet scanned as token. */
246 /* Offset into 'buffer' of starts of lines. */
248 size_t n_lines, allocated_lines;
250 bool suppress_next_newline;
254 This is a pipeline with the following stages. Each token eventually
255 made available to the parser passes through each of these stages. The stages
256 are named after the processing that happens in each one.
258 Initially, tokens come from the segmenter and scanner to 'pp':
260 - pp: Tokens that need to pass through the macro preprocessor to end up
263 - merge: Tokens that need to pass through scan_merge() to end up in
266 - parse: Tokens available to the client for parsing.
268 'pp' and 'merge' store tokens only temporarily until they pass into
269 'parse'. Tokens then live in 'parse' until the command is fully
270 consumed, at which time they are freed together. */
272 struct lex_stage merge;
273 struct lex_token **parse;
274 size_t n_parse, allocated_parse, parse_ofs;
277 static struct lex_source *lex_source_create (struct lexer *,
278 struct lex_reader *);
283 struct ll_list sources; /* Contains "struct lex_source"s. */
284 struct macro_set *macros;
286 /* Temporarily stores errors and warnings to be emitted by the lexer while
287 lexing is going on, to avoid reentrancy. */
288 struct msg **messages;
289 size_t n_messages, allocated_messages;
292 static struct lex_source *lex_source__ (const struct lexer *);
293 static char *lex_source_syntax__ (const struct lex_source *,
295 static const struct lex_token *lex_next__ (const struct lexer *, int n);
296 static void lex_source_push_endcmd__ (struct lex_source *);
297 static void lex_source_push_parse (struct lex_source *, struct lex_token *);
298 static void lex_source_clear_parse (struct lex_source *);
300 static bool lex_source_get_parse (struct lex_source *);
301 static void lex_source_msg_valist (struct lex_source *, enum msg_class,
303 const char *format, va_list)
304 PRINTF_FORMAT (5, 0);
305 static const struct lex_token *lex_source_next__ (const struct lex_source *,
308 /* Initializes READER with the specified CLASS and otherwise some reasonable
309 defaults. The caller should fill in the other members as desired.

   The defaults assigned below are syntax mode SEG_MODE_AUTO, error mode
   LEX_ERROR_CONTINUE, a null file name, a null encoding, and line number
   0. */
311 lex_reader_init (struct lex_reader *reader,
312 const struct lex_reader_class *class)
314 reader->class = class;
315 reader->syntax = SEG_MODE_AUTO;
316 reader->error = LEX_ERROR_CONTINUE;
317 reader->file_name = NULL;
318 reader->encoding = NULL;
319 reader->line_number = 0;
323 /* Frees any file name already in READER and replaces it by a copy of
324 FILE_NAME, or if FILE_NAME is null then clears any existing name.

   The old name is freed before the new one is stored so that it is not
   leaked. */
326 lex_reader_set_file_name (struct lex_reader *reader, const char *file_name)
328 free (reader->file_name);
329 reader->file_name = xstrdup_if_nonnull (file_name);
332 /* Creates and returns a new lexer. */
336 struct lexer *lexer = xmalloc (sizeof *lexer);
337 *lexer = (struct lexer) {
338 .sources = LL_INITIALIZER (lexer->sources),
339 .macros = macro_set_create (),
344 /* Destroys LEXER.

   LEXER's array of deferred messages must be null at this point (this is
   asserted).  Every source is removed from LEXER's list of sources and then
   passed to lex_source_unref(); the "safe" list iteration is used because
   the current node is removed inside the loop.  Finally LEXER's macro set is
   destroyed. */
346 lex_destroy (struct lexer *lexer)
350 struct lex_source *source, *next;
352 assert (!lexer->messages);
354 ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources)
356 ll_remove (&source->ll);
357 lex_source_unref (source);
359 macro_set_destroy (lexer->macros);
364 /* Adds M to LEXER's set of macros. M replaces any existing macro with the
365 same name. Takes ownership of M.

   This passes M straight to macro_set_add() on LEXER's macro set. */
367 lex_define_macro (struct lexer *lexer, struct macro *m)
369 macro_set_add (lexer->macros, m);
372 /* Returns LEXER's macro set. The caller should not modify it.

   The returned pointer is LEXER's own macro set, not a copy. */
373 const struct macro_set *
374 lex_get_macros (const struct lexer *lexer)
376 return lexer->macros;
379 /* Inserts READER into LEXER so that the next token read by LEXER comes from
380 READER. Before the call, LEXER must either be empty or at a T_ENDCMD
   token (this is asserted).

   READER is wrapped in a new lex_source created by lex_source_create(), which
   is pushed at the head of LEXER's list of sources. */
383 lex_include (struct lexer *lexer, struct lex_reader *reader)
385 assert (ll_is_empty (&lexer->sources) || lex_token (lexer) == T_ENDCMD);
386 ll_push_head (&lexer->sources, &lex_source_create (lexer, reader)->ll);
389 /* Appends READER to LEXER, so that it will be read after all other current
390 readers have already been read.

   READER is wrapped in a new lex_source created by lex_source_create(), which
   is pushed at the tail of LEXER's list of sources. */
392 lex_append (struct lexer *lexer, struct lex_reader *reader)
394 ll_push_tail (&lexer->sources, &lex_source_create (lexer, reader)->ll);
399 /* Advances LEXER to the next token, consuming the current token.

   When the source still has a parsed token at its current position and that
   token is T_ENDCMD, lex_source_clear_parse() is called on the source.  Then,
   for as long as the source has no parsed token left at its current position,
   lex_source_get_parse() is asked for more; a source for which that fails is
   removed from LEXER's list of sources and passed to lex_source_unref(), and
   the loop continues with whatever source lex_source__() returns next.

   NOTE(review): some statements of this function are not visible in this
   listing (for example, the handling of a null source), so this summary
   covers only the visible ones. */
401 lex_get (struct lexer *lexer)
403 struct lex_source *src;
405 src = lex_source__ (lexer);
409 if (src->parse_ofs < src->n_parse)
411 if (src->parse[src->parse_ofs]->token.type == T_ENDCMD)
412 lex_source_clear_parse (src);
417 while (src->parse_ofs == src->n_parse)
418 if (!lex_source_get_parse (src))
420 ll_remove (&src->ll);
421 lex_source_unref (src);
422 src = lex_source__ (lexer);
428 /* Advances LEXER by N tokens. */
430 lex_get_n (struct lexer *lexer, size_t n)
436 /* Issuing errors. */
438 /* Prints a syntax error message containing the current token and
439 given message MESSAGE (if non-null). */
441 lex_error (struct lexer *lexer, const char *format, ...)
445 va_start (args, format);
446 lex_ofs_msg_valist (lexer, SE, lex_ofs (lexer), lex_ofs (lexer),
451 /* Prints a syntax error message for the span of tokens N0 through N1,
452 inclusive, from the current token in LEXER, adding message MESSAGE (if
   non-null).  MESSAGE is given as FORMAT plus the arguments that follow it.

   N0 and N1 are relative to the current token; they are converted to token
   offsets by adding lex_ofs (LEXER) before the message is issued with class
   SE. */
455 lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...)
459 va_start (args, format);
460 int ofs = lex_ofs (lexer);
461 lex_ofs_msg_valist (lexer, SE, n0 + ofs, n1 + ofs, format, args);
465 /* Prints a syntax error message for the span of tokens with offsets OFS0
466 through OFS1, inclusive, within the current command in LEXER, adding message
467 MESSAGE (if non-null). */
469 lex_ofs_error (struct lexer *lexer, int ofs0, int ofs1, const char *format, ...)
473 va_start (args, format);
474 lex_ofs_msg_valist (lexer, SE, ofs0, ofs1, format, args);
478 /* Prints a message of the given CLASS containing the current token and given
479 message MESSAGE (if non-null). */
481 lex_msg (struct lexer *lexer, enum msg_class class, const char *format, ...)
485 va_start (args, format);
486 lex_ofs_msg_valist (lexer, class, lex_ofs (lexer), lex_ofs (lexer),
491 /* Prints a message of the given CLASS for the span of tokens N0 through N1,
492 inclusive, from the current token in LEXER, adding message MESSAGE (if
   non-null).  MESSAGE is given as FORMAT plus the arguments that follow it.

   N0 and N1 are relative to the current token; they are converted to token
   offsets by adding lex_ofs (LEXER) before the message is issued. */
495 lex_next_msg (struct lexer *lexer, enum msg_class class, int n0, int n1,
496 const char *format, ...)
500 va_start (args, format);
501 int ofs = lex_ofs (lexer);
502 lex_ofs_msg_valist (lexer, class, n0 + ofs, n1 + ofs, format, args);
506 /* Prints a message of the given CLASS for the span of tokens with offsets OFS0
507 through OFS1, inclusive, within the current command in LEXER, adding message
508 MESSAGE (if non-null). */
510 lex_ofs_msg (struct lexer *lexer, enum msg_class class, int ofs0, int ofs1,
511 const char *format, ...)
515 va_start (args, format);
516 lex_ofs_msg_valist (lexer, class, ofs0, ofs1, format, args);
520 /* Prints a syntax error message saying that one of the strings provided as
521 varargs, up to the first NULL, is expected. */
523 (lex_error_expecting) (struct lexer *lexer, ...)
527 va_start (args, lexer);
528 lex_error_expecting_valist (lexer, args);
532 /* Prints a syntax error message saying that one of the options provided in
533 ARGS, up to the first NULL, is expected. */
535 lex_error_expecting_valist (struct lexer *lexer, va_list args)
537 const char **options = NULL;
538 size_t allocated = 0;
543 const char *option = va_arg (args, const char *);
548 options = x2nrealloc (options, &allocated, sizeof *options);
549 options[n++] = option;
551 lex_error_expecting_array (lexer, options, n);
556 lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n)
561 lex_error (lexer, NULL);
565 lex_error (lexer, _("Syntax error expecting %s."), options[0]);
569 lex_error (lexer, _("Syntax error expecting %s or %s."),
570 options[0], options[1]);
574 lex_error (lexer, _("Syntax error expecting %s, %s, or %s."),
575 options[0], options[1], options[2]);
579 lex_error (lexer, _("Syntax error expecting %s, %s, %s, or %s."),
580 options[0], options[1], options[2], options[3]);
584 lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, or %s."),
585 options[0], options[1], options[2], options[3], options[4]);
589 lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, or %s."),
590 options[0], options[1], options[2], options[3], options[4],
595 lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, %s, "
597 options[0], options[1], options[2], options[3], options[4],
598 options[5], options[6]);
602 lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, %s, %s, "
604 options[0], options[1], options[2], options[3], options[4],
605 options[5], options[6], options[7]);
610 struct string s = DS_EMPTY_INITIALIZER;
611 for (size_t i = 0; i < n; i++)
614 ds_put_cstr (&s, ", ");
615 ds_put_cstr (&s, options[i]);
617 lex_error (lexer, _("Syntax error expecting one of the following: %s."),
625 /* Reports an error to the effect that subcommand SBC may only be specified
   once. */
628 lex_sbc_only_once (struct lexer *lexer, const char *sbc)
630 int ofs = lex_ofs (lexer) - 1;
631 if (lex_ofs_token (lexer, ofs)->type == T_EQUALS)
634 /* lex_ofs_at_phrase__() handles subcommand names that are keywords, such as
636 if (lex_ofs_at_phrase__ (lexer, ofs, sbc, NULL))
637 lex_ofs_error (lexer, ofs, ofs,
638 _("Subcommand %s may only be specified once."), sbc);
640 msg (SE, _("Subcommand %s may only be specified once."), sbc);
643 /* Reports an error to the effect that subcommand SBC is missing.
645 The error is reported against the span of tokens from offset 0 through
646 lex_max_ofs (LEXER) rather than against the current token, because a
647 missing subcommand can normally be detected only after the whole command
648 has been parsed, and so lex_error() would always report "Syntax error at
   end of command", which does not help the user find the error. */
650 lex_sbc_missing (struct lexer *lexer, const char *sbc)
652 lex_ofs_error (lexer, 0, lex_max_ofs (lexer),
653 _("Required subcommand %s was not specified."), sbc);
656 /* Reports an error to the effect that specification SPEC may only be specified
657 once within subcommand SBC. */
659 lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec)
661 lex_error (lexer, _("%s may only be specified once within subcommand %s."),
665 /* Reports an error to the effect that specification SPEC is missing within
   subcommand SBC. */
668 lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec)
670 lex_error (lexer, _("Required %s specification missing from %s subcommand."),
674 /* Prints a message of the given CLASS for the span of tokens with offsets OFS0
675 through OFS1, inclusive, within the current command in LEXER, adding message
676 MESSAGE (if non-null) with the given ARGS.

   MESSAGE is passed as FORMAT.  The work is delegated to
   lex_source_msg_valist() on the source returned by lex_source__ (LEXER). */
678 lex_ofs_msg_valist (struct lexer *lexer, enum msg_class class,
679 int ofs0, int ofs1, const char *format, va_list args)
681 lex_source_msg_valist (lex_source__ (lexer), class, ofs0, ofs1, format, args);
684 /* Checks that we're at end of command.
685 If so, returns a successful command completion code.
686 If not, flags a syntax error and returns an error command
689 lex_end_of_command (struct lexer *lexer)
691 if (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_STOP)
693 lex_error (lexer, _("Syntax error expecting end of command."));
700 /* Token testing functions. */
702 /* Returns true if the current token is a number.
   Shorthand for lex_next_is_number (LEXER, 0). */
704 lex_is_number (const struct lexer *lexer)
706 return lex_next_is_number (lexer, 0);
709 /* Returns true if the current token is a string.
   Shorthand for lex_next_is_string (LEXER, 0). */
711 lex_is_string (const struct lexer *lexer)
713 return lex_next_is_string (lexer, 0);
716 /* Returns the value of the current token, which must be a
717 floating point number.
   Shorthand for lex_next_number (LEXER, 0). */
719 lex_number (const struct lexer *lexer)
721 return lex_next_number (lexer, 0);
724 /* Returns true iff the current token is an integer.
   Shorthand for lex_next_is_integer (LEXER, 0). */
726 lex_is_integer (const struct lexer *lexer)
728 return lex_next_is_integer (lexer, 0);
731 /* Returns the value of the current token, which must be an
   integer.
   Shorthand for lex_next_integer (LEXER, 0). */
734 lex_integer (const struct lexer *lexer)
736 return lex_next_integer (lexer, 0);
739 /* Token testing functions with lookahead.
741 A value of 0 for N as an argument to any of these functions refers to the
742 current token. Lookahead is limited to the current command. Any N greater
743 than the number of tokens remaining in the current command will be treated
744 as referring to a T_ENDCMD token. */
746 /* Returns true if the token N ahead of the current token is a number.
   Applies token_is_number() to the token that lex_next (LEXER, N) returns. */
748 lex_next_is_number (const struct lexer *lexer, int n)
750 return token_is_number (lex_next (lexer, n));
753 /* Returns true if the token N ahead of the current token is a string.
   Applies token_is_string() to the token that lex_next (LEXER, N) returns. */
755 lex_next_is_string (const struct lexer *lexer, int n)
757 return token_is_string (lex_next (lexer, n));
760 /* Returns the value of the token N ahead of the current token, which must be a
761 floating point number.
   Applies token_number() to the token that lex_next (LEXER, N) returns. */
763 lex_next_number (const struct lexer *lexer, int n)
765 return token_number (lex_next (lexer, n));
768 /* Returns true if the token N ahead of the current token is an integer.
   Applies token_is_integer() to the token that lex_next (LEXER, N) returns. */
770 lex_next_is_integer (const struct lexer *lexer, int n)
772 return token_is_integer (lex_next (lexer, n));
775 /* Returns the value of the token N ahead of the current token, which must be
   an integer.
   Applies token_integer() to the token that lex_next (LEXER, N) returns. */
778 lex_next_integer (const struct lexer *lexer, int n)
780 return token_integer (lex_next (lexer, n));
783 /* Token matching functions. */
785 /* If the current token has the specified TYPE, skips it and returns true.
786 Otherwise, returns false. */
788 lex_match (struct lexer *lexer, enum token_type type)
790 if (lex_token (lexer) == type)
799 /* If the current token matches IDENTIFIER, skips it and returns true.
800 IDENTIFIER may be abbreviated to its first three letters. Otherwise,
803 IDENTIFIER must be an ASCII string. */
805 lex_match_id (struct lexer *lexer, const char *identifier)
807 return lex_match_id_n (lexer, identifier, 3);
810 /* If the current token is IDENTIFIER, skips it and returns true. IDENTIFIER
811 may be abbreviated to its first N letters. Otherwise, returns false.
813 IDENTIFIER must be an ASCII string. */
815 lex_match_id_n (struct lexer *lexer, const char *identifier, size_t n)
817 if (lex_token (lexer) == T_ID
818 && lex_id_match_n (ss_cstr (identifier), lex_tokss (lexer), n))
827 /* If the current token is integer X, skips it and returns true. Otherwise,
830 lex_match_int (struct lexer *lexer, int x)
832 if (lex_is_integer (lexer) && lex_integer (lexer) == x)
841 /* Forced matches. */
843 /* If this token is IDENTIFIER, skips it and returns true. IDENTIFIER may be
844 abbreviated to its first 3 letters. Otherwise, reports an error and returns
847 IDENTIFIER must be an ASCII string. */
849 lex_force_match_id (struct lexer *lexer, const char *identifier)
851 if (lex_match_id (lexer, identifier))
855 lex_error_expecting (lexer, identifier);
860 /* If the current token has the specified TYPE, skips it and returns true.
861 Otherwise, reports an error and returns false. */
863 lex_force_match (struct lexer *lexer, enum token_type type)
865 if (lex_token (lexer) == type)
872 const char *type_string = token_type_to_string (type);
875 char *s = xasprintf ("`%s'", type_string);
876 lex_error_expecting (lexer, s);
880 lex_error_expecting (lexer, token_type_to_name (type));
886 /* If the current token is a string, does nothing and returns true.
887 Otherwise, reports an error and returns false. */
889 lex_force_string (struct lexer *lexer)
891 if (lex_is_string (lexer))
895 lex_error (lexer, _("Syntax error expecting string."));
900 /* If the current token is a string or an identifier, does nothing and returns
901 true. Otherwise, reports an error and returns false.

903 This is meant for use in syntactic situations where we want to encourage the
904 user to supply a quoted string, but for compatibility we also accept
905 identifiers. (One example of such a situation is file names.) Therefore,
906 the error message issued when the current token is wrong only says that a
907 string is expected and doesn't mention that an identifier would also be
   accepted.

   A T_ID token short-circuits the test below, so lex_force_string(), which
   issues the error, is consulted only when the token is not an identifier. */
910 lex_force_string_or_id (struct lexer *lexer)
912 return lex_token (lexer) == T_ID || lex_force_string (lexer);
915 /* If the current token is an integer, does nothing and returns true.
916 Otherwise, reports an error and returns false. */
918 lex_force_int (struct lexer *lexer)
920 if (lex_is_integer (lexer))
924 lex_error (lexer, _("Syntax error expecting integer."));
929 /* If the current token is an integer in the range MIN...MAX (inclusive), does
930 nothing and returns true. Otherwise, reports an error and returns false.
931 If NAME is nonnull, then it is used in the error message. */
933 lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
935 bool is_number = lex_is_number (lexer);
936 bool is_integer = lex_is_integer (lexer);
937 bool too_small = (is_integer ? lex_integer (lexer) < min
938 : is_number ? lex_number (lexer) < min
940 bool too_big = (is_integer ? lex_integer (lexer) > max
941 : is_number ? lex_number (lexer) > max
943 if (is_integer && !too_small && !too_big)
948 /* Weird, maybe a bug in the caller. Just report that we needed an
951 lex_error (lexer, _("Syntax error expecting integer for %s."), name);
953 lex_error (lexer, _("Syntax error expecting integer."));
958 lex_error (lexer, _("Syntax error expecting %ld for %s."), min, name);
960 lex_error (lexer, _("Syntax error expecting %ld."), min);
962 else if (min + 1 == max)
965 lex_error (lexer, _("Syntax error expecting %ld or %ld for %s."),
968 lex_error (lexer, _("Syntax error expecting %ld or %ld."),
973 bool report_lower_bound = (min > INT_MIN / 2) || too_small;
974 bool report_upper_bound = (max < INT_MAX / 2) || too_big;
976 if (report_lower_bound && report_upper_bound)
980 _("Syntax error expecting integer "
981 "between %ld and %ld for %s."),
984 lex_error (lexer, _("Syntax error expecting integer "
985 "between %ld and %ld."),
988 else if (report_lower_bound)
993 lex_error (lexer, _("Syntax error expecting "
994 "non-negative integer for %s."),
997 lex_error (lexer, _("Syntax error expecting "
998 "non-negative integer."));
1003 lex_error (lexer, _("Syntax error expecting "
1004 "positive integer for %s."),
1007 lex_error (lexer, _("Syntax error expecting "
1008 "positive integer."));
1013 lex_error (lexer, _("Syntax error expecting "
1014 "integer %ld or greater for %s."),
1017 lex_error (lexer, _("Syntax error expecting "
1018 "integer %ld or greater."), min);
1021 else if (report_upper_bound)
1025 _("Syntax error expecting integer less than or equal "
1029 lex_error (lexer, _("Syntax error expecting integer less than or "
1036 lex_error (lexer, _("Syntax error expecting integer for %s."),
1039 lex_error (lexer, _("Syntax error expecting integer."));
1045 /* If the current token is a number, does nothing and returns true.
1046 Otherwise, reports an error and returns false. */
1048 lex_force_num (struct lexer *lexer)
1050 if (lex_is_number (lexer))
1053 lex_error (lexer, _("Syntax error expecting number."));
1057 /* If the current token is a number in the closed range [MIN,MAX], does
1058 nothing and returns true. Otherwise, reports an error and returns false.
1059 If NAME is nonnull, then it is used in the error message. */
1061 lex_force_num_range_closed (struct lexer *lexer, const char *name,
1062 double min, double max)
1064 bool is_number = lex_is_number (lexer);
1065 bool too_small = is_number && lex_number (lexer) < min;
1066 bool too_big = is_number && lex_number (lexer) > max;
1067 if (is_number && !too_small && !too_big)
1072 /* Weird, maybe a bug in the caller. Just report that we needed an
1075 lex_error (lexer, _("Syntax error expecting number for %s."), name);
1077 lex_error (lexer, _("Syntax error expecting number."));
1079 else if (min == max)
1082 lex_error (lexer, _("Syntax error expecting number %g for %s."),
1085 lex_error (lexer, _("Syntax error expecting number %g."), min);
1089 bool report_lower_bound = min > -DBL_MAX || too_small;
1090 bool report_upper_bound = max < DBL_MAX || too_big;
1092 if (report_lower_bound && report_upper_bound)
1096 _("Syntax error expecting number "
1097 "between %g and %g for %s."),
1100 lex_error (lexer, _("Syntax error expecting number "
1101 "between %g and %g."),
1104 else if (report_lower_bound)
1109 lex_error (lexer, _("Syntax error expecting "
1110 "non-negative number for %s."),
1113 lex_error (lexer, _("Syntax error expecting "
1114 "non-negative number."));
1119 lex_error (lexer, _("Syntax error expecting number "
1120 "%g or greater for %s."),
1123 lex_error (lexer, _("Syntax error expecting number "
1124 "%g or greater."), min);
1127 else if (report_upper_bound)
1131 _("Syntax error expecting number "
1132 "less than or equal to %g for %s."),
1135 lex_error (lexer, _("Syntax error expecting number "
1136 "less than or equal to %g."),
1142 lex_error (lexer, _("Syntax error expecting number for %s."), name);
1144 lex_error (lexer, _("Syntax error expecting number."));
1150 /* If the current token is a number in the half-open range [MIN,MAX), does
1151 nothing and returns true. Otherwise, reports an error and returns false.
1152 If NAME is nonnull, then it is used in the error message. */
1154 lex_force_num_range_halfopen (struct lexer *lexer, const char *name,
1155 double min, double max)
1157 bool is_number = lex_is_number (lexer);
1158 bool too_small = is_number && lex_number (lexer) < min;
1159 bool too_big = is_number && lex_number (lexer) >= max;
1160 if (is_number && !too_small && !too_big)
1165 /* Weird, maybe a bug in the caller. Just report that we needed an
1168 lex_error (lexer, _("Syntax error expecting number for %s."), name);
1170 lex_error (lexer, _("Syntax error expecting number."));
1174 bool report_lower_bound = min > -DBL_MAX || too_small;
1175 bool report_upper_bound = max < DBL_MAX || too_big;
1177 if (report_lower_bound && report_upper_bound)
1180 lex_error (lexer, _("Syntax error expecting number "
1181 "in [%g,%g) for %s."),
1184 lex_error (lexer, _("Syntax error expecting number in [%g,%g)."),
1187 else if (report_lower_bound)
1192 lex_error (lexer, _("Syntax error expecting "
1193 "non-negative number for %s."),
1196 lex_error (lexer, _("Syntax error expecting "
1197 "non-negative number."));
1202 lex_error (lexer, _("Syntax error expecting "
1203 "number %g or greater for %s."),
1206 lex_error (lexer, _("Syntax error expecting "
1207 "number %g or greater."), min);
1210 else if (report_upper_bound)
1214 _("Syntax error expecting "
1215 "number less than %g for %s."), max, name);
1217 lex_error (lexer, _("Syntax error expecting "
1218 "number less than %g."), max);
1223 lex_error (lexer, _("Syntax error expecting number for %s."), name);
1225 lex_error (lexer, _("Syntax error expecting number."));
1231 /* If the current token is a number in the open range (MIN,MAX), does
1232 nothing and returns true. Otherwise, reports an error and returns false.
1233 If NAME is nonnull, then it is used in the error message. */
1235 lex_force_num_range_open (struct lexer *lexer, const char *name,
1236 double min, double max)
1238 bool is_number = lex_is_number (lexer);
1239 bool too_small = is_number && lex_number (lexer) <= min;
1240 bool too_big = is_number && lex_number (lexer) >= max;
1241 if (is_number && !too_small && !too_big)
1246 /* Weird, maybe a bug in the caller. Just report that we needed an
1249 lex_error (lexer, _("Syntax error expecting number for %s."), name);
1251 lex_error (lexer, _("Syntax error expecting number."));
1255 bool report_lower_bound = min > -DBL_MAX || too_small;
1256 bool report_upper_bound = max < DBL_MAX || too_big;
1258 if (report_lower_bound && report_upper_bound)
1261 lex_error (lexer, _("Syntax error expecting number "
1262 "in (%g,%g) for %s."),
1265 lex_error (lexer, _("Syntax error expecting number "
1266 "in (%g,%g)."), min, max);
1268 else if (report_lower_bound)
1273 lex_error (lexer, _("Syntax error expecting "
1274 "positive number for %s."), name);
1276 lex_error (lexer, _("Syntax error expecting "
1277 "positive number."));
1282 lex_error (lexer, _("Syntax error expecting number "
1283 "greater than %g for %s."),
1286 lex_error (lexer, _("Syntax error expecting number "
1287 "greater than %g."), min);
1290 else if (report_upper_bound)
1293 lex_error (lexer, _("Syntax error expecting number "
1294 "less than %g for %s."),
1297 lex_error (lexer, _("Syntax error expecting number "
1298 "less than %g."), max);
1303 lex_error (lexer, _("Syntax error expecting number "
1306 lex_error (lexer, _("Syntax error expecting number."));
1312 /* If the current token is an identifier, does nothing and returns true.
1313 Otherwise, reports an error and returns false. */
1315 lex_force_id (struct lexer *lexer)
1317 if (lex_token (lexer) == T_ID)
1320 lex_error (lexer, _("Syntax error expecting identifier."));
1324 /* Token accessors. */
1326 /* Returns the type of LEXER's current token.
   Shorthand for lex_next_token (LEXER, 0). */
1328 lex_token (const struct lexer *lexer)
1330 return lex_next_token (lexer, 0);
1333 /* Returns the number in LEXER's current token.

1335 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
1336 tokens this function will always return zero.

   Shorthand for lex_next_tokval (LEXER, 0). */
1338 lex_tokval (const struct lexer *lexer)
1340 return lex_next_tokval (lexer, 0);
1343 /* Returns the null-terminated string in LEXER's current token, UTF-8 encoded.

1345 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
1346 this function will always return NULL.

1348 The UTF-8 encoding of the returned string is correct for variable names and
1349 other identifiers. Use filename_to_utf8() to use it as a filename. Use
1350 data_in() to use it in a "union value".

   Shorthand for lex_next_tokcstr (LEXER, 0). */
1352 lex_tokcstr (const struct lexer *lexer)
1354 return lex_next_tokcstr (lexer, 0);
1357 /* Returns the string in LEXER's current token, UTF-8 encoded. The string is
1358 null-terminated (but the null terminator is not included in the returned
1359 substring's 'length').
1361 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
1362 this function will always return NULL.
1364 The UTF-8 encoding of the returned string is correct for variable names and
1365 other identifiers. Use filename_to_utf8() to use it as a filename. Use
1366 data_in() to use it in a "union value". */
1368 lex_tokss (const struct lexer *lexer)
1370 return lex_next_tokss (lexer, 0);
1375 /* A value of 0 for N as an argument to any of these functions refers to the
1376 current token. Lookahead is limited to the current command. Any N greater
1377 than the number of tokens remaining in the current command will be treated
1378 as referring to a T_ENDCMD token. */
/* Returns the lex_token that is N tokens after the current one in LEXER's
   current source, as obtained from lex_source_next__().  LEXER_ is const
   for the benefit of callers; the const is cast away before the source is
   looked up. */
1380 static const struct lex_token *
1381 lex_next__ (const struct lexer *lexer_, int n)
1383 struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
1384 struct lex_source *src = lex_source__ (lexer);
1387 return lex_source_next__ (src, n);
/* Placeholder T_STOP token.  NOTE(review): presumably this is what is
   returned when LEXER has no source at all; confirm. */
1390 static const struct lex_token stop_token = { .token = { .type = T_STOP } };
/* Returns the token at absolute offset OFS in SRC_'s array of parsed tokens,
   calling lex_source_get_parse() to obtain more tokens until the array
   reaches OFS.  SRC_ is const for the benefit of callers; the const is cast
   away because obtaining more tokens modifies the source. */
1395 static const struct lex_token *
1396 lex_source_ofs__ (const struct lex_source *src_, int ofs)
1398 struct lex_source *src = CONST_CAST (struct lex_source *, src_);
/* Shared placeholder T_ENDCMD token.  NOTE(review): presumably returned for
   an OFS that cannot refer to a real token (such as a negative one);
   confirm against the guarding condition. */
1402 static const struct lex_token endcmd_token
1403 = { .token = { .type = T_ENDCMD } };
1404 return &endcmd_token;
/* Extend the array of parsed tokens until it covers OFS. */
1407 while (ofs >= src->n_parse)
1409 if (src->n_parse > 0)
/* The most recently parsed token is inspected first: T_STOP and T_ENDCMD
   are the token types that end a command.  NOTE(review): presumably
   lookahead stops at such a token instead of reading further; confirm. */
1411 const struct lex_token *t = src->parse[src->n_parse - 1];
1412 if (t->token.type == T_STOP || t->token.type == T_ENDCMD)
1416 lex_source_get_parse (src);
1419 return src->parse[ofs];
/* Returns the token that is N tokens after the current one in SRC.  N is
   relative to the current token, so it is converted to an absolute offset by
   adding SRC's parse_ofs before the lookup in lex_source_ofs__(). */
1422 static const struct lex_token *
1423 lex_source_next__ (const struct lex_source *src, int n)
1425 return lex_source_ofs__ (src, n + src->parse_ofs);
1428 /* Returns the "struct token" of the token N after the current one in LEXER.
1429 The returned pointer can be invalidated by pretty much any succeeding call
1430 into the lexer, although the string pointer within the returned token is
1431 only invalidated by consuming the token (e.g. with lex_get()). */
1432 const struct token *
1433 lex_next (const struct lexer *lexer, int n)
/* The lexer tracks tokens internally as struct lex_token; only the embedded
   token member is exposed to callers. */
1435 return &lex_next__ (lexer, n)->token;
1438 /* Returns the type of the token N after the current one in LEXER, that is,
the type member of the token that lex_next() returns for the same N. */
1440 lex_next_token (const struct lexer *lexer, int n)
1442 return lex_next (lexer, n)->type;
1445 /* Returns the number in the token N after the current one in LEXER.
1447 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
1448 tokens this function will always return zero.

The value is extracted from the token by token_number(). */
1450 lex_next_tokval (const struct lexer *lexer, int n)
1452 return token_number (lex_next (lexer, n));
1455 /* Returns the null-terminated string in the token N after the current one, in
UTF-8 encoding.
1458 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
1459 this function will always return NULL.
1461 The UTF-8 encoding of the returned string is correct for variable names and
1462 other identifiers. Use filename_to_utf8() to use it as a filename. Use
1463 data_in() to use it in a "union value". */
1465 lex_next_tokcstr (const struct lexer *lexer, int n)
1467 return lex_next_tokss (lexer, n).string;
1470 /* Returns the string in the token N after the current one, in UTF-8 encoding.
1471 The string is null-terminated (but the null terminator is not included in
1472 the returned substring's 'length').
1474 Only T_ID, T_MACRO_ID, T_STRING tokens have meaningful strings. For other
1475 tokens this function will always return NULL.
1477 The UTF-8 encoding of the returned string is correct for variable names and
1478 other identifiers. Use filename_to_utf8() to use it as a filename. Use
1479 data_in() to use it in a "union value". */
1481 lex_next_tokss (const struct lexer *lexer, int n)
1483 return lex_next (lexer, n)->string;
1486 /* Returns the offset of the current token within the command being parsed in
1487 LEXER. This is 0 for the first token in a command, 1 for the second, and so
1488 on. The return value is useful later for referring to this token in calls
to functions that take a token offset, such as lex_ofs_token() and
lex_ofs_location(). */
1491 lex_ofs (const struct lexer *lexer)
1493 struct lex_source *src = lex_source__ (lexer);
/* A lexer without any source has no current token; report offset 0. */
1494 return src ? src->parse_ofs : 0;
1497 /* Returns the offset of the last token in the current command. */
1499 lex_max_ofs (const struct lexer *lexer)
1501 struct lex_source *src = lex_source__ (lexer);
/* Begin at the last token parsed so far, or at offset 0 if none has been
   parsed yet. */
1505 int ofs = MAX (1, src->n_parse) - 1;
/* A token of type T_ENDCMD or T_STOP is what marks the end of the
   command. */
1508 enum token_type type = lex_source_ofs__ (src, ofs)->token.type;
1509 if (type == T_ENDCMD || type == T_STOP)
1516 /* Returns the token within LEXER's current command with offset OFS. Use
1517 lex_ofs() to find out the offset of the current token. */
1518 const struct token *
1519 lex_ofs_token (const struct lexer *lexer_, int ofs)
1521 struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
1522 struct lex_source *src = lex_source__ (lexer);
/* lex_source_next__() takes an index relative to the current token, so
   convert the absolute OFS by subtracting the current token's offset. */
1525 return &lex_source_next__ (src, ofs - src->parse_ofs)->token;
/* Placeholder T_STOP token.  NOTE(review): presumably returned when LEXER
   has no source; confirm. */
1528 static const struct token stop_token = { .type = T_STOP };
1533 /* Allocates and returns a new struct msg_location that spans tokens with
1534 offsets OFS0 through OFS1, inclusive, within the current command in
1535 LEXER. See lex_ofs() for an explanation of token offsets.
1537 The caller owns and must eventually free the returned object. */
1538 struct msg_location *
1539 lex_ofs_location (const struct lexer *lexer, int ofs0, int ofs1)
/* lex_get_location() takes offsets relative to the current token, so
   subtract the current token's offset from both ends of the range. */
1541 int ofs = lex_ofs (lexer);
1542 return lex_get_location (lexer, ofs0 - ofs, ofs1 - ofs);
1545 /* Returns a msg_point for the first character in the token with offset OFS,
1546 where offset 0 is the first token in the command currently being parsed, 1
1547 the second token, and so on. These are absolute offsets, not relative to
1548 the token currently being parsed within the command.
1550 Returns zeros for a T_STOP token. */
1553 lex_ofs_start_point (const struct lexer *lexer, int ofs)
1555 const struct lex_source *src = lex_source__ (lexer);
/* Either the start point of the token at absolute offset OFS, or an
   all-zero msg_point as the fallback. */
1557 ? lex_token_start_point (src, lex_source_ofs__ (src, ofs))
1558 : (struct msg_point) { 0, 0 });
1561 /* Returns a msg_point for the last character, inclusive, in the token with
1562 offset OFS, where offset 0 is the first token in the command currently being
1563 parsed, 1 the second token, and so on. These are absolute offsets, not
1564 relative to the token currently being parsed within the command.
1566 Returns zeros for a T_STOP token.
1568 Most of the time, a single token is wholly within a single line of syntax,
1569 so that the start and end point for a given offset have the same line
1570 number. There are two exceptions: a T_STRING token can be made up of
1571 multiple segments on adjacent lines connected with "+" punctuators, and a
1572 T_NEG_NUM token can consist of a "-" on one line followed by the number on
the next. */
1576 lex_ofs_end_point (const struct lexer *lexer, int ofs)
1578 const struct lex_source *src = lex_source__ (lexer);
/* Either the end point of the token at absolute offset OFS, or an all-zero
   msg_point as the fallback. */
1580 ? lex_token_end_point (src, lex_source_ofs__ (src, ofs))
1581 : (struct msg_point) { 0, 0 });
1584 /* Returns the text of the syntax in tokens N0 ahead of the current one,
1585 through N1 ahead of the current one, inclusive. (For example, if N0 and N1
1586 are both zero, this requests the syntax for the current token.)
1588 The caller must eventually free the returned string (with free()). The
1589 syntax is encoded in UTF-8 and in the original form supplied to the lexer so
1590 that, for example, it may include comments, spaces, and new-lines if it
1591 spans multiple tokens. Macro expansion, however, has already been
performed. */
1594 lex_next_representation (const struct lexer *lexer, int n0, int n1)
1596 const struct lex_source *src = lex_source__ (lexer);
/* lex_source_syntax__() takes absolute offsets within the command, so add
   the current token's offset to the relative N0 and N1. */
1598 ? lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs)
1603 /* Returns the text of the syntax in tokens with offsets OFS0 to OFS1,
1604 inclusive. (For example, if OFS0 and OFS1 are both zero, this requests the
1605 syntax for the first token in the current command.)
1607 The caller must eventually free the returned string (with free()). The
1608 syntax is encoded in UTF-8 and in the original form supplied to the lexer so
1609 that, for example, it may include comments, spaces, and new-lines if it
1610 spans multiple tokens. Macro expansion, however, has already been
performed. */
1613 lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1)
1615 const struct lex_source *src = lex_source__ (lexer);
/* Without a source there is no syntax to report.  A newly allocated empty
   string is returned in that case so that the caller can free the result
   either way. */
1616 return src ? lex_source_syntax__ (src, ofs0, ofs1) : xstrdup ("");
1619 /* Returns true if the token N ahead of the current one was produced by macro
1620 expansion, false otherwise. */
1622 lex_next_is_from_macro (const struct lexer *lexer, int n)
/* Tokens created by macro expansion carry a non-null macro_rep; tokens read
   directly from the source have it set to NULL. */
1624 return lex_next__ (lexer, n)->macro_rep != NULL;
/* Compares token ACTUAL against token EXPECTED.  The token types are compared
   first.  For tokens of the same type the comparison then depends on that
   type: by numeric value, by lex_id_match() on the two strings, or by exact
   length-and-bytes comparison of the two strings.

   NOTE(review): the case labels that select among these comparisons are not
   shown here; presumably numbers, identifiers, and strings respectively.
   Confirm. */
1628 lex_tokens_match (const struct token *actual, const struct token *expected)
1630 if (actual->type != expected->type)
1633 switch (actual->type)
1637 return actual->number == expected->number;
/* Note the argument order: EXPECTED comes first. */
1640 return lex_id_match (expected->string, actual->string);
1643 return (actual->string.length == expected->string.length
1644 && !memcmp (actual->string.string, expected->string.string,
1645 actual->string.length));
/* Tokenizes S with a string_lexer and compares each resulting token, in
   order, with the tokens in LEXER starting at absolute offset OFS, using
   lex_tokens_match().  N_MATCHED counts the tokens matched so far and
   ALL_MATCHED records whether any comparison failed.  The count is stored
   through N_MATCHEDP; callers in this file pass either NULL or the address
   of a counter.

   NOTE(review): presumably the return value is ALL_MATCHED; confirm. */
1653 lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s,
1656 struct string_lexer slex;
1659 size_t n_matched = 0;
1660 bool all_matched = true;
1661 string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true);
1662 while (string_lexer_next (&slex, &token))
1664 bool match = lex_tokens_match (lex_ofs_token (lexer, ofs + n_matched),
/* The token scanned from S is released as soon as it has been compared. */
1666 token_uninit (&token);
1669 all_matched = false;
1675 *n_matchedp = n_matched;
1679 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
1680 returns true. Otherwise, returns false.
1682 S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
1683 "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their
1684 first three letters.

No tokens are consumed; see lex_match_phrase() for the consuming variant. */
1686 lex_at_phrase (struct lexer *lexer, const char *s)
/* The count of matched tokens is not needed here, hence the NULL. */
1688 return lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, NULL);
1691 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
1692 skips it and returns true. Otherwise, returns false.
1694 S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
1695 "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their
1696 first three letters. */
1698 lex_match_phrase (struct lexer *lexer, const char *s)
1701 if (!lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched))
/* The whole phrase matched: skip the tokens that make it up. */
1703 lex_get_n (lexer, n_matched);
1707 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
1708 skips it and returns true. Otherwise, issues an error and returns false.
1710 S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
1711 "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their
1712 first three letters. */
1714 lex_force_match_phrase (struct lexer *lexer, const char *s)
1717 bool ok = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched);
/* On success, skip the tokens that make up the phrase. */
1719 lex_get_n (lexer, n_matched);
/* On failure, N_MATCHED tokens matched before the mismatch.
   NOTE(review): presumably the arguments 0 and N_MATCHED make the error
   cover the current token through the first token that did not match;
   confirm against lex_next_error(). */
1721 lex_next_error (lexer, 0, n_matched, _("Syntax error expecting `%s'."), s);
1725 /* Returns the 1-based line number of the source text at the byte OFFSET in
SRC. */
1728 lex_source_ofs_to_line_number (const struct lex_source *src, size_t offset)
/* Bisect src->lines[], which holds byte offsets into the source buffer at
   which lines start (the lexer appends an entry after each new-line
   segment), to find the pair of adjacent entries that encloses OFFSET. */
1731 size_t hi = src->n_lines;
1734 size_t mid = (lo + hi) / 2;
/* No entry follows MID, so OFFSET is on the last line recorded. */
1735 if (mid + 1 >= src->n_lines)
1736 return src->n_lines;
/* OFFSET is at or beyond the start of the following line. */
1737 else if (offset >= src->lines[mid + 1])
/* OFFSET is before the start of line MID. */
1739 else if (offset < src->lines[mid])
1746 /* Returns the 1-based column number of the source text at the byte OFFSET in
SRC. */
1749 lex_source_ofs_to_column_number (const struct lex_source *src, size_t offset)
/* Search backward from OFFSET for the closest preceding new-line.  The line
   that contains OFFSET starts just after it, or at the start of the buffer
   if there is none. */
1751 const char *newline = memrchr (src->buffer, '\n', offset);
1752 size_t line_ofs = newline ? newline - src->buffer + 1 : 0;
/* Count the columns between the start of the line and OFFSET with
   utf8_count_columns(); the + 1 makes the result 1-based. */
1753 return utf8_count_columns (&src->buffer[line_ofs], offset - line_ofs) + 1;
/* Returns the msg_point (line number and column number, as computed by
   lex_source_ofs_to_line_number() and lex_source_ofs_to_column_number()) for
   byte OFFSET in SRC. */
1756 static struct msg_point
1757 lex_source_ofs_to_point__ (const struct lex_source *src, size_t offset)
1759 return (struct msg_point) {
1760 .line = lex_source_ofs_to_line_number (src, offset),
1761 .column = lex_source_ofs_to_column_number (src, offset),
/* Returns the msg_point in SRC at which TOKEN starts, that is, the point
   that corresponds to byte offset TOKEN->token_pos. */
1765 static struct msg_point
1766 lex_token_start_point (const struct lex_source *src,
1767 const struct lex_token *token)
1769 return lex_source_ofs_to_point__ (src, token->token_pos);
/* Returns the msg_point in SRC at which TOKEN ends, that is, the point that
   corresponds to the byte offset that lex_token_end() reports for TOKEN. */
1772 static struct msg_point
1773 lex_token_end_point (const struct lex_source *src,
1774 const struct lex_token *token)
1776 return lex_source_ofs_to_point__ (src, lex_token_end (token));
/* Returns, by value, a msg_location in SRC that runs from the start of token
   T0 to the end of token T1.  The file name comes from SRC's reader, passed
   through intern_new_if_nonnull(), and the location points back at SRC.

   No reference is taken on SRC here (contrast lex_get_location(), which
   calls lex_source_ref()). */
1779 static struct msg_location
1780 lex_token_location (const struct lex_source *src,
1781 const struct lex_token *t0,
1782 const struct lex_token *t1)
1784 return (struct msg_location) {
1785 .file_name = intern_new_if_nonnull (src->reader->file_name),
1786 .start = lex_token_start_point (src, t0),
1787 .end = lex_token_end_point (src, t1),
1788 .src = CONST_CAST (struct lex_source *, src),
/* Like lex_token_location(), but returns a copy of the location made with
   msg_location_dup() instead of returning it by value.
   NOTE(review): presumably the caller owns the copy and must eventually
   destroy it; confirm against msg_location_dup(). */
1792 static struct msg_location *
1793 lex_token_location_rw (const struct lex_source *src,
1794 const struct lex_token *t0,
1795 const struct lex_token *t1)
1797 struct msg_location location = lex_token_location (src, t0, t1);
1798 return msg_location_dup (&location);
/* Returns a msg_location, produced by lex_token_location_rw(), that spans
   the tokens in SRC with absolute offsets OFS0 through OFS1. */
1801 static struct msg_location *
1802 lex_source_get_location (const struct lex_source *src, int ofs0, int ofs1)
1804 return lex_token_location_rw (src,
1805 lex_source_ofs__ (src, ofs0),
1806 lex_source_ofs__ (src, ofs1));
1809 /* Returns the name of the syntax file from which the current command is drawn.
1810 Returns NULL for a T_STOP token or if the command's source does not have
a file name.
1813 There is no version of this function that takes an N argument because
1814 lookahead only works to the end of a command and any given command is always
1815 within a single syntax file. */
1817 lex_get_file_name (const struct lexer *lexer)
1819 struct lex_source *src = lex_source__ (lexer);
1820 return src == NULL ? NULL : src->reader->file_name;
1823 /* Returns a newly allocated msg_location for the syntax that represents tokens
1824 with 0-based offsets N0...N1, inclusive, from the current token. The caller
1825 must eventually free the location (with msg_location_destroy()). */
1826 struct msg_location *
1827 lex_get_location (const struct lexer *lexer, int n0, int n1)
1829 struct msg_location *loc = xmalloc (sizeof *loc);
1830 *loc = (struct msg_location) {
1831 .file_name = intern_new_if_nonnull (lex_get_file_name (lexer)),
/* N0 and N1 are relative to the current token, whereas the point functions
   take absolute offsets, hence the additions. */
1832 .start = lex_ofs_start_point (lexer, n0 + lex_ofs (lexer)),
1833 .end = lex_ofs_end_point (lexer, n1 + lex_ofs (lexer)),
1834 .src = lex_source__ (lexer),
/* The new location points at the source, so take a reference on it. */
1836 lex_source_ref (loc->src);
/* Returns the encoding recorded in the reader of LEXER's current source, or
   NULL if LEXER has no source. */
1841 lex_get_encoding (const struct lexer *lexer)
1843 struct lex_source *src = lex_source__ (lexer);
1844 return src == NULL ? NULL : src->reader->encoding;
1847 /* Returns the syntax mode for the syntax file from which the current command
1848 is drawn. Returns SEG_MODE_AUTO if LEXER has no current source.
1851 There is no version of this function that takes an N argument because
1852 lookahead only works to the end of a command and any given command is always
1853 within a single syntax file. */
1855 lex_get_syntax_mode (const struct lexer *lexer)
1857 struct lex_source *src = lex_source__ (lexer);
1858 return src == NULL ? SEG_MODE_AUTO : src->reader->syntax;
1861 /* Returns the error mode for the syntax file from which the current command
1862 is drawn. Returns LEX_ERROR_TERMINAL if LEXER has no current source.
1865 There is no version of this function that takes an N argument because
1866 lookahead only works to the end of a command and any given command is always
1867 within a single syntax file. */
1869 lex_get_error_mode (const struct lexer *lexer)
1871 struct lex_source *src = lex_source__ (lexer);
1872 return src == NULL ? LEX_ERROR_TERMINAL : src->reader->error;
1875 /* If the source that LEXER is currently reading has error mode
1876 LEX_ERROR_TERMINAL, discards all buffered input and tokens, so that the next
1877 token to be read comes directly from whatever is next read from the stream.
1879 It makes sense to call this function after encountering an error in a
1880 command entered on the console, because usually the user would prefer not to
1881 have cascading errors. */
1883 lex_interactive_reset (struct lexer *lexer)
1885 struct lex_source *src = lex_source__ (lexer);
1886 if (src != NULL && src->reader->error == LEX_ERROR_TERMINAL)
/* Rewind the journaling and segmenting positions to the start of the
   buffer. */
1889 src->journal_pos = src->seg_pos = 0;
1891 src->suppress_next_newline = false;
/* Restart the segmenter, keeping the mode it already had. */
1892 src->segmenter = segmenter_init (segmenter_get_mode (&src->segmenter),
/* Throw away the tokens queued at each stage of the pipeline. */
1894 lex_stage_clear (&src->pp);
1895 lex_stage_clear (&src->merge);
1896 lex_source_clear_parse (src);
/* Leave a lone T_ENDCMD as the current token, which is the same state that
   lex_source_create() sets up for a new source. */
1897 lex_source_push_endcmd__ (src);
1901 /* Advances past any tokens in LEXER up to a T_ENDCMD or T_STOP.  The
T_ENDCMD or T_STOP token itself is not consumed: it is the current token
when this function returns. */
1903 lex_discard_rest_of_command (struct lexer *lexer)
1905 while (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD)
1909 /* Discards all lookahead tokens in LEXER, then discards all input sources
1910 until it encounters one with error mode LEX_ERROR_TERMINAL or until it
1911 runs out of input sources. */
1913 lex_discard_noninteractive (struct lexer *lexer)
1915 struct lex_source *src = lex_source__ (lexer);
/* NOTE(review): a source whose error mode is LEX_ERROR_IGNORE is treated
   specially here, which the comment above does not mention.  Presumably
   nothing is discarded in that case; confirm. */
1918 if (src->reader->error == LEX_ERROR_IGNORE)
/* Throw away the tokens queued at each stage of the current source. */
1921 lex_stage_clear (&src->pp);
1922 lex_stage_clear (&src->merge);
1923 lex_source_clear_parse (src);
/* Remove sources from the head of the lexer's list, dropping a reference to
   each, until the head has error mode LEX_ERROR_TERMINAL or the list is
   empty. */
1925 for (; src != NULL && src->reader->error != LEX_ERROR_TERMINAL;
1926 src = lex_source__ (lexer))
1928 ll_remove (&src->ll);
1929 lex_source_unref (src);
/* Grows SRC's buffer with x2realloc() if it is full, that is, if the number
   of bytes in use has reached the number allocated.  Otherwise does
   nothing. */
1935 lex_source_expand__ (struct lex_source *src)
1937 if (src->length >= src->allocated)
1938 src->buffer = x2realloc (src->buffer, &src->allocated);
/* Reads more input from SRC's reader into the free space at the end of SRC's
   buffer, growing the buffer first if necessary.  Reading repeats until the
   part of the buffer that has not yet been segmented (from seg_pos onward)
   contains a new-line.

   NOTE(review): the reader's eof flag is set along the way, presumably when
   the read callback returns no data, which presumably also ends the loop;
   confirm. */
1942 lex_source_read__ (struct lex_source *src)
1946 lex_source_expand__ (src);
1948 size_t space = src->allocated - src->length;
1949 enum prompt_style prompt = segmenter_get_prompt (&src->segmenter);
1950 size_t n = src->reader->class->read (src->reader,
1951 &src->buffer[src->length],
/* The read callback must not return more than the space it was offered. */
1953 assert (n <= space);
1958 src->reader->eof = true;
1964 while (!memchr (&src->buffer[src->seg_pos], '\n',
1965 src->length - src->seg_pos));
/* Returns the source at the head of LEXER's list of sources, or NULL if the
   list is empty. */
1968 static struct lex_source *
1969 lex_source__ (const struct lexer *lexer)
1971 return (ll_is_empty (&lexer->sources) ? NULL
1972 : ll_data (ll_head (&lexer->sources), struct lex_source, ll));
/* Returns LEXER's current source as a pointer to const, or NULL if LEXER has
   no source.  This is the externally visible counterpart of
   lex_source__(). */
1975 const struct lex_source *
1976 lex_source (const struct lexer *lexer)
1978 return lex_source__ (lexer);
1981 /* Returns the text of the syntax in SRC for tokens with offsets OFS0 through
1982 OFS1 in the current command, inclusive. (For example, if OFS0 and OFS1 are
1983 both zero, this requests the syntax for the first token in the current
1984 command.) The caller must eventually free the returned string (with
1985 free()). The syntax is encoded in UTF-8 and in the original form supplied
1986 to the lexer so that, for example, it may include comments, spaces, and
1987 new-lines if it spans multiple tokens. Macro expansion, however, has
1988 already been performed.

NOTE(review): the loop index below is a size_t that is compared against
the int OFS1, so a negative OFS1 would be converted to a very large
unsigned value.  Presumably callers never pass one; confirm. */
1990 lex_source_syntax__ (const struct lex_source *src, int ofs0, int ofs1)
1992 struct string s = DS_EMPTY_INITIALIZER;
1993 for (size_t i = ofs0; i <= ofs1; )
1995 /* Find [I,J) as the longest sequence of tokens not produced by macro
1996 expansion, or otherwise the longest sequence expanded from a single
1998 const struct lex_token *first = lex_source_ofs__ (src, i);
2000 for (j = i + 1; j <= ofs1; j++)
2002 const struct lex_token *cur = lex_source_ofs__ (src, j);
2003 if ((first->macro_rep != NULL) != (cur->macro_rep != NULL)
2004 || first->macro_rep != cur->macro_rep)
2007 const struct lex_token *last = lex_source_ofs__ (src, j - 1);
2009 /* Now add the syntax for this sequence of tokens to S. */
/* Successive sequences are separated by a single space. */
2010 if (!ds_is_empty (&s))
2011 ds_put_byte (&s, ' ');
/* Tokens that did not come from a macro: copy the original bytes from the
   source buffer, from the start of FIRST through the end of LAST. */
2012 if (!first->macro_rep)
2014 size_t start = first->token_pos;
2015 size_t end = last->token_pos + last->token_len;
2016 ds_put_substring (&s, ss_buffer (&src->buffer[start], end - start));
/* Tokens from a macro expansion: copy from the expansion text instead,
   using the offset and length of each token within that text. */
2020 size_t start = first->ofs;
2021 size_t end = last->ofs + last->len;
2022 ds_put_substring (&s, ss_buffer (first->macro_rep + start,
2028 return ds_steal_cstr (&s);
/* Checks the tokens in SRC with absolute offsets OFS0 through OFS1,
   inclusive, for any that was produced by macro expansion, that is, any
   with a non-null macro_rep. */
2032 lex_source_contains_macro_call (struct lex_source *src, int ofs0, int ofs1)
2034 for (int i = ofs0; i <= ofs1; i++)
2035 if (lex_source_ofs__ (src, i)->macro_rep)
2040 /* If tokens OFS0...OFS1 (inclusive) in SRC contain a macro call, this returns
2041 the raw UTF-8 syntax for the macro call (not for the expansion) and for any
2042 other tokens included in that range. The syntax is encoded in UTF-8 and in
2043 the original form supplied to the lexer so that, for example, it may include
2044 comments, spaces, and new-lines if it spans multiple tokens.
2046 Returns an empty string if the token range doesn't include a macro call.
2048 The caller must not modify or free the returned string. */
2049 static struct substring
2050 lex_source_get_macro_call (struct lex_source *src, int ofs0, int ofs1)
2052 if (!lex_source_contains_macro_call (src, ofs0, ofs1))
/* MAX() keeps the end token from preceding the start token if OFS1 is less
   than OFS0. */
2055 const struct lex_token *token0 = lex_source_ofs__ (src, ofs0);
2056 const struct lex_token *token1 = lex_source_ofs__ (src, MAX (ofs0, ofs1));
/* The result points directly into the source buffer, from the start of the
   first token through the end of the last, which is why the caller must not
   free it. */
2057 size_t start = token0->token_pos;
2058 size_t end = token1->token_pos + token1->token_len;
2060 return ss_buffer (&src->buffer[start], end - start);
/* Builds a message of class CLASS about the tokens with offsets OFS0 through
   OFS1 in SRC.  The text is assembled in a string: first an optional prefix
   that either names the macro call the tokens were expanded from or says
   that the end of input was reached, followed by a colon and a space if the
   prefix is nonempty; then FORMAT expanded with ARGS, or a generic syntax
   error text (NOTE(review): presumably used when FORMAT is null; confirm);
   and finally a period if the text does not already end in one.

   SRC may be null, in which case the message carries no location. */
2064 lex_source_msg_valist (struct lex_source *src, enum msg_class class,
2065 int ofs0, int ofs1, const char *format, va_list args)
2067 struct string s = DS_EMPTY_INITIALIZER;
2071 /* Get the macro call(s) that expanded to the syntax that caused the
2074 str_ellipsize (lex_source_get_macro_call (src, ofs0, ofs1),
2077 ds_put_format (&s, _("In syntax expanded from `%s'"), call);
2080 ds_put_cstr (&s, _("At end of input"));
2082 if (!ds_is_empty (&s))
2083 ds_put_cstr (&s, ": ");
2085 ds_put_vformat (&s, format, args);
2087 ds_put_cstr (&s, _("Syntax error."));
2089 if (ds_last (&s) != '.')
2090 ds_put_byte (&s, '.');
2092 struct msg *m = xmalloc (sizeof *m);
2094 .category = msg_class_to_category (class),
2095 .severity = msg_class_to_severity (class),
2096 .location = src ? lex_source_get_location (src, ofs0, ofs1) : NULL,
2097 .text = ds_steal_cstr (&s),
/* Creates a syntax error message (category MSG_C_SYNTAX, severity
   MSG_S_ERROR) whose location is TOKEN within SRC and whose text is a copy
   of the string in TOKEN, then appends it to the array of pending messages
   in SRC's lexer, growing that array if it is full.

   The message is not emitted here; lex_source_get_parse() emits the pending
   messages. */
2103 lex_get_error (struct lex_source *src, const struct lex_token *token)
2105 struct msg *m = xmalloc (sizeof *m);
2107 .category = MSG_C_SYNTAX,
2108 .severity = MSG_S_ERROR,
2109 .location = lex_token_location_rw (src, token, token),
2110 .text = ss_xstrdup (token->token.string),
2113 struct lexer *lexer = src->lexer;
2114 if (lexer->n_messages >= lexer->allocated_messages)
2115 lexer->messages = x2nrealloc (lexer->messages, &lexer->allocated_messages,
2116 sizeof *lexer->messages);
2117 lexer->messages[lexer->n_messages++] = m;
2120 /* Attempts to append an additional token to 'pp' in SRC, reading more from the
2121 underlying lex_reader if necessary. Returns true if a new token was added
2122 to SRC's deque, false otherwise. The caller should retry failures unless
2123 SRC's 'eof' marker was set to true indicating that there will be no more
2124 tokens from this source. */
2126 lex_source_try_get_pp (struct lex_source *src)
2128 /* Append a new token to SRC and initialize it. */
2129 struct lex_token *token = xmalloc (sizeof *token);
2130 token->token = (struct token) { .type = T_STOP };
2131 token->macro_rep = NULL;
2132 token->ref_cnt = NULL;
2133 token->token_pos = src->seg_pos;
2135 /* Extract a segment. */
2136 const char *segment;
2137 enum segment_type seg_type;
2141 segment = &src->buffer[src->seg_pos];
2142 seg_len = segmenter_push (&src->segmenter, segment,
2143 src->length - src->seg_pos,
2144 src->reader->eof, &seg_type);
2148 /* The segmenter needs more input to produce a segment. */
2149 assert (!src->reader->eof);
2150 lex_source_read__ (src);
2153 /* Update state based on the segment. */
2154 token->token_len = seg_len;
2155 src->seg_pos += seg_len;
2156 if (seg_type == SEG_NEWLINE)
/* Record the byte offset just past this new-line, which is where the next
   line starts.  lex_source_ofs_to_line_number() searches these offsets. */
2158 if (src->n_lines >= src->allocated_lines)
2159 src->lines = x2nrealloc (src->lines, &src->allocated_lines,
2160 sizeof *src->lines);
2161 src->lines[src->n_lines++] = src->seg_pos;
2164 /* Get a token from the segment. */
2165 enum tokenize_result result = token_from_segment (
2166 seg_type, ss_buffer (segment, seg_len), &token->token);
2168 /* If we've reached the end of a line, or the end of a command, then pass
2169 the line to the output engine as a syntax text item. */
2170 int n_lines = seg_type == SEG_NEWLINE;
2171 if (seg_type == SEG_END_COMMAND && !src->suppress_next_newline)
2174 src->suppress_next_newline = true;
2176 else if (n_lines > 0 && src->suppress_next_newline)
2179 src->suppress_next_newline = false;
2181 for (int i = 0; i < n_lines; i++)
2183 /* Beginning of line. */
2184 const char *line = &src->buffer[src->journal_pos];
2186 /* Calculate line length, including \n or \r\n end-of-line if present.
2188 We use src->length even though that may be beyond what we've actually
2189 converted to tokens. That's because, if we're emitting the line due
2190 to SEG_END_COMMAND, we want to take the whole line through the
2191 newline, not just through the '.'. */
2192 size_t max_len = src->length - src->journal_pos;
2193 const char *newline = memchr (line, '\n', max_len);
2194 size_t line_len = newline ? newline - line + 1 : max_len;
2196 /* Calculate line length excluding end-of-line. */
2197 size_t copy_len = line_len;
2198 if (copy_len > 0 && line[copy_len - 1] == '\n')
2200 if (copy_len > 0 && line[copy_len - 1] == '\r')
2203 /* Submit the line as syntax. */
2204 output_item_submit (text_item_create_nocopy (TEXT_ITEM_SYNTAX,
2205 xmemdup0 (line, copy_len),
2208 src->journal_pos += line_len;
2213 case TOKENIZE_ERROR:
/* Queue an error message that is located at this token. */
2214 lex_get_error (src, token);
2216 case TOKENIZE_EMPTY:
2217 lex_token_destroy (token);
2220 case TOKENIZE_TOKEN:
/* A T_STOP from the tokenizer is queued as a T_ENDCMD instead. */
2221 if (token->token.type == T_STOP)
2223 token->token.type = T_ENDCMD;
2226 lex_stage_push_last (&src->pp, token);
2232 /* Attempts to append a new token to SRC. Returns true if successful, false on
2233 failure. On failure, the end of SRC has been reached and no more tokens
2234 will be forthcoming from it.
2236 Does not make the new token available for lookahead yet; the caller must
2237 adjust SRC's 'middle' pointer to do so.

NOTE(review): the tokens obtained through lex_source_try_get_pp() are
appended to SRC's pp stage, and nothing else in this part of the file
refers to a 'middle' pointer, so the sentence above may be out of date.
Confirm. */
2239 lex_source_get_pp (struct lex_source *src)
2242 if (lex_source_try_get_pp (src))
/* Makes one attempt to move tokens from the pp stage of SRC_ into its merge
   stage, expanding a macro call at the front of the pp stage if there is
   one.  In outline:

   - If the pp stage is empty, one more token is obtained with
     lex_source_get_pp().

   - If macro expansion is disabled (settings_get_mexpand() is false), all
     of the tokens in the pp stage are shifted to the merge stage.

   - Otherwise the tokens in the pp stage are fed to the macro expander one
     at a time, reading more as needed, until it reports a result in N_CALL.

   - If there was no macro call after all, only the first token is shifted
     to the merge stage.

   - If there was one, the call is expanded, each token of the expansion is
     appended to the merge stage, and the tokens that made up the call are
     removed from the pp stage.

   The final return reports whether the expansion produced any tokens.
   NOTE(review): an expansion that yields no tokens therefore reports false
   even though input was consumed, presumably so that the caller tries
   again; confirm.

   SRC_ is const for the benefit of callers; the const is cast away. */
2248 lex_source_try_get_merge (const struct lex_source *src_)
2250 struct lex_source *src = CONST_CAST (struct lex_source *, src_);
2252 if (lex_stage_is_empty (&src->pp) && !lex_source_get_pp (src))
2255 if (!settings_get_mexpand ())
2257 lex_stage_shift (&src->merge, &src->pp, lex_stage_count (&src->pp));
2261 /* Now pass tokens one-by-one to the macro expander.
2263 In the common case where there is no macro to expand, the loop is not
2265 struct macro_call *mc;
2266 int n_call = macro_call_create (src->lexer->macros,
2267 &lex_stage_first (&src->pp)->token, &mc);
2268 for (int ofs = 1; !n_call; ofs++)
2270 if (lex_stage_count (&src->pp) <= ofs && !lex_source_get_pp (src))
2272 /* This should not be reachable because we always get a T_ENDCMD at
2273 the end of an input file (transformed from T_STOP by
2274 lex_source_try_get_pp()) and the macro_expander should always
2275 terminate expansion on T_ENDCMD. */
2279 const struct lex_token *t = lex_stage_nth (&src->pp, ofs);
2280 const struct macro_token mt = {
2282 .syntax = ss_buffer (&src->buffer[t->token_pos], t->token_len),
2284 const struct msg_location loc = lex_token_location (src, t, t);
2285 n_call = macro_call_add (mc, &mt, &loc);
2289 /* False alarm: no macro expansion after all. Use first token as
2290 lookahead. We'll retry macro expansion from the second token next
2292 macro_call_destroy (mc);
2293 lex_stage_shift (&src->merge, &src->pp, 1);
2297 /* The first 'n_call' tokens in 'pp', which we bracket as C0...C1, inclusive,
2298 are a macro call. (These are likely to be the only tokens in 'pp'.)
2300 const struct lex_token *c0 = lex_stage_first (&src->pp);
2301 const struct lex_token *c1 = lex_stage_nth (&src->pp, n_call - 1);
2302 struct macro_tokens expansion = { .n = 0 };
2303 struct msg_location loc = lex_token_location (src, c0, c1);
2304 macro_call_expand (mc, src->reader->syntax, &loc, &expansion);
2305 macro_call_destroy (mc);
2307 /* Convert the macro expansion into syntax for possible error messages
2309 size_t *ofs = xnmalloc (expansion.n, sizeof *ofs);
2310 size_t *len = xnmalloc (expansion.n, sizeof *len);
2311 struct string s = DS_EMPTY_INITIALIZER;
2312 macro_tokens_to_syntax (&expansion, &s, ofs, len);
2314 if (settings_get_mprint ())
2315 output_item_submit (text_item_create (TEXT_ITEM_LOG, ds_cstr (&s),
2316 _("Macro Expansion")));
2318 /* Append the macro expansion tokens to the lookahead. */
2319 if (expansion.n > 0)
/* All of the tokens from one expansion share a single copy of the expansion
   text and a single reference count, which starts out as the number of
   tokens in the expansion. */
2321 char *macro_rep = ds_steal_cstr (&s);
2322 size_t *ref_cnt = xmalloc (sizeof *ref_cnt);
2323 *ref_cnt = expansion.n;
2324 for (size_t i = 0; i < expansion.n; i++)
2326 struct lex_token *token = xmalloc (sizeof *token);
2327 *token = (struct lex_token) {
2328 .token = expansion.mts[i].token,
/* Every token in the expansion is attributed to the span of source text
   that the whole macro call occupies, from the start of C0 through the end
   of C1. */
2329 .token_pos = c0->token_pos,
2330 .token_len = (c1->token_pos + c1->token_len) - c0->token_pos,
2331 .macro_rep = macro_rep,
2336 lex_stage_push_last (&src->merge, token);
2338 ss_dealloc (&expansion.mts[i].syntax);
2343 free (expansion.mts);
2347 /* Destroy the tokens for the call. */
2348 for (size_t i = 0; i < n_call; i++)
2349 lex_stage_pop_first (&src->pp);
2351 return expansion.n > 0;
2354 /* Attempts to obtain at least one new token into 'merge' in SRC.
2356 Returns true if successful, false on failure. In the latter case, SRC is
2357 exhausted and 'src->eof' is now true.

This is built on lex_source_try_get_merge(), which makes a single
attempt.

NOTE(review): the end-of-file flag that lex_source_read__() sets is
src->reader->eof; confirm which flag the sentence above refers to. */
2359 lex_source_get_merge (struct lex_source *src)
2362 if (lex_source_try_get_merge (src))
/* Moves tokens from the merge stage of SRC to its array of parsed tokens,
   combining runs of tokens that together form a single token.  Successive
   tokens from the merge stage are fed to a merger with merger_add(), with
   more obtained through lex_source_get_merge() as needed, and its result
   decides what happens: either the first token of the merge stage is passed
   along unchanged, or (when the result is positive) that many tokens are
   replaced by one newly created token that covers all of them.

   NOTE(review): the condition that selects the pass-along case is not shown
   here; presumably it is a merger_add() result of zero.  Confirm. */
2368 lex_source_get_parse__ (struct lex_source *src)
2370 struct merger m = MERGER_INIT;
2372 for (size_t i = 0; ; i++)
2374 while (lex_stage_count (&src->merge) <= i && !lex_source_get_merge (src))
2376 /* We always get a T_ENDCMD at the end of an input file
2377 (transformed from T_STOP by lex_source_try_get_pp()) and
2378 merger_add() should never return -1 on T_ENDCMD. */
2379 assert (lex_stage_is_empty (&src->merge));
2383 int retval = merger_add (&m, &lex_stage_nth (&src->merge, i)->token,
2387 lex_source_push_parse (src, lex_stage_take_first (&src->merge));
2390 else if (retval > 0)
2392 /* Add a token that merges all the tokens together. */
2393 const struct lex_token *first = lex_stage_first (&src->merge);
2394 const struct lex_token *last = lex_stage_nth (&src->merge,
/* The merged token keeps its macro attribution only if the first and last
   tokens come from the same macro expansion. */
2396 bool macro = first->macro_rep && first->macro_rep == last->macro_rep;
2397 struct lex_token *t = xmalloc (sizeof *t);
2398 *t = (struct lex_token) {
2400 .token_pos = first->token_pos,
2401 .token_len = (last->token_pos - first->token_pos) + last->token_len,
2403 /* This works well if all the tokens were not expanded from macros,
2404 or if they came from the same macro expansion. It just gives up
2405 in the other (corner) cases. */
2406 .macro_rep = macro ? first->macro_rep : NULL,
2407 .ofs = macro ? first->ofs : 0,
2408 .len = macro ? (last->ofs - first->ofs) + last->len : 0,
2409 .ref_cnt = macro ? first->ref_cnt : NULL,
2413 lex_source_push_parse (src, t);
/* The tokens that were merged are no longer needed. */
2415 for (int i = 0; i < retval; i++)
2416 lex_stage_pop_first (&src->merge);
2422 /* Attempts to obtain at least one new token into 'lookahead' in SRC.
2424 Returns true if successful, false on failure. In the latter case, SRC is
2425 exhausted and 'src->eof' is now true.

Any messages that were queued in the lexer while the tokens were obtained
(see lex_get_error()) are emitted before this function returns. */
2427 lex_source_get_parse (struct lex_source *src)
2429 bool ok = lex_source_get_parse__ (src);
2430 struct lexer *lexer = src->lexer;
2431 if (lexer->n_messages)
/* Detach the queued messages from the lexer before emitting any of them.
   NOTE(review): presumably this is so that a message handler that calls
   back into the lexer finds an empty queue; confirm. */
2433 struct msg **messages = lexer->messages;
2434 size_t n = lexer->n_messages;
2436 lexer->messages = NULL;
2437 lexer->n_messages = lexer->allocated_messages = 0;
2439 for (size_t i = 0; i < n; i++)
2440 msg_emit (messages[i]);
/* Appends a newly allocated T_ENDCMD token to SRC's parse array.

   SRC's parse array must be empty on entry (asserted). */
2447 lex_source_push_endcmd__ (struct lex_source *src)
2449 assert (src->n_parse == 0);
2451 struct lex_token *token = xmalloc (sizeof *token);
/* The compound literal zero-initializes every member other than the token
   type. */
2452 *token = (struct lex_token) { .token = { .type = T_ENDCMD } };
2453 lex_source_push_parse (src, token);
/* Appends TOKEN to the end of SRC's parse array, enlarging the array with
   x2nrealloc() first if it is full.  The array stores the TOKEN pointer
   itself, not a copy of the token. */
2457 lex_source_push_parse (struct lex_source *src, struct lex_token *token)
2459 if (src->n_parse >= src->allocated_parse)
2460 src->parse = x2nrealloc (src->parse, &src->allocated_parse,
2461 sizeof *src->parse);
2462 src->parse[src->n_parse++] = token;
/* Passes every token in SRC's parse array to lex_token_destroy() and then
   marks the array as empty by zeroing 'n_parse' and 'parse_ofs'.  The array
   storage 'src->parse' itself is not released here. */
2466 lex_source_clear_parse (struct lex_source *src)
2468 for (size_t i = 0; i < src->n_parse; i++)
2469 lex_token_destroy (src->parse[i]);
2470 src->n_parse = src->parse_ofs = 0;
/* Allocates and initializes a new lex_source that reads from READER.  Its
   segmenter is set up from READER's syntax mode, and its parse array starts
   out holding a single T_ENDCMD token (see lex_source_push_endcmd__()).

   NOTE(review): LEXER is presumably recorded in the new source (other
   functions read 'src->lexer') and the new source is presumably the return
   value -- confirm. */
2473 static struct lex_source *
2474 lex_source_create (struct lexer *lexer, struct lex_reader *reader)
/* Initial capacity of the LINES array: 4 entries. */
2476 size_t allocated_lines = 4;
2477 size_t *lines = xmalloc (allocated_lines * sizeof *lines);
2480 struct lex_source *src = xmalloc (sizeof *src);
/* Members not named in the initializer are zero-initialized. */
2481 *src = (struct lex_source) {
2484 .segmenter = segmenter_init (reader->syntax, false),
2488 .allocated_lines = allocated_lines,
2491 lex_source_push_endcmd__ (src);
/* Installs a message handler through msg_set_handler().  The handler pairs
   the caller's OUTPUT_MSG callback with this file's lex_source_ref(),
   lex_source_unref(), and lex_source_get_line(). */
2497 lex_set_message_handler (struct lexer *lexer,
2498 void (*output_msg) (const struct msg *,
2501 struct msg_handler msg_handler = {
/* The cast converts OUTPUT_MSG to a callback type whose second parameter is
   'void *'. */
2502 .output_msg = (void (*)(const struct msg *, void *)) output_msg,
2504 .lex_source_ref = lex_source_ref,
2505 .lex_source_unref = lex_source_unref,
2506 .lex_source_get_line = lex_source_get_line,
2508 msg_set_handler (&msg_handler);
/* Reference-counting entry point for a lex_source; the counterpart of
   lex_source_unref().  SRC_ is taken as a pointer to const, so constness is
   cast away to reach the mutable 'n_refs' member.

   NOTE(review): presumably increments 'n_refs' and returns the source --
   confirm. */
2512 lex_source_ref (const struct lex_source *src_)
2514 struct lex_source *src = CONST_CAST (struct lex_source *, src_);
/* A source with no remaining references must not be referenced again. */
2517 assert (src->n_refs > 0);
/* Drops one reference to SRC.  The teardown below -- destroying the reader
   and releasing the 'pp', 'merge' and parse stages -- belongs to the case in
   which the count reaches zero.

   NOTE(review): presumably returns early while references remain -- confirm. */
2524 lex_source_unref (struct lex_source *src)
2529 assert (src->n_refs > 0);
2530 if (--src->n_refs > 0)
/* Fetch these pointers from the reader before the reader is destroyed.
   NOTE(review): presumably so that they can still be released afterward --
   confirm. */
2533 char *file_name = src->reader->file_name;
2534 char *encoding = src->reader->encoding;
/* A reader class is allowed to have no 'destroy' callback. */
2535 if (src->reader->class->destroy != NULL)
2536 src->reader->class->destroy (src->reader);
2541 lex_stage_uninit (&src->pp);
2542 lex_stage_uninit (&src->merge);
2543 lex_source_clear_parse (src);
/* A lex_reader whose input comes from a u8_istream.  The generic 'reader'
   member is what lex_file_reader_cast() uses to get from a
   'struct lex_reader *' back to the enclosing lex_file_reader. */
2548 struct lex_file_reader
2550 struct lex_reader reader;
2551 struct u8_istream *istream;
/* Forward declaration; the definition appears after the lex_file_*
   functions. */
2554 static struct lex_reader_class lex_file_reader_class;
2556 /* Creates and returns a new lex_reader that will read from file FILE_NAME (or
2557 from stdin if FILE_NAME is "-"). The file is expected to be encoded with
2558 ENCODING, which should take one of the forms accepted by
2559 u8_istream_for_file(). SYNTAX and ERROR become the syntax mode and error
2560 mode of the new reader, respectively.
2562 Returns a null pointer if FILE_NAME cannot be opened. */
2564 lex_reader_for_file (const char *file_name, const char *encoding,
2565 enum segmenter_mode syntax,
2566 enum lex_error_mode error)
2568 struct lex_file_reader *r;
2569 struct u8_istream *istream;
/* "-" wraps the existing standard input descriptor; any other name is opened
   read-only. */
2571 istream = (!strcmp(file_name, "-")
2572 ? u8_istream_for_fd (encoding, STDIN_FILENO)
2573 : u8_istream_for_file (encoding, file_name, O_RDONLY));
2574 if (istream == NULL)
/* Report the failure using the errno left by the open attempt. */
2576 msg (ME, _("Opening `%s': %s."), file_name, strerror (errno));
2580 r = xmalloc (sizeof *r);
2581 lex_reader_init (&r->reader, &lex_file_reader_class);
2582 r->reader.syntax = syntax;
2583 r->reader.error = error;
/* The reader keeps its own copy of FILE_NAME.
   NOTE(review): presumably also of ENCODING whenever it is non-null --
   confirm against xstrdup_if_nonnull(). */
2584 r->reader.file_name = xstrdup (file_name);
2585 r->reader.encoding = xstrdup_if_nonnull (encoding);
/* Line numbering starts at 1. */
2586 r->reader.line_number = 1;
2587 r->istream = istream;
/* Converts R, which must point to the 'reader' member of a lex_file_reader,
   into a pointer to that enclosing lex_file_reader. */
2592 static struct lex_file_reader *
2593 lex_file_reader_cast (struct lex_reader *r)
2595 return UP_CAST (r, struct lex_file_reader, reader);
/* Reads up to N bytes from the file reader R_'s input stream into BUF using
   u8_istream_read().  PROMPT_STYLE is not used.

   NOTE(review): the error message below is presumably issued only when
   u8_istream_read() reports failure -- confirm. */
2599 lex_file_read (struct lex_reader *r_, char *buf, size_t n,
2600 enum prompt_style prompt_style UNUSED)
2602 struct lex_file_reader *r = lex_file_reader_cast (r_);
2603 ssize_t n_read = u8_istream_read (r->istream, buf, n);
2606 msg (ME, _("Error reading `%s': %s."), r_->file_name, strerror (errno));
/* Releases the input stream of the file reader R_.  A stream on any
   descriptor other than standard input is closed with u8_istream_close(),
   and a failure to close is reported as an error message. */
2613 lex_file_close (struct lex_reader *r_)
2615 struct lex_file_reader *r = lex_file_reader_cast (r_);
2617 if (u8_istream_fileno (r->istream) != STDIN_FILENO)
2619 if (u8_istream_close (r->istream) != 0)
2620 msg (ME, _("Error closing `%s': %s."), r_->file_name, strerror (errno));
/* NOTE(review): presumably this is the standard-input case, where the stream
   object is freed but the descriptor is left open -- confirm. */
2623 u8_istream_free (r->istream);
/* The reader class that lex_reader_for_file() gives to the readers it
   creates. */
2628 static struct lex_reader_class lex_file_reader_class =
/* A lex_reader whose input is an in-memory string.  The generic 'reader'
   member is what lex_string_reader_cast() uses to get from a
   'struct lex_reader *' back to the enclosing lex_string_reader. */
2634 struct lex_string_reader
2636 struct lex_reader reader;
/* Forward declaration; the definition appears after the lex_string_*
   functions. */
2641 static struct lex_reader_class lex_string_reader_class;
2643 /* Creates and returns a new lex_reader for the contents of S, which must be
2644 encoded in the given ENCODING. The new reader takes ownership of S and will free it
2645 with ss_dealloc() when it is closed. */
2647 lex_reader_for_substring_nocopy (struct substring s, const char *encoding)
2649 struct lex_string_reader *r;
2651 r = xmalloc (sizeof *r);
2652 lex_reader_init (&r->reader, &lex_string_reader_class);
/* String readers always use SEG_MODE_AUTO as their syntax mode. */
2653 r->reader.syntax = SEG_MODE_AUTO;
2654 r->reader.encoding = xstrdup_if_nonnull (encoding);
2661 /* Creates and returns a new lex_reader for a copy of null-terminated string S,
2662 which must be encoded in ENCODING. The caller retains ownership of S. */
2664 lex_reader_for_string (const char *s, const char *encoding)
/* The clone made by ss_clone() is what gets handed over to the new reader, so
   S itself is never owned by it. */
2666 return lex_reader_for_substring_nocopy (ss_clone (ss_cstr (s)), encoding);
2669 /* Formats FORMAT as a printf()-like format string and creates and returns a
2670 new lex_reader for the formatted result. */
2672 lex_reader_for_format (const char *format, const char *encoding, ...)
2674 struct lex_reader *r;
/* The arguments consumed by FORMAT are the ones that follow ENCODING in the
   call, not the ones that follow FORMAT. */
2677 va_start (args, encoding);
/* The string produced by xvasprintf() is passed to the no-copy constructor,
   which takes ownership of it. */
2678 r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)), encoding);
/* Converts R, which must point to the 'reader' member of a lex_string_reader,
   into a pointer to that enclosing lex_string_reader. */
2684 static struct lex_string_reader *
2685 lex_string_reader_cast (struct lex_reader *r)
2687 return UP_CAST (r, struct lex_string_reader, reader);
/* Copies bytes of the string reader R_'s string, starting at the reader's
   current offset, into BUF.  PROMPT_STYLE is not used. */
2691 lex_string_read (struct lex_reader *r_, char *buf, size_t n,
2692 enum prompt_style prompt_style UNUSED)
2694 struct lex_string_reader *r = lex_string_reader_cast (r_);
/* Copy no more than N bytes and no more than what remains past the offset. */
2697 chunk = MIN (n, r->s.length - r->offset);
2698 memcpy (buf, r->s.string + r->offset, chunk);
/* Close callback for a string reader.
   NOTE(review): according to the comment on
   lex_reader_for_substring_nocopy(), closing presumably frees the reader's
   string with ss_dealloc() -- confirm. */
2705 lex_string_close (struct lex_reader *r_)
2707 struct lex_string_reader *r = lex_string_reader_cast (r_);
/* The reader class that lex_reader_for_substring_nocopy() gives to the readers
   it creates. */
2713 static struct lex_reader_class lex_string_reader_class =
/* Looks up line number LINE (counting from 1) in SRC and returns it as a
   substring that points into SRC's buffer rather than a copy.

   NOTE(review): what is returned for an out-of-range LINE is not shown in
   this section -- confirm. */
2720 lex_source_get_line (const struct lex_source *src, int line)
/* Valid line numbers run from 1 through src->n_lines. */
2722 if (line < 1 || line > src->n_lines)
/* src->lines[K] holds the buffer offset at which line K + 1 starts. */
2725 size_t ofs = src->lines[line - 1];
/* For every line but the last recorded one, the line ends where the next
   recorded line starts. */
2727 if (line < src->n_lines)
2728 end = src->lines[line];
/* Search for a new-line in the rest of the buffer: the line ends just before
   it, or at the end of the buffered data if there is none.
   NOTE(review): presumably this applies only to the last recorded line --
   confirm. */
2731 const char *newline = memchr (src->buffer + ofs, '\n', src->length - ofs);
2732 end = newline ? newline - src->buffer : src->length;
2734 return ss_buffer (&src->buffer[ofs], end - ofs);