/* PSPP - a program for statistical analysis.
   Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013, 2016 Free
   Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
#include "language/lexer/lexer.h"

#include <limits.h>
#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

#include "language/command.h"
#include "language/lexer/macro.h"
#include "language/lexer/scan.h"
#include "language/lexer/segment.h"
#include "language/lexer/token.h"
#include "libpspp/assertion.h"
#include "libpspp/cast.h"
#include "libpspp/deque.h"
#include "libpspp/i18n.h"
#include "libpspp/ll.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/str.h"
#include "libpspp/u8-istream.h"
#include "output/journal.h"
#include "output/output-item.h"

#include "gl/c-ctype.h"
#include "gl/minmax.h"
#include "gl/xalloc.h"
#include "gl/xmemdup0.h"
56 #define _(msgid) gettext (msgid)
57 #define N_(msgid) msgid
59 /* A token within a lex_source. */
62 /* The regular token information. */
65 /* Location of token in terms of the lex_source's buffer.
66 src->tail <= line_pos <= token_pos <= src->head. */
67 size_t token_pos; /* Start of token. */
68 size_t token_len; /* Length of source for token in bytes. */
69 size_t line_pos; /* Start of line containing token_pos. */
70 int first_line; /* Line number at token_pos. */
74 /* A source of tokens, corresponding to a syntax file.
76 This is conceptually a lex_reader wrapped with everything needed to convert
77 its UTF-8 bytes into tokens. */
80 struct ll ll; /* In lexer's list of sources. */
81 struct lex_reader *reader;
83 struct segmenter segmenter;
84 bool eof; /* True if T_STOP was read from 'reader'. */
86 /* Buffer of UTF-8 bytes. */
88 size_t allocated; /* Number of bytes allocated. */
89 size_t tail; /* &buffer[0] offset into UTF-8 source. */
90 size_t head; /* &buffer[head - tail] offset into source. */
92 /* Positions in source file, tail <= pos <= head for each member here. */
93 size_t journal_pos; /* First byte not yet output to journal. */
94 size_t seg_pos; /* First byte not yet scanned as token. */
95 size_t line_pos; /* First byte of line containing seg_pos. */
97 int n_newlines; /* Number of new-lines up to seg_pos. */
98 bool suppress_next_newline;
101 struct deque deque; /* Indexes into 'tokens'. */
102 struct lex_token *tokens; /* Lookahead tokens for parser. */
105 static struct lex_source *lex_source_create (struct lexer *,
106 struct lex_reader *);
107 static void lex_source_destroy (struct lex_source *);
112 struct ll_list sources; /* Contains "struct lex_source"s. */
113 struct macro_set *macros;
116 static struct lex_source *lex_source__ (const struct lexer *);
117 static struct substring lex_source_get_syntax__ (const struct lex_source *,
119 static const struct lex_token *lex_next__ (const struct lexer *, int n);
120 static void lex_source_push_endcmd__ (struct lex_source *);
122 static void lex_source_pop__ (struct lex_source *);
123 static bool lex_source_get (const struct lex_source *);
124 static void lex_source_error_valist (struct lex_source *, int n0, int n1,
125 const char *format, va_list)
126 PRINTF_FORMAT (4, 0);
127 static const struct lex_token *lex_source_next__ (const struct lex_source *,
130 /* Initializes READER with the specified CLASS and otherwise some reasonable
131 defaults. The caller should fill in the others members as desired. */
133 lex_reader_init (struct lex_reader *reader,
134 const struct lex_reader_class *class)
136 reader->class = class;
137 reader->syntax = SEG_MODE_AUTO;
138 reader->error = LEX_ERROR_CONTINUE;
139 reader->file_name = NULL;
140 reader->encoding = NULL;
141 reader->line_number = 0;
145 /* Frees any file name already in READER and replaces it by a copy of
146 FILE_NAME, or if FILE_NAME is null then clears any existing name. */
148 lex_reader_set_file_name (struct lex_reader *reader, const char *file_name)
150 free (reader->file_name);
151 reader->file_name = xstrdup_if_nonnull (file_name);
154 /* Creates and returns a new lexer. */
158 struct lexer *lexer = xmalloc (sizeof *lexer);
159 *lexer = (struct lexer) {
160 .sources = LL_INITIALIZER (lexer->sources),
161 .macros = macro_set_create (),
166 /* Destroys LEXER. */
168 lex_destroy (struct lexer *lexer)
172 struct lex_source *source, *next;
174 ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources)
175 lex_source_destroy (source);
176 macro_set_destroy (lexer->macros);
181 /* Adds M to LEXER's set of macros. M replaces any existing macro with the
182 same name. Takes ownership of M. */
184 lex_define_macro (struct lexer *lexer, struct macro *m)
186 macro_set_add (lexer->macros, m);
189 /* Inserts READER into LEXER so that the next token read by LEXER comes from
190 READER. Before the caller, LEXER must either be empty or at a T_ENDCMD
193 lex_include (struct lexer *lexer, struct lex_reader *reader)
195 assert (ll_is_empty (&lexer->sources) || lex_token (lexer) == T_ENDCMD);
196 ll_push_head (&lexer->sources, &lex_source_create (lexer, reader)->ll);
199 /* Appends READER to LEXER, so that it will be read after all other current
200 readers have already been read. */
202 lex_append (struct lexer *lexer, struct lex_reader *reader)
204 ll_push_tail (&lexer->sources, &lex_source_create (lexer, reader)->ll);
209 static struct lex_token *
210 lex_push_token__ (struct lex_source *src)
212 struct lex_token *token;
214 if (deque_is_full (&src->deque))
215 src->tokens = deque_expand (&src->deque, src->tokens, sizeof *src->tokens);
217 token = &src->tokens[deque_push_front (&src->deque)];
218 token->token = (struct token) { .type = T_STOP };
223 lex_source_pop__ (struct lex_source *src)
225 token_uninit (&src->tokens[deque_pop_back (&src->deque)].token);
229 lex_source_pop_front (struct lex_source *src)
231 token_uninit (&src->tokens[deque_pop_front (&src->deque)].token);
234 /* Advances LEXER to the next token, consuming the current token. */
236 lex_get (struct lexer *lexer)
238 struct lex_source *src;
240 src = lex_source__ (lexer);
244 if (!deque_is_empty (&src->deque))
245 lex_source_pop__ (src);
247 while (deque_is_empty (&src->deque))
248 if (!lex_source_get (src))
250 lex_source_destroy (src);
251 src = lex_source__ (lexer);
257 /* Issuing errors. */
259 /* Prints a syntax error message containing the current token and
260 given message MESSAGE (if non-null). */
262 lex_error (struct lexer *lexer, const char *format, ...)
266 va_start (args, format);
267 lex_next_error_valist (lexer, 0, 0, format, args);
271 /* Prints a syntax error message containing the current token and
272 given message MESSAGE (if non-null). */
274 lex_error_valist (struct lexer *lexer, const char *format, va_list args)
276 lex_next_error_valist (lexer, 0, 0, format, args);
279 /* Prints a syntax error message containing the current token and
280 given message MESSAGE (if non-null). */
282 lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...)
286 va_start (args, format);
287 lex_next_error_valist (lexer, n0, n1, format, args);
291 /* Prints a syntax error message saying that one of the strings provided as
292 varargs, up to the first NULL, is expected. */
294 (lex_error_expecting) (struct lexer *lexer, ...)
298 va_start (args, lexer);
299 lex_error_expecting_valist (lexer, args);
303 /* Prints a syntax error message saying that one of the options provided in
304 ARGS, up to the first NULL, is expected. */
306 lex_error_expecting_valist (struct lexer *lexer, va_list args)
308 enum { MAX_OPTIONS = 9 };
309 const char *options[MAX_OPTIONS];
311 while (n < MAX_OPTIONS)
313 const char *option = va_arg (args, const char *);
317 options[n++] = option;
319 lex_error_expecting_array (lexer, options, n);
323 lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n)
328 lex_error (lexer, NULL);
332 lex_error (lexer, _("expecting %s"), options[0]);
336 lex_error (lexer, _("expecting %s or %s"), options[0], options[1]);
340 lex_error (lexer, _("expecting %s, %s, or %s"), options[0], options[1],
345 lex_error (lexer, _("expecting %s, %s, %s, or %s"),
346 options[0], options[1], options[2], options[3]);
350 lex_error (lexer, _("expecting %s, %s, %s, %s, or %s"),
351 options[0], options[1], options[2], options[3], options[4]);
355 lex_error (lexer, _("expecting %s, %s, %s, %s, %s, or %s"),
356 options[0], options[1], options[2], options[3], options[4],
361 lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, or %s"),
362 options[0], options[1], options[2], options[3], options[4],
363 options[5], options[6]);
367 lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, %s, or %s"),
368 options[0], options[1], options[2], options[3], options[4],
369 options[5], options[6], options[7]);
373 lex_error (lexer, NULL);
377 /* Reports an error to the effect that subcommand SBC may only be specified
380 This function does not take a lexer as an argument or use lex_error(),
381 because the result would ordinarily just be redundant: "Syntax error at
382 SUBCOMMAND: Subcommand SUBCOMMAND may only be specified once.", which does
383 not help the user find the error. */
385 lex_sbc_only_once (const char *sbc)
387 msg (SE, _("Subcommand %s may only be specified once."), sbc);
390 /* Reports an error to the effect that subcommand SBC is missing.
392 This function does not take a lexer as an argument or use lex_error(),
393 because a missing subcommand can normally be detected only after the whole
394 command has been parsed, and so lex_error() would always report "Syntax
395 error at end of command", which does not help the user find the error. */
397 lex_sbc_missing (const char *sbc)
399 msg (SE, _("Required subcommand %s was not specified."), sbc);
402 /* Reports an error to the effect that specification SPEC may only be specified
403 once within subcommand SBC. */
405 lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec)
407 lex_error (lexer, _("%s may only be specified once within subcommand %s"),
411 /* Reports an error to the effect that specification SPEC is missing within
414 lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec)
416 lex_error (lexer, _("Required %s specification missing from %s subcommand"),
420 /* Prints a syntax error message containing the current token and
421 given message MESSAGE (if non-null). */
423 lex_next_error_valist (struct lexer *lexer, int n0, int n1,
424 const char *format, va_list args)
426 struct lex_source *src = lex_source__ (lexer);
429 lex_source_error_valist (src, n0, n1, format, args);
435 ds_put_format (&s, _("Syntax error at end of input"));
438 ds_put_cstr (&s, ": ");
439 ds_put_vformat (&s, format, args);
441 ds_put_byte (&s, '.');
442 msg (SE, "%s", ds_cstr (&s));
447 /* Checks that we're at end of command.
448 If so, returns a successful command completion code.
449 If not, flags a syntax error and returns an error command
452 lex_end_of_command (struct lexer *lexer)
454 if (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_STOP)
456 lex_error (lexer, _("expecting end of command"));
463 /* Token testing functions. */
465 /* Returns true if the current token is a number. */
467 lex_is_number (const struct lexer *lexer)
469 return lex_next_is_number (lexer, 0);
472 /* Returns true if the current token is a string. */
474 lex_is_string (const struct lexer *lexer)
476 return lex_next_is_string (lexer, 0);
479 /* Returns the value of the current token, which must be a
480 floating point number. */
482 lex_number (const struct lexer *lexer)
484 return lex_next_number (lexer, 0);
487 /* Returns true iff the current token is an integer. */
489 lex_is_integer (const struct lexer *lexer)
491 return lex_next_is_integer (lexer, 0);
494 /* Returns the value of the current token, which must be an
497 lex_integer (const struct lexer *lexer)
499 return lex_next_integer (lexer, 0);
502 /* Token testing functions with lookahead.
504 A value of 0 for N as an argument to any of these functions refers to the
505 current token. Lookahead is limited to the current command. Any N greater
506 than the number of tokens remaining in the current command will be treated
507 as referring to a T_ENDCMD token. */
509 /* Returns true if the token N ahead of the current token is a number. */
511 lex_next_is_number (const struct lexer *lexer, int n)
513 enum token_type next_token = lex_next_token (lexer, n);
514 return next_token == T_POS_NUM || next_token == T_NEG_NUM;
517 /* Returns true if the token N ahead of the current token is a string. */
519 lex_next_is_string (const struct lexer *lexer, int n)
521 return lex_next_token (lexer, n) == T_STRING;
524 /* Returns the value of the token N ahead of the current token, which must be a
525 floating point number. */
527 lex_next_number (const struct lexer *lexer, int n)
529 assert (lex_next_is_number (lexer, n));
530 return lex_next_tokval (lexer, n);
533 /* Returns true if the token N ahead of the current token is an integer. */
535 lex_next_is_integer (const struct lexer *lexer, int n)
539 if (!lex_next_is_number (lexer, n))
542 value = lex_next_tokval (lexer, n);
543 return value > LONG_MIN && value <= LONG_MAX && floor (value) == value;
546 /* Returns the value of the token N ahead of the current token, which must be
549 lex_next_integer (const struct lexer *lexer, int n)
551 assert (lex_next_is_integer (lexer, n));
552 return lex_next_tokval (lexer, n);
555 /* Token matching functions. */
557 /* If the current token has the specified TYPE, skips it and returns true.
558 Otherwise, returns false. */
560 lex_match (struct lexer *lexer, enum token_type type)
562 if (lex_token (lexer) == type)
571 /* If the current token matches IDENTIFIER, skips it and returns true.
572 IDENTIFIER may be abbreviated to its first three letters. Otherwise,
575 IDENTIFIER must be an ASCII string. */
577 lex_match_id (struct lexer *lexer, const char *identifier)
579 return lex_match_id_n (lexer, identifier, 3);
582 /* If the current token is IDENTIFIER, skips it and returns true. IDENTIFIER
583 may be abbreviated to its first N letters. Otherwise, returns false.
585 IDENTIFIER must be an ASCII string. */
587 lex_match_id_n (struct lexer *lexer, const char *identifier, size_t n)
589 if (lex_token (lexer) == T_ID
590 && lex_id_match_n (ss_cstr (identifier), lex_tokss (lexer), n))
599 /* If the current token is integer X, skips it and returns true. Otherwise,
602 lex_match_int (struct lexer *lexer, int x)
604 if (lex_is_integer (lexer) && lex_integer (lexer) == x)
613 /* Forced matches. */
615 /* If this token is IDENTIFIER, skips it and returns true. IDENTIFIER may be
616 abbreviated to its first 3 letters. Otherwise, reports an error and returns
619 IDENTIFIER must be an ASCII string. */
621 lex_force_match_id (struct lexer *lexer, const char *identifier)
623 if (lex_match_id (lexer, identifier))
627 lex_error_expecting (lexer, identifier);
632 /* If the current token has the specified TYPE, skips it and returns true.
633 Otherwise, reports an error and returns false. */
635 lex_force_match (struct lexer *lexer, enum token_type type)
637 if (lex_token (lexer) == type)
644 const char *type_string = token_type_to_string (type);
647 char *s = xasprintf ("`%s'", type_string);
648 lex_error_expecting (lexer, s);
652 lex_error_expecting (lexer, token_type_to_name (type));
658 /* If the current token is a string, does nothing and returns true.
659 Otherwise, reports an error and returns false. */
661 lex_force_string (struct lexer *lexer)
663 if (lex_is_string (lexer))
667 lex_error (lexer, _("expecting string"));
672 /* If the current token is a string or an identifier, does nothing and returns
673 true. Otherwise, reports an error and returns false.
675 This is meant for use in syntactic situations where we want to encourage the
676 user to supply a quoted string, but for compatibility we also accept
677 identifiers. (One example of such a situation is file names.) Therefore,
678 the error message issued when the current token is wrong only says that a
679 string is expected and doesn't mention that an identifier would also be
682 lex_force_string_or_id (struct lexer *lexer)
684 return lex_token (lexer) == T_ID || lex_force_string (lexer);
687 /* If the current token is an integer, does nothing and returns true.
688 Otherwise, reports an error and returns false. */
690 lex_force_int (struct lexer *lexer)
692 if (lex_is_integer (lexer))
696 lex_error (lexer, _("expecting integer"));
701 /* If the current token is an integer in the range MIN...MAX (inclusive), does
702 nothing and returns true. Otherwise, reports an error and returns false.
703 If NAME is nonnull, then it is used in the error message. */
/* NOTE(review): this region is a corrupted paste -- line-number residue is
   embedded in the text and the braces/else structure of the range-reporting
   branches is missing.  Recover the full function body from upstream before
   building. */
705 lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
/* Accept immediately when the token is an integer already inside [MIN,MAX]. */
707 bool is_integer = lex_is_integer (lexer);
708 bool too_small = is_integer && lex_integer (lexer) < min;
709 bool too_big = is_integer && lex_integer (lexer) > max;
710 if (is_integer && !too_small && !too_big)
715 /* Weird, maybe a bug in the caller. Just report that we needed an
/* Branch for MIN > MAX (empty range): plain "integer expected" message. */
718 lex_error (lexer, _("Integer expected for %s."), name);
720 lex_error (lexer, _("Integer expected."));
/* Branch for MIN == MAX: exactly one acceptable value. */
725 lex_error (lexer, _("Expected %ld for %s."), min, name);
727 lex_error (lexer, _("Expected %ld."), min);
729 else if (min + 1 == max)
/* Branch for a two-value range. */
732 lex_error (lexer, _("Expected %ld or %ld for %s."), min, min + 1, name);
734 lex_error (lexer, _("Expected %ld or %ld."), min, min + 1);
/* General range: only mention a bound if it is "interesting" (not close to
   the type's limits) or if the token actually violated it. */
738 bool report_lower_bound = (min > INT_MIN / 2) || too_small;
739 bool report_upper_bound = (max < INT_MAX / 2) || too_big;
741 if (report_lower_bound && report_upper_bound)
745 _("Expected integer between %ld and %ld for %s."),
748 lex_error (lexer, _("Expected integer between %ld and %ld."),
751 else if (report_lower_bound)
/* Lower bound only: special wording for min == 0 and min == 1. */
756 lex_error (lexer, _("Expected non-negative integer for %s."),
759 lex_error (lexer, _("Expected non-negative integer."));
764 lex_error (lexer, _("Expected positive integer for %s."),
767 lex_error (lexer, _("Expected positive integer."));
770 else if (report_upper_bound)
774 _("Expected integer less than or equal to %ld for %s."),
777 lex_error (lexer, _("Expected integer less than or equal to %ld."),
/* Neither bound worth reporting: generic message. */
783 lex_error (lexer, _("Integer expected for %s."), name);
785 lex_error (lexer, _("Integer expected."));
791 /* If the current token is a number, does nothing and returns true.
792 Otherwise, reports an error and returns false. */
794 lex_force_num (struct lexer *lexer)
796 if (lex_is_number (lexer))
799 lex_error (lexer, _("expecting number"));
803 /* If the current token is an identifier, does nothing and returns true.
804 Otherwise, reports an error and returns false. */
806 lex_force_id (struct lexer *lexer)
808 if (lex_token (lexer) == T_ID)
811 lex_error (lexer, _("expecting identifier"));
815 /* Token accessors. */
817 /* Returns the type of LEXER's current token. */
819 lex_token (const struct lexer *lexer)
821 return lex_next_token (lexer, 0);
824 /* Returns the number in LEXER's current token.
826 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
827 tokens this function will always return zero. */
829 lex_tokval (const struct lexer *lexer)
831 return lex_next_tokval (lexer, 0);
834 /* Returns the null-terminated string in LEXER's current token, UTF-8 encoded.
836 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
837 this functions this function will always return NULL.
839 The UTF-8 encoding of the returned string is correct for variable names and
840 other identifiers. Use filename_to_utf8() to use it as a filename. Use
841 data_in() to use it in a "union value". */
843 lex_tokcstr (const struct lexer *lexer)
845 return lex_next_tokcstr (lexer, 0);
848 /* Returns the string in LEXER's current token, UTF-8 encoded. The string is
849 null-terminated (but the null terminator is not included in the returned
850 substring's 'length').
852 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
853 this functions this function will always return NULL.
855 The UTF-8 encoding of the returned string is correct for variable names and
856 other identifiers. Use filename_to_utf8() to use it as a filename. Use
857 data_in() to use it in a "union value". */
859 lex_tokss (const struct lexer *lexer)
861 return lex_next_tokss (lexer, 0);
866 A value of 0 for N as an argument to any of these functions refers to the
867 current token. Lookahead is limited to the current command. Any N greater
868 than the number of tokens remaining in the current command will be treated
869 as referring to a T_ENDCMD token. */
871 static const struct lex_token *
872 lex_next__ (const struct lexer *lexer_, int n)
874 struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
875 struct lex_source *src = lex_source__ (lexer);
878 return lex_source_next__ (src, n);
881 static const struct lex_token stop_token = { .token = { .type = T_STOP } };
886 static const struct lex_token *
887 lex_source_front (const struct lex_source *src)
889 return &src->tokens[deque_front (&src->deque, 0)];
892 static const struct lex_token *
893 lex_source_next__ (const struct lex_source *src, int n)
895 while (deque_count (&src->deque) <= n)
897 if (!deque_is_empty (&src->deque))
899 const struct lex_token *front = lex_source_front (src);
900 if (front->token.type == T_STOP || front->token.type == T_ENDCMD)
904 lex_source_get (src);
907 return &src->tokens[deque_back (&src->deque, n)];
910 /* Returns the "struct token" of the token N after the current one in LEXER.
911 The returned pointer can be invalidated by pretty much any succeeding call
912 into the lexer, although the string pointer within the returned token is
913 only invalidated by consuming the token (e.g. with lex_get()). */
915 lex_next (const struct lexer *lexer, int n)
917 return &lex_next__ (lexer, n)->token;
920 /* Returns the type of the token N after the current one in LEXER. */
922 lex_next_token (const struct lexer *lexer, int n)
924 return lex_next (lexer, n)->type;
927 /* Returns the number in the tokn N after the current one in LEXER.
929 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
930 tokens this function will always return zero. */
932 lex_next_tokval (const struct lexer *lexer, int n)
934 const struct token *token = lex_next (lexer, n);
935 return token->number;
938 /* Returns the null-terminated string in the token N after the current one, in
941 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
942 this functions this function will always return NULL.
944 The UTF-8 encoding of the returned string is correct for variable names and
945 other identifiers. Use filename_to_utf8() to use it as a filename. Use
946 data_in() to use it in a "union value". */
948 lex_next_tokcstr (const struct lexer *lexer, int n)
950 return lex_next_tokss (lexer, n).string;
953 /* Returns the string in the token N after the current one, in UTF-8 encoding.
954 The string is null-terminated (but the null terminator is not included in
955 the returned substring's 'length').
957 Only T_ID, T_MACRO_ID, T_STRING tokens have meaningful strings. For other
958 tokens this functions this function will always return NULL.
960 The UTF-8 encoding of the returned string is correct for variable names and
961 other identifiers. Use filename_to_utf8() to use it as a filename. Use
962 data_in() to use it in a "union value". */
964 lex_next_tokss (const struct lexer *lexer, int n)
966 return lex_next (lexer, n)->string;
970 lex_next_representation (const struct lexer *lexer, int n0, int n1)
972 return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1);
976 lex_tokens_match (const struct token *actual, const struct token *expected)
978 if (actual->type != expected->type)
981 switch (actual->type)
985 return actual->number == expected->number;
988 return lex_id_match (expected->string, actual->string);
991 return (actual->string.length == expected->string.length
992 && !memcmp (actual->string.string, expected->string.string,
993 actual->string.length));
1000 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
1001 skips it and returns true. Otherwise, returns false.
1003 S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
1004 "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their
1005 first three letters. */
/* NOTE(review): corrupted paste -- line-number residue embedded and the tail
   of the function (consumption of the matched tokens and the return) is
   missing here; recover it from upstream before building. */
1007 lex_match_phrase (struct lexer *lexer, const char *s)
1009 struct string_lexer slex;
/* Tokenize S with a throwaway string lexer and compare token-by-token
   against LEXER's lookahead without consuming anything on a mismatch. */
1014 string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE);
1015 while (string_lexer_next (&slex, &token))
1016 if (token.type != SCAN_SKIP)
1018 bool match = lex_tokens_match (lex_next (lexer, i++), &token);
1019 token_uninit (&token);
1030 lex_source_get_first_line_number (const struct lex_source *src, int n)
1032 return lex_source_next__ (src, n)->first_line;
/* Returns the number of new-line characters in the LENGTH bytes starting
   at S. */
static int
count_newlines (char *s, size_t length)
{
  int n_newlines = 0;
  char *newline;

  while ((newline = memchr (s, '\n', length)) != NULL)
    {
      n_newlines++;
      length -= (newline + 1) - s;
      s = newline + 1;
    }

  return n_newlines;
}
1052 lex_source_get_last_line_number (const struct lex_source *src, int n)
1054 const struct lex_token *token = lex_source_next__ (src, n);
1056 if (token->first_line == 0)
1060 char *token_str = &src->buffer[token->token_pos - src->tail];
1061 return token->first_line + count_newlines (token_str, token->token_len) + 1;
1066 count_columns (const char *s_, size_t length)
1068 const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
1074 for (ofs = 0; ofs < length; ofs += mblen)
1078 mblen = u8_mbtouc (&uc, s + ofs, length - ofs);
1081 int width = uc_width (uc, "UTF-8");
1086 columns = ROUND_UP (columns + 1, 8);
1093 lex_source_get_first_column (const struct lex_source *src, int n)
1095 const struct lex_token *token = lex_source_next__ (src, n);
1096 return count_columns (&src->buffer[token->line_pos - src->tail],
1097 token->token_pos - token->line_pos);
1101 lex_source_get_last_column (const struct lex_source *src, int n)
1103 const struct lex_token *token = lex_source_next__ (src, n);
1104 char *start, *end, *newline;
1106 start = &src->buffer[token->line_pos - src->tail];
1107 end = &src->buffer[(token->token_pos + token->token_len) - src->tail];
1108 newline = memrchr (start, '\n', end - start);
1109 if (newline != NULL)
1110 start = newline + 1;
1111 return count_columns (start, end - start);
1114 /* Returns the 1-based line number of the start of the syntax that represents
1115 the token N after the current one in LEXER. Returns 0 for a T_STOP token or
1116 if the token is drawn from a source that does not have line numbers. */
1118 lex_get_first_line_number (const struct lexer *lexer, int n)
1120 const struct lex_source *src = lex_source__ (lexer);
1121 return src != NULL ? lex_source_get_first_line_number (src, n) : 0;
1124 /* Returns the 1-based line number of the end of the syntax that represents the
1125 token N after the current one in LEXER, plus 1. Returns 0 for a T_STOP
1126 token or if the token is drawn from a source that does not have line
1129 Most of the time, a single token is wholly within a single line of syntax,
1130 but there are two exceptions: a T_STRING token can be made up of multiple
1131 segments on adjacent lines connected with "+" punctuators, and a T_NEG_NUM
1132 token can consist of a "-" on one line followed by the number on the next.
1135 lex_get_last_line_number (const struct lexer *lexer, int n)
1137 const struct lex_source *src = lex_source__ (lexer);
1138 return src != NULL ? lex_source_get_last_line_number (src, n) : 0;
1141 /* Returns the 1-based column number of the start of the syntax that represents
1142 the token N after the current one in LEXER. Returns 0 for a T_STOP
1145 Column numbers are measured according to the width of characters as shown in
1146 a typical fixed-width font, in which CJK characters have width 2 and
1147 combining characters have width 0. */
1149 lex_get_first_column (const struct lexer *lexer, int n)
1151 const struct lex_source *src = lex_source__ (lexer);
1152 return src != NULL ? lex_source_get_first_column (src, n) : 0;
1155 /* Returns the 1-based column number of the end of the syntax that represents
1156 the token N after the current one in LEXER, plus 1. Returns 0 for a T_STOP
1159 Column numbers are measured according to the width of characters as shown in
1160 a typical fixed-width font, in which CJK characters have width 2 and
1161 combining characters have width 0. */
1163 lex_get_last_column (const struct lexer *lexer, int n)
1165 const struct lex_source *src = lex_source__ (lexer);
1166 return src != NULL ? lex_source_get_last_column (src, n) : 0;
1169 /* Returns the name of the syntax file from which the current command is drawn.
1170 Returns NULL for a T_STOP token or if the command's source does not have
1173 There is no version of this function that takes an N argument because
1174 lookahead only works to the end of a command and any given command is always
1175 within a single syntax file. */
1177 lex_get_file_name (const struct lexer *lexer)
1179 struct lex_source *src = lex_source__ (lexer);
1180 return src == NULL ? NULL : src->reader->file_name;
1184 lex_get_encoding (const struct lexer *lexer)
1186 struct lex_source *src = lex_source__ (lexer);
1187 return src == NULL ? NULL : src->reader->encoding;
1190 /* Returns the syntax mode for the syntax file from which the current drawn is
1191 drawn. Returns SEG_MODE_AUTO for a T_STOP token or if the command's source
1192 does not have line numbers.
1194 There is no version of this function that takes an N argument because
1195 lookahead only works to the end of a command and any given command is always
1196 within a single syntax file. */
1198 lex_get_syntax_mode (const struct lexer *lexer)
1200 struct lex_source *src = lex_source__ (lexer);
1201 return src == NULL ? SEG_MODE_AUTO : src->reader->syntax;
1204 /* Returns the error mode for the syntax file from which the current drawn is
1205 drawn. Returns LEX_ERROR_TERMINAL for a T_STOP token or if the command's
1206 source does not have line numbers.
1208 There is no version of this function that takes an N argument because
1209 lookahead only works to the end of a command and any given command is always
1210 within a single syntax file. */
1212 lex_get_error_mode (const struct lexer *lexer)
1214 struct lex_source *src = lex_source__ (lexer);
1215 return src == NULL ? LEX_ERROR_TERMINAL : src->reader->error;
1218 /* If the source that LEXER is currently reading has error mode
1219 LEX_ERROR_TERMINAL, discards all buffered input and tokens, so that the next
1220 token to be read comes directly from whatever is next read from the stream.
1222 It makes sense to call this function after encountering an error in a
1223 command entered on the console, because usually the user would prefer not to
1224 have cascading errors. */
1226 lex_interactive_reset (struct lexer *lexer)
1228 struct lex_source *src = lex_source__ (lexer);
1229 if (src != NULL && src->reader->error == LEX_ERROR_TERMINAL)
1231 src->head = src->tail = 0;
1232 src->journal_pos = src->seg_pos = src->line_pos = 0;
1233 src->n_newlines = 0;
1234 src->suppress_next_newline = false;
1235 segmenter_init (&src->segmenter, segmenter_get_mode (&src->segmenter));
1236 while (!deque_is_empty (&src->deque))
1237 lex_source_pop__ (src);
1238 lex_source_push_endcmd__ (src);
1242 /* Advances past any tokens in LEXER up to a T_ENDCMD or T_STOP. */
1244 lex_discard_rest_of_command (struct lexer *lexer)
1246 while (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD)
1250 /* Discards all lookahead tokens in LEXER, then discards all input sources
1251 until it encounters one with error mode LEX_ERROR_TERMINAL or until it
1252 runs out of input sources. */
1254 lex_discard_noninteractive (struct lexer *lexer)
1256 struct lex_source *src = lex_source__ (lexer);
1260 while (!deque_is_empty (&src->deque))
1261 lex_source_pop__ (src);
1263 for (; src != NULL && src->reader->error != LEX_ERROR_TERMINAL;
1264 src = lex_source__ (lexer))
1265 lex_source_destroy (src);
/* Returns the greatest offset to which SRC's buffer tail could be advanced
   without discarding bytes that are still needed: the minimum of the journal
   position, the current line start, and the oldest buffered token's line
   start. */
1270 lex_source_max_tail__ (const struct lex_source *src)
1272 const struct lex_token *token;
1275 assert (src->seg_pos >= src->line_pos);
1276 max_tail = MIN (src->journal_pos, src->line_pos);
1278 /* Use the oldest token also. (We know that src->deque cannot be empty
1279 because we are in the process of adding a new token, which is already
1280 initialized enough to use here.) */
1281 token = &src->tokens[deque_back (&src->deque, 0)];
1282 assert (token->token_pos >= token->line_pos);
1283 max_tail = MIN (max_tail, token->line_pos);
/* Makes room at the head of SRC's buffer, first by trying to advance the tail
   past bytes that are no longer needed and otherwise by growing the buffer
   itself. */
1289 lex_source_expand__ (struct lex_source *src)
1291 if (src->head - src->tail >= src->allocated)
1293 size_t max_tail = lex_source_max_tail__ (src);
1294 if (max_tail > src->tail)
1296 /* Advance the tail, freeing up room at the head. */
1297 memmove (src->buffer, src->buffer + (max_tail - src->tail),
1298 src->head - max_tail);
1299 src->tail = max_tail;
1303 /* Buffer is completely full. Expand it. */
1304 src->buffer = x2realloc (src->buffer, &src->allocated);
1309 /* There's space available at the head of the buffer. Nothing to do. */
/* Reads more input from SRC's reader into its buffer until the buffer holds
   at least one complete ('\n'-terminated) line past the current segmentation
   position, or until end of input.  NOTE(review): the surrounding loop
   structure and read-error handling are not fully visible in this excerpt. */
1314 lex_source_read__ (struct lex_source *src)
1318 lex_source_expand__ (src);
1320 size_t head_ofs = src->head - src->tail;
1321 size_t space = src->allocated - head_ofs;
/* Let the reader know what prompt style is appropriate, for interactive
   sources. */
1322 enum prompt_style prompt = segmenter_get_prompt (&src->segmenter);
1323 size_t n = src->reader->class->read (src->reader, &src->buffer[head_ofs],
1325 assert (n <= space);
/* Presumably reached when the reader returns no data: mark end-of-file —
   TODO confirm, the intervening check is not visible here. */
1330 src->reader->eof = true;
1331 lex_source_expand__ (src);
/* Keep reading until a complete line is buffered (or EOF). */
1337 while (!memchr (&src->buffer[src->seg_pos - src->tail], '\n',
1338 src->head - src->seg_pos));
1341 static struct lex_source *
1342 lex_source__ (const struct lexer *lexer)
1344 return (ll_is_empty (&lexer->sources) ? NULL
1345 : ll_data (ll_head (&lexer->sources), struct lex_source, ll));
1348 static struct substring
1349 lex_tokens_get_syntax__ (const struct lex_source *src,
1350 const struct lex_token *token0,
1351 const struct lex_token *token1)
1353 size_t start = token0->token_pos;
1354 size_t end = token1->token_pos + token1->token_len;
1356 return ss_buffer (&src->buffer[start - src->tail], end - start);
1359 static struct substring
1360 lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
1362 return lex_tokens_get_syntax__ (src,
1363 lex_source_next__ (src, n0),
1364 lex_source_next__ (src, MAX (n0, n1)));
/* Copies an ellipsized form of IN into OUT, which has room for OUT_SIZE bytes
   (at least 16): IN is cut short at a new-line, a null byte, or a CR-LF pair,
   or when it would overflow OUT, and "..." is appended whenever part of IN
   was omitted.  NOTE(review): the declarations of out_maxlen, out_len, and
   mblen are not visible in this excerpt. */
1368 lex_ellipsize__ (struct substring in, char *out, size_t out_size)
1374 assert (out_size >= 16);
1375 out_maxlen = out_size - 1;
/* Reserve room for the "..." suffix when IN cannot fit whole. */
1376 if (in.length > out_maxlen - 3)
1379 for (out_len = 0; out_len < in.length; out_len += mblen)
/* Stop at a new-line, a null byte, or the start of a CR-LF pair. */
1381 if (in.string[out_len] == '\n'
1382 || in.string[out_len] == '\0'
1383 || (in.string[out_len] == '\r'
1384 && out_len + 1 < in.length
1385 && in.string[out_len + 1] == '\n'))
/* Advance by whole UTF-8 characters so the output is never cut in the middle
   of a multibyte character. */
1388 mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len),
1389 in.length - out_len);
1394 if (out_len + mblen > out_maxlen)
1398 memcpy (out, in.string, out_len);
/* Append "..." only if some of IN was left out. */
1399 strcpy (&out[out_len], out_len < in.length ? "..." : "");
/* Formats a syntax error message for lookahead tokens N0 through N1 of SRC —
   a prefix naming the offending syntax (ellipsized to fit), then FORMAT
   expanded with ARGS — and emits it as a MSG_S_ERROR message carrying the
   tokens' file name and line/column range.  NOTE(review): the string's
   initialization and the surrounding msg-emission code are not visible in
   this excerpt. */
1403 lex_source_error_valist (struct lex_source *src, int n0, int n1,
1404 const char *format, va_list args)
1406 const struct lex_token *token;
1411 token = lex_source_next__ (src, n0);
1412 if (token->token.type == T_ENDCMD)
1413 ds_put_cstr (&s, _("Syntax error at end of command"));
1414 else if (token->from_macro)
1416 /* XXX this isn't ideal, we should get the actual syntax */
1417 char *syntax = token_to_string (&token->token);
1419 ds_put_format (&s, _("Syntax error at `%s'"), syntax);
1421 ds_put_cstr (&s, _("Syntax error"));
/* Tokens straight from the source file: quote their original syntax,
   ellipsized into a fixed-size buffer. */
1426 struct substring syntax = lex_source_get_syntax__ (src, n0, n1);
1427 if (!ss_is_empty (syntax))
1429 char syntax_cstr[64];
1431 lex_ellipsize__ (syntax, syntax_cstr, sizeof syntax_cstr);
1432 ds_put_format (&s, _("Syntax error at `%s'"), syntax_cstr);
1435 ds_put_cstr (&s, _("Syntax error"));
/* Append the caller's message, making sure the result ends in a period. */
1440 ds_put_cstr (&s, ": ");
1441 ds_put_vformat (&s, format, args);
1443 if (ds_last (&s) != '.')
1444 ds_put_byte (&s, '.');
/* Attribute the message to the tokens' location in the source file. */
1447 .category = MSG_C_SYNTAX,
1448 .severity = MSG_S_ERROR,
1449 .file_name = src->reader->file_name,
1450 .first_line = lex_source_get_first_line_number (src, n0),
1451 .last_line = lex_source_get_last_line_number (src, n1),
1452 .first_column = lex_source_get_first_column (src, n0),
1453 .last_column = lex_source_get_last_column (src, n1),
1454 .text = ds_steal_cstr (&s),
/* Reports a printf()-style error FORMAT attributed to the most recently added
   token in SRC's deque, then discards a token so it is not handed to the
   parser. */
1459 static void PRINTF_FORMAT (2, 3)
1460 lex_get_error (struct lex_source *src, const char *format, ...)
1465 va_start (args, format);
/* The newest token is at lookahead index count - 1. */
1467 n = deque_count (&src->deque) - 1;
1468 lex_source_error_valist (src, n, n, format, args);
1469 lex_source_pop_front (src);
1474 /* Attempts to append an additional token into SRC's deque, reading more from
1475 the underlying lex_reader if necessary. Returns true if a new token was
1476 added to SRC's deque, false otherwise. */
/* NOTE(review): many lines of this function (braces and several branches) are
   not visible in this excerpt. */
1478 lex_source_try_get (struct lex_source *src)
1480 /* State maintained while scanning tokens. Usually we only need a single
1481 state, but scanner_push() can return SCAN_SAVE to indicate that the state
1482 needs to be saved and possibly restored later with SCAN_BACK. */
1485 struct segmenter segmenter;
1486 enum segment_type last_segment;
1487 int newlines; /* Number of newlines encountered so far. */
1488 /* Maintained here so we can update lex_source's similar members when we
1494 /* Initialize state. */
1495 struct state state =
1497 .segmenter = src->segmenter,
1499 .seg_pos = src->seg_pos,
1500 .line_pos = src->line_pos,
1502 struct state saved = state;
1504 /* Append a new token to SRC and initialize it. */
1505 struct lex_token *token = lex_push_token__ (src);
1506 struct scanner scanner;
1507 scanner_init (&scanner, &token->token);
1508 token->line_pos = src->line_pos;
1509 token->token_pos = src->seg_pos;
/* first_line of 0 is used when the reader provides no line numbering;
   otherwise offset the reader's starting line by the newlines seen so far. */
1510 if (src->reader->line_number > 0)
1511 token->first_line = src->reader->line_number + src->n_newlines;
1513 token->first_line = 0;
1515 /* Extract segments and pass them through the scanner until we obtain a
1519 /* Extract a segment. */
1520 const char *segment = &src->buffer[state.seg_pos - src->tail];
1521 size_t seg_maxlen = src->head - state.seg_pos;
1522 enum segment_type type;
1523 int seg_len = segmenter_push (&state.segmenter, segment, seg_maxlen,
1524 src->reader->eof, &type);
1527 /* The segmenter needs more input to produce a segment. */
1528 assert (!src->reader->eof);
1529 lex_source_read__ (src);
1533 /* Update state based on the segment. */
1534 state.last_segment = type;
1535 state.seg_pos += seg_len;
1536 if (type == SEG_NEWLINE)
1539 state.line_pos = state.seg_pos;
1542 /* Pass the segment into the scanner and try to get a token out. */
1543 enum scan_result result = scanner_push (&scanner, type,
1544 ss_buffer (segment, seg_len),
/* SCAN_SAVE/SCAN_BACK implement the scanner's backtracking via the SAVED
   snapshot of the scan state taken above. */
1546 if (result == SCAN_SAVE)
1548 else if (result == SCAN_BACK)
1553 else if (result == SCAN_DONE)
1557 /* If we've reached the end of a line, or the end of a command, then pass
1558 the line to the output engine as a syntax text item. */
1559 int n_lines = state.newlines;
/* suppress_next_newline is toggled around SEG_END_COMMAND, presumably so the
   same source line is not emitted twice — TODO confirm against the hidden
   branch bodies. */
1560 if (state.last_segment == SEG_END_COMMAND && !src->suppress_next_newline)
1563 src->suppress_next_newline = true;
1565 else if (n_lines > 0 && src->suppress_next_newline)
1568 src->suppress_next_newline = false;
1570 for (int i = 0; i < n_lines; i++)
1572 /* Beginning of line. */
1573 const char *line = &src->buffer[src->journal_pos - src->tail];
1575 /* Calculate line length, including \n or \r\n end-of-line if present.
1577 We use src->head even though that may be beyond what we've actually
1578 converted to tokens (which is only through state.line_pos). That's
1579 because, if we're emitting the line due to SEG_END_COMMAND, we want to
1580 take the whole line through the newline, not just through the '.'. */
1581 size_t max_len = src->head - src->journal_pos;
1582 const char *newline = memchr (line, '\n', max_len);
1583 size_t line_len = newline ? newline - line + 1 : max_len;
1585 /* Calculate line length excluding end-of-line. */
1586 size_t copy_len = line_len;
1587 if (copy_len > 0 && line[copy_len - 1] == '\n')
1589 if (copy_len > 0 && line[copy_len - 1] == '\r')
1592 /* Submit the line as syntax. */
1593 output_item_submit (text_item_create_nocopy (TEXT_ITEM_SYNTAX,
1594 xmemdup0 (line, copy_len),
1597 src->journal_pos += line_len;
/* Record the token's length and publish the final scan state back into
   SRC. */
1600 token->token_len = state.seg_pos - src->seg_pos;
1602 src->segmenter = state.segmenter;
1603 src->seg_pos = state.seg_pos;
1604 src->line_pos = state.line_pos;
1605 src->n_newlines += state.newlines;
/* Postprocess the scanned token: report scan-level errors through
   lex_get_error(), which also discards the bad token. */
1607 switch (token->token.type)
1613 token->token.type = T_ENDCMD;
1617 case SCAN_BAD_HEX_LENGTH:
1618 lex_get_error (src, _("String of hex digits has %d characters, which "
1619 "is not a multiple of 2"),
1620 (int) token->token.number);
1623 case SCAN_BAD_HEX_DIGIT:
1624 case SCAN_BAD_UNICODE_DIGIT:
1625 lex_get_error (src, _("`%c' is not a valid hex digit"),
1626 (int) token->token.number);
1629 case SCAN_BAD_UNICODE_LENGTH:
1630 lex_get_error (src, _("Unicode string contains %d bytes, which is "
1631 "not in the valid range of 1 to 8 bytes"),
1632 (int) token->token.number);
1635 case SCAN_BAD_UNICODE_CODE_POINT:
1636 lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
1637 (int) token->token.number);
1640 case SCAN_EXPECTED_QUOTE:
1641 lex_get_error (src, _("Unterminated string constant"));
1644 case SCAN_EXPECTED_EXPONENT:
1645 lex_get_error (src, _("Missing exponent following `%s'"),
1646 token->token.string.string);
1649 case SCAN_UNEXPECTED_CHAR:
1652 lex_get_error (src, _("Bad character %s in input"),
1653 uc_name (token->token.number, c_name));
1658 lex_source_pop_front (src);
/* NOTE(review): only a fragment of this function is visible in this excerpt;
   it appears to ensure SRC has another buffered token, delegating to
   lex_source_try_get(). */
1666 lex_source_get__ (struct lex_source *src)
1672 else if (lex_source_try_get (src))
/* Obtains a new token for SRC and, when macro expansion is enabled and the
   newly read tokens form a macro call, replaces the call's tokens with the
   macro's expansion.  NOTE(review): several lines of this function are not
   visible in this excerpt. */
1678 lex_source_get (const struct lex_source *src_)
1680 struct lex_source *src = CONST_CAST (struct lex_source *, src_);
/* Remember how many tokens were buffered, so that any tokens consumed by a
   macro call can be removed later. */
1682 size_t old_count = deque_count (&src->deque);
1683 if (!lex_source_get__ (src))
/* Macro expansion can be disabled through settings (MEXPAND). */
1686 if (!settings_get_mexpand ())
1689 struct macro_expander *me;
1690 int retval = macro_expander_create (src->lexer->macros,
1691 &lex_source_front (src)->token,
/* Feed additional tokens into the macro expander until it decides whether
   this is a complete macro invocation. */
1695 if (!lex_source_get__ (src))
1697 /* This should not be reachable because we always get a T_STOP at the
1698 end of input and the macro_expander should always terminate
1699 expansion on T_STOP. */
1703 const struct lex_token *front = lex_source_front (src);
1704 const struct macro_token mt = {
1705 .token = front->token,
1706 .representation = lex_tokens_get_syntax__ (src, front, front)
1708 retval = macro_expander_add (me, &mt);
1712 /* XXX handle case where there's a macro invocation starting from some
1713 later token we've already obtained */
1714 macro_expander_destroy (me);
1718 /* XXX handle case where the macro invocation doesn't use all the tokens */
1719 while (deque_count (&src->deque) > old_count)
1720 lex_source_pop_front (src);
/* Replace the macro call's tokens with its expansion. */
1722 struct macro_tokens expansion = { .n = 0 };
1723 macro_expander_get_expansion (me, &expansion);
1724 macro_expander_destroy (me);
1726 for (size_t i = 0; i < expansion.n; i++)
1728 *lex_push_token__ (src) = (struct lex_token) {
1729 .token = expansion.mts[i].token,
1734 ss_dealloc (&expansion.mts[i].representation); /* XXX should feed into lexer */
1736 free (expansion.mts);
1742 lex_source_push_endcmd__ (struct lex_source *src)
1744 struct lex_token *token = lex_push_token__ (src);
1745 token->token.type = T_ENDCMD;
1746 token->token_pos = 0;
1747 token->token_len = 0;
1748 token->line_pos = 0;
1749 token->first_line = 0;
/* Creates a new lex_source that reads from READER on behalf of LEXER.
   NOTE(review): the lines that link the source into LEXER's source list and
   return it are not visible in this excerpt. */
1752 static struct lex_source *
1753 lex_source_create (struct lexer *lexer, struct lex_reader *reader)
1755 struct lex_source *src;
/* xzalloc() zero-fills, so all buffer offsets and counters start at 0. */
1757 src = xzalloc (sizeof *src);
1758 src->reader = reader;
1759 segmenter_init (&src->segmenter, reader->syntax);
1761 src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens);
/* Every source starts with a sentinel T_ENDCMD token. */
1763 lex_source_push_endcmd__ (src);
/* Destroys SRC: destroys its reader, discards its buffered tokens, and
   removes it from its lexer's list of sources.  NOTE(review): the lines that
   free FILE_NAME, ENCODING, and SRC's own storage are not visible in this
   excerpt. */
1769 lex_source_destroy (struct lex_source *src)
/* Saved before the reader is destroyed, presumably so they can be freed
   afterward — TODO confirm against the hidden tail of this function. */
1771 char *file_name = src->reader->file_name;
1772 char *encoding = src->reader->encoding;
1773 if (src->reader->class->destroy != NULL)
1774 src->reader->class->destroy (src->reader);
1778 while (!deque_is_empty (&src->deque))
1779 lex_source_pop__ (src);
1781 ll_remove (&src->ll);
/* A lex_reader that reads from a file (or stdin) through a u8_istream. */
1785 struct lex_file_reader
1787 struct lex_reader reader; /* Common lex_reader header. */
1788 struct u8_istream *istream; /* Underlying encoding-converting stream. */
/* Virtual table for file readers, defined below. */
1791 static struct lex_reader_class lex_file_reader_class;
1793 /* Creates and returns a new lex_reader that will read from file FILE_NAME (or
1794 from stdin if FILE_NAME is "-"). The file is expected to be encoded with
1795 ENCODING, which should take one of the forms accepted by
1796 u8_istream_for_file(). SYNTAX and ERROR become the syntax mode and error
1797 mode of the new reader, respectively.
1799 Returns a null pointer if FILE_NAME cannot be opened. */
1801 lex_reader_for_file (const char *file_name, const char *encoding,
1802 enum segmenter_mode syntax,
1803 enum lex_error_mode error)
1805 struct lex_file_reader *r;
1806 struct u8_istream *istream;
/* "-" denotes standard input; anything else is opened as a regular file. */
1808 istream = (!strcmp(file_name, "-")
1809 ? u8_istream_for_fd (encoding, STDIN_FILENO)
1810 : u8_istream_for_file (encoding, file_name, O_RDONLY))
1811 if (istream == NULL)
1813 msg (ME, _("Opening `%s': %s."), file_name, strerror (errno));
/* Fill in the generic lex_reader header plus the file-specific state.
   Line numbering starts at 1. */
1817 r = xmalloc (sizeof *r);
1818 lex_reader_init (&r->reader, &lex_file_reader_class);
1819 r->reader.syntax = syntax;
1820 r->reader.error = error;
1821 r->reader.file_name = xstrdup (file_name);
1822 r->reader.encoding = xstrdup_if_nonnull (encoding);
1823 r->reader.line_number = 1;
1824 r->istream = istream;
/* Converts generic lex_reader R into its containing lex_file_reader. */
1829 static struct lex_file_reader *
1830 lex_file_reader_cast (struct lex_reader *r)
1832 return UP_CAST (r, struct lex_file_reader, reader);
/* lex_reader "read" callback for files: reads up to N bytes into BUF from the
   underlying u8_istream, reporting read errors to the user.  The prompt style
   is irrelevant for file input.  NOTE(review): the error check and return
   statements are not visible in this excerpt. */
1836 lex_file_read (struct lex_reader *r_, char *buf, size_t n,
1837 enum prompt_style prompt_style UNUSED)
1839 struct lex_file_reader *r = lex_file_reader_cast (r_);
1840 ssize_t n_read = u8_istream_read (r->istream, buf, n);
1843 msg (ME, _("Error reading `%s': %s."), r_->file_name, strerror (errno));
/* lex_reader "close" callback for files: closes the stream unless it wraps
   stdin, in which case the stream object is freed without closing the
   underlying file descriptor. */
1850 lex_file_close (struct lex_reader *r_)
1852 struct lex_file_reader *r = lex_file_reader_cast (r_);
1854 if (u8_istream_fileno (r->istream) != STDIN_FILENO)
1856 if (u8_istream_close (r->istream) != 0)
1857 msg (ME, _("Error closing `%s': %s."), r_->file_name, strerror (errno));
/* stdin: free the stream wrapper, leaving the descriptor open. */
1860 u8_istream_free (r->istream);
1865 static struct lex_reader_class lex_file_reader_class =
/* A lex_reader that reads from an in-memory substring. */
1871 struct lex_string_reader
1873 struct lex_reader reader; /* Common lex_reader header. */
/* Virtual table for string readers, defined below. */
1878 static struct lex_reader_class lex_string_reader_class;
1880 /* Creates and returns a new lex_reader for the contents of S, which must be
1881 encoded in the given ENCODING. The new reader takes ownership of S and will free it
1882 with ss_dealloc() when it is closed. */
/* NOTE(review): the lines that store S in the reader and return it are not
   visible in this excerpt. */
1884 lex_reader_for_substring_nocopy (struct substring s, const char *encoding)
1886 struct lex_string_reader *r;
1888 r = xmalloc (sizeof *r);
1889 lex_reader_init (&r->reader, &lex_string_reader_class);
/* String input is always segmented in SEG_MODE_AUTO. */
1890 r->reader.syntax = SEG_MODE_AUTO;
1891 r->reader.encoding = xstrdup_if_nonnull (encoding);
1898 /* Creates and returns a new lex_reader for a copy of null-terminated string S,
1899 which must be encoded in ENCODING. The caller retains ownership of S. */
1901 lex_reader_for_string (const char *s, const char *encoding)
1903 struct substring ss;
1904 ss_alloc_substring (&ss, ss_cstr (s));
1905 return lex_reader_for_substring_nocopy (ss, encoding);
1908 /* Formats FORMAT as a printf()-like format string and creates and returns a
1909 new lex_reader for the formatted result. */
/* NOTE(review): the va_end() and return statements are not visible in this
   excerpt. */
1911 lex_reader_for_format (const char *format, const char *encoding, ...)
1913 struct lex_reader *r;
1916 va_start (args, encoding);
/* xvasprintf() allocates the formatted string; the _nocopy reader takes
   ownership of it and frees it on close. */
1917 r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)), encoding);
/* Converts generic lex_reader R into its containing lex_string_reader. */
1923 static struct lex_string_reader *
1924 lex_string_reader_cast (struct lex_reader *r)
1926 return UP_CAST (r, struct lex_string_reader, reader);
/* lex_reader "read" callback for strings: copies up to N bytes of the string
   into BUF, starting at the current read offset.  NOTE(review): the
   declaration of chunk, the offset update, and the return statement are not
   visible in this excerpt. */
1930 lex_string_read (struct lex_reader *r_, char *buf, size_t n,
1931 enum prompt_style prompt_style UNUSED)
1933 struct lex_string_reader *r = lex_string_reader_cast (r_);
/* Never copy past the end of the stored string. */
1936 chunk = MIN (n, r->s.length - r->offset);
1937 memcpy (buf, r->s.string + r->offset, chunk);
/* lex_reader "close" callback for strings.  NOTE(review): the lines that free
   the stored string and the reader itself are not visible in this excerpt. */
1944 lex_string_close (struct lex_reader *r_)
1946 struct lex_string_reader *r = lex_string_reader_cast (r_);
1952 static struct lex_reader_class lex_string_reader_class =