1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/lexer.h"
33 #include "language/command.h"
34 #include "language/lexer/macro.h"
35 #include "language/lexer/scan.h"
36 #include "language/lexer/segment.h"
37 #include "language/lexer/token.h"
38 #include "libpspp/assertion.h"
39 #include "libpspp/cast.h"
40 #include "libpspp/deque.h"
41 #include "libpspp/i18n.h"
42 #include "libpspp/ll.h"
43 #include "libpspp/message.h"
44 #include "libpspp/misc.h"
45 #include "libpspp/str.h"
46 #include "libpspp/u8-istream.h"
47 #include "output/journal.h"
48 #include "output/output-item.h"
50 #include "gl/c-ctype.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
53 #include "gl/xmemdup0.h"
56 #define _(msgid) gettext (msgid)
57 #define N_(msgid) msgid
59 /* A token within a lex_source. */
/* NOTE(review): the "struct lex_token {" line and its closing brace are not
   visible in this extract; the members below belong to that struct.  The
   struct also carries the parsed token itself and a from_macro flag (both
   referenced elsewhere in this file). */
62 /* The regular token information. */
65 /* Location of token in terms of the lex_source's buffer.
66 src->tail <= line_pos <= token_pos <= src->head. */
67 size_t token_pos; /* Start of token. */
68 size_t token_len; /* Length of source for token in bytes. */
69 size_t line_pos; /* Start of line containing token_pos. */
70 int first_line; /* Line number at token_pos. */
/* NOTE(review): first_line == 0 appears to mean "no line number available";
   lex_source_get_last_line_number() tests for that value — confirm. */
74 /* A source of tokens, corresponding to a syntax file.
76 This is conceptually a lex_reader wrapped with everything needed to convert
77 its UTF-8 bytes into tokens. */
/* NOTE(review): the "struct lex_source {" line and the closing brace are not
   visible in this extract; the members below belong to that struct. */
80 struct ll ll; /* In lexer's list of sources. */
81 struct lex_reader *reader;
83 struct segmenter segmenter;
84 bool eof; /* True if T_STOP was read from 'reader'. */
86 /* Buffer of UTF-8 bytes. */
/* NOTE(review): the "char *buffer;" member itself is missing from this
   extract; code elsewhere in the file dereferences src->buffer. */
88 size_t allocated; /* Number of bytes allocated. */
89 size_t tail; /* &buffer[0] offset into UTF-8 source. */
90 size_t head; /* &buffer[head - tail] offset into source. */
92 /* Positions in source file, tail <= pos <= head for each member here. */
93 size_t journal_pos; /* First byte not yet output to journal. */
94 size_t seg_pos; /* First byte not yet scanned as token. */
95 size_t line_pos; /* First byte of line containing seg_pos. */
97 int n_newlines; /* Number of new-lines up to seg_pos. */
98 bool suppress_next_newline;
/* Lookahead ring buffer: 'deque' indexes into the 'tokens' array. */
101 struct deque deque; /* Indexes into 'tokens'. */
102 struct lex_token *tokens; /* Lookahead tokens for parser. */
/* Forward declarations for lex_source construction/destruction. */
105 static struct lex_source *lex_source_create (struct lexer *,
106 struct lex_reader *);
107 static void lex_source_destroy (struct lex_source *);
/* NOTE(review): the "struct lexer {" line is not visible in this extract;
   the two members below belong to it.  Sources are kept newest-first
   (lex_include pushes at the head). */
112 struct ll_list sources; /* Contains "struct lex_source"s. */
113 struct macro_set *macros;
/* Internal helpers, defined later in the file. */
116 static struct lex_source *lex_source__ (const struct lexer *);
117 static struct substring lex_source_get_syntax__ (const struct lex_source *,
119 static const struct lex_token *lex_next__ (const struct lexer *, int n);
120 static void lex_source_push_endcmd__ (struct lex_source *);
122 static void lex_source_pop__ (struct lex_source *);
123 static bool lex_source_get (const struct lex_source *);
124 static void lex_source_error_valist (struct lex_source *, int n0, int n1,
125 const char *format, va_list)
126 PRINTF_FORMAT (4, 0);
127 static const struct lex_token *lex_source_next__ (const struct lex_source *,
130 /* Initializes READER with the specified CLASS and otherwise some reasonable
131 defaults. The caller should fill in the other members as desired. */
133 lex_reader_init (struct lex_reader *reader,
134 const struct lex_reader_class *class)
136 reader->class = class;
137 reader->syntax = SEG_MODE_AUTO;
138 reader->error = LEX_ERROR_CONTINUE;
139 reader->file_name = NULL;
140 reader->encoding = NULL;
141 reader->line_number = 0;
145 /* Frees any file name already in READER and replaces it by a copy of
146 FILE_NAME, or if FILE_NAME is null then clears any existing name. */
148 lex_reader_set_file_name (struct lex_reader *reader, const char *file_name)
150 free (reader->file_name);
151 reader->file_name = xstrdup_if_nonnull (file_name);
154 /* Creates and returns a new lexer.  The caller owns the result and must
   eventually pass it to lex_destroy(). */
158 struct lexer *lexer = xmalloc (sizeof *lexer);
159 *lexer = (struct lexer) {
160 .sources = LL_INITIALIZER (lexer->sources),
161 .macros = macro_set_create (),
166 /* Destroys LEXER, releasing every remaining source and the macro set. */
168 lex_destroy (struct lexer *lexer)
172 struct lex_source *source, *next;
/* Safe iteration: lex_source_destroy() unlinks the node being visited. */
174 ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources)
175 lex_source_destroy (source);
176 macro_set_destroy (lexer->macros);
181 /* Adds M to LEXER's set of macros. M replaces any existing macro with the
182 same name. Takes ownership of M. */
184 lex_define_macro (struct lexer *lexer, struct macro *m)
186 macro_set_add (lexer->macros, m);
189 /* Inserts READER into LEXER so that the next token read by LEXER comes from
190 READER. Before the caller, LEXER must either be empty or at a T_ENDCMD
193 lex_include (struct lexer *lexer, struct lex_reader *reader)
195 assert (ll_is_empty (&lexer->sources) || lex_token (lexer) == T_ENDCMD);
196 ll_push_head (&lexer->sources, &lex_source_create (lexer, reader)->ll);
199 /* Appends READER to LEXER, so that it will be read after all other current
200 readers have already been read. */
202 lex_append (struct lexer *lexer, struct lex_reader *reader)
204 ll_push_tail (&lexer->sources, &lex_source_create (lexer, reader)->ll);
/* Pushes a new, empty lookahead token onto the front of SRC's deque,
   growing the backing array if the deque is full, and returns it
   initialized to T_STOP with from_macro cleared. */
209 static struct lex_token *
210 lex_push_token__ (struct lex_source *src)
212 struct lex_token *token;
214 if (deque_is_full (&src->deque))
215 src->tokens = deque_expand (&src->deque, src->tokens, sizeof *src->tokens);
217 token = &src->tokens[deque_push_front (&src->deque)];
218 token->token = (struct token) { .type = T_STOP };
219 token->from_macro = false;
/* Discards the oldest (back) lookahead token in SRC, freeing its storage. */
224 lex_source_pop__ (struct lex_source *src)
226 token_uninit (&src->tokens[deque_pop_back (&src->deque)].token);
/* Discards the newest (front) lookahead token in SRC, freeing its storage. */
230 lex_source_pop_front (struct lex_source *src)
232 token_uninit (&src->tokens[deque_pop_front (&src->deque)].token);
235 /* Advances LEXER to the next token, consuming the current token. */
237 lex_get (struct lexer *lexer)
239 struct lex_source *src;
241 src = lex_source__ (lexer);
245 if (!deque_is_empty (&src->deque))
246 lex_source_pop__ (src);
/* Refill the deque; when a source is exhausted, destroy it and fall
   through to the next one in the lexer's list. */
248 while (deque_is_empty (&src->deque))
249 if (!lex_source_get (src))
251 lex_source_destroy (src);
252 src = lex_source__ (lexer);
258 /* Issuing errors. */
260 /* Prints a syntax error message containing the current token and
261 given message FORMAT (if non-null). */
263 lex_error (struct lexer *lexer, const char *format, ...)
267 va_start (args, format);
268 lex_next_error_valist (lexer, 0, 0, format, args);
272 /* Prints a syntax error message containing the current token and
273 given message FORMAT (if non-null). */
275 lex_error_valist (struct lexer *lexer, const char *format, va_list args)
277 lex_next_error_valist (lexer, 0, 0, format, args);
280 /* Prints a syntax error message containing the current token and
281 given message FORMAT (if non-null). */
283 lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...)
287 va_start (args, format);
288 lex_next_error_valist (lexer, n0, n1, format, args);
292 /* Prints a syntax error message saying that one of the strings provided as
293 varargs, up to the first NULL, is expected. */
/* The parenthesized name suppresses any function-like macro of the same
   name so that the real function gets defined here. */
295 (lex_error_expecting) (struct lexer *lexer, ...)
299 va_start (args, lexer);
300 lex_error_expecting_valist (lexer, args);
304 /* Prints a syntax error message saying that one of the options provided in
305 ARGS, up to the first NULL, is expected. */
307 lex_error_expecting_valist (struct lexer *lexer, va_list args)
/* Only the first MAX_OPTIONS options are collected; any extras beyond the
   limit are ignored by lex_error_expecting_array()'s default case. */
309 enum { MAX_OPTIONS = 9 };
310 const char *options[MAX_OPTIONS];
312 while (n < MAX_OPTIONS)
314 const char *option = va_arg (args, const char *);
318 options[n++] = option;
320 lex_error_expecting_array (lexer, options, n);
/* Reports "expecting ..." listing the N strings in OPTIONS, with wording
   switched on N; 0 or too many options falls back to a generic error. */
324 lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n)
329 lex_error (lexer, NULL);
333 lex_error (lexer, _("expecting %s"), options[0]);
337 lex_error (lexer, _("expecting %s or %s"), options[0], options[1]);
341 lex_error (lexer, _("expecting %s, %s, or %s"), options[0], options[1],
346 lex_error (lexer, _("expecting %s, %s, %s, or %s"),
347 options[0], options[1], options[2], options[3]);
351 lex_error (lexer, _("expecting %s, %s, %s, %s, or %s"),
352 options[0], options[1], options[2], options[3], options[4]);
356 lex_error (lexer, _("expecting %s, %s, %s, %s, %s, or %s"),
357 options[0], options[1], options[2], options[3], options[4],
362 lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, or %s"),
363 options[0], options[1], options[2], options[3], options[4],
364 options[5], options[6]);
368 lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, %s, or %s"),
369 options[0], options[1], options[2], options[3], options[4],
370 options[5], options[6], options[7]);
374 lex_error (lexer, NULL);
378 /* Reports an error to the effect that subcommand SBC may only be specified
381 This function does not take a lexer as an argument or use lex_error(),
382 because the result would ordinarily just be redundant: "Syntax error at
383 SUBCOMMAND: Subcommand SUBCOMMAND may only be specified once.", which does
384 not help the user find the error. */
386 lex_sbc_only_once (const char *sbc)
388 msg (SE, _("Subcommand %s may only be specified once."), sbc);
391 /* Reports an error to the effect that subcommand SBC is missing.
393 This function does not take a lexer as an argument or use lex_error(),
394 because a missing subcommand can normally be detected only after the whole
395 command has been parsed, and so lex_error() would always report "Syntax
396 error at end of command", which does not help the user find the error. */
398 lex_sbc_missing (const char *sbc)
400 msg (SE, _("Required subcommand %s was not specified."), sbc);
403 /* Reports an error to the effect that specification SPEC may only be specified
404 once within subcommand SBC. */
406 lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec)
408 lex_error (lexer, _("%s may only be specified once within subcommand %s"),
412 /* Reports an error to the effect that specification SPEC is missing within
415 lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec)
417 lex_error (lexer, _("Required %s specification missing from %s subcommand"),
421 /* Prints a syntax error message containing the current token and
422 given message FORMAT (if non-null). */
424 lex_next_error_valist (struct lexer *lexer, int n0, int n1,
425 const char *format, va_list args)
427 struct lex_source *src = lex_source__ (lexer);
/* With a live source, delegate so the message carries file/line context;
   otherwise report against end of input. */
430 lex_source_error_valist (src, n0, n1, format, args);
436 ds_put_format (&s, _("Syntax error at end of input"));
439 ds_put_cstr (&s, ": ");
440 ds_put_vformat (&s, format, args);
442 ds_put_byte (&s, '.');
443 msg (SE, "%s", ds_cstr (&s));
448 /* Checks that we're at end of command.
449 If so, returns a successful command completion code.
450 If not, flags a syntax error and returns an error command
453 lex_end_of_command (struct lexer *lexer)
455 if (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_STOP)
457 lex_error (lexer, _("expecting end of command"));
464 /* Token testing functions. */
/* Each of these is a thin wrapper that forwards to its lookahead
   counterpart with n == 0 (the current token). */
466 /* Returns true if the current token is a number. */
468 lex_is_number (const struct lexer *lexer)
470 return lex_next_is_number (lexer, 0);
473 /* Returns true if the current token is a string. */
475 lex_is_string (const struct lexer *lexer)
477 return lex_next_is_string (lexer, 0);
480 /* Returns the value of the current token, which must be a
481 floating point number. */
483 lex_number (const struct lexer *lexer)
485 return lex_next_number (lexer, 0);
488 /* Returns true iff the current token is an integer. */
490 lex_is_integer (const struct lexer *lexer)
492 return lex_next_is_integer (lexer, 0);
495 /* Returns the value of the current token, which must be an
498 lex_integer (const struct lexer *lexer)
500 return lex_next_integer (lexer, 0);
503 /* Token testing functions with lookahead.
505 A value of 0 for N as an argument to any of these functions refers to the
506 current token. Lookahead is limited to the current command. Any N greater
507 than the number of tokens remaining in the current command will be treated
508 as referring to a T_ENDCMD token. */
510 /* Returns true if the token N ahead of the current token is a number. */
512 lex_next_is_number (const struct lexer *lexer, int n)
514 enum token_type next_token = lex_next_token (lexer, n);
515 return next_token == T_POS_NUM || next_token == T_NEG_NUM;
518 /* Returns true if the token N ahead of the current token is a string. */
520 lex_next_is_string (const struct lexer *lexer, int n)
522 return lex_next_token (lexer, n) == T_STRING;
525 /* Returns the value of the token N ahead of the current token, which must be a
526 floating point number. */
528 lex_next_number (const struct lexer *lexer, int n)
530 assert (lex_next_is_number (lexer, n));
531 return lex_next_tokval (lexer, n);
534 /* Returns true if the token N ahead of the current token is an integer. */
536 lex_next_is_integer (const struct lexer *lexer, int n)
540 if (!lex_next_is_number (lexer, n))
543 value = lex_next_tokval (lexer, n);
/* Integral and representable as long.  NOTE(review): the lower bound is
   strict (> LONG_MIN, not >=) — presumably to avoid double-rounding issues
   at LONG_MIN; confirm before relying on LONG_MIN being accepted. */
544 return value > LONG_MIN && value <= LONG_MAX && floor (value) == value;
547 /* Returns the value of the token N ahead of the current token, which must be
550 lex_next_integer (const struct lexer *lexer, int n)
552 assert (lex_next_is_integer (lexer, n));
553 return lex_next_tokval (lexer, n);
556 /* Token matching functions. */
558 /* If the current token has the specified TYPE, skips it and returns true.
559 Otherwise, returns false. */
561 lex_match (struct lexer *lexer, enum token_type type)
563 if (lex_token (lexer) == type)
572 /* If the current token matches IDENTIFIER, skips it and returns true.
573 IDENTIFIER may be abbreviated to its first three letters. Otherwise,
576 IDENTIFIER must be an ASCII string. */
578 lex_match_id (struct lexer *lexer, const char *identifier)
580 return lex_match_id_n (lexer, identifier, 3);
583 /* If the current token is IDENTIFIER, skips it and returns true. IDENTIFIER
584 may be abbreviated to its first N letters. Otherwise, returns false.
586 IDENTIFIER must be an ASCII string. */
588 lex_match_id_n (struct lexer *lexer, const char *identifier, size_t n)
590 if (lex_token (lexer) == T_ID
591 && lex_id_match_n (ss_cstr (identifier), lex_tokss (lexer), n))
600 /* If the current token is integer X, skips it and returns true. Otherwise,
603 lex_match_int (struct lexer *lexer, int x)
605 if (lex_is_integer (lexer) && lex_integer (lexer) == x)
614 /* Forced matches. */
/* These differ from the plain matchers above in that a failed match also
   reports a syntax error before returning false. */
616 /* If this token is IDENTIFIER, skips it and returns true. IDENTIFIER may be
617 abbreviated to its first 3 letters. Otherwise, reports an error and returns
620 IDENTIFIER must be an ASCII string. */
622 lex_force_match_id (struct lexer *lexer, const char *identifier)
624 if (lex_match_id (lexer, identifier))
628 lex_error_expecting (lexer, identifier);
633 /* If the current token has the specified TYPE, skips it and returns true.
634 Otherwise, reports an error and returns false. */
636 lex_force_match (struct lexer *lexer, enum token_type type)
638 if (lex_token (lexer) == type)
/* Punctuation tokens are quoted in the message (e.g. "`('"); named tokens
   fall back to token_type_to_name(). */
645 const char *type_string = token_type_to_string (type);
648 char *s = xasprintf ("`%s'", type_string);
649 lex_error_expecting (lexer, s);
653 lex_error_expecting (lexer, token_type_to_name (type));
659 /* If the current token is a string, does nothing and returns true.
660 Otherwise, reports an error and returns false. */
662 lex_force_string (struct lexer *lexer)
664 if (lex_is_string (lexer))
668 lex_error (lexer, _("expecting string"));
673 /* If the current token is a string or an identifier, does nothing and returns
674 true. Otherwise, reports an error and returns false.
676 This is meant for use in syntactic situations where we want to encourage the
677 user to supply a quoted string, but for compatibility we also accept
678 identifiers. (One example of such a situation is file names.) Therefore,
679 the error message issued when the current token is wrong only says that a
680 string is expected and doesn't mention that an identifier would also be
683 lex_force_string_or_id (struct lexer *lexer)
685 return lex_token (lexer) == T_ID || lex_force_string (lexer);
688 /* If the current token is an integer, does nothing and returns true.
689 Otherwise, reports an error and returns false. */
691 lex_force_int (struct lexer *lexer)
693 if (lex_is_integer (lexer))
697 lex_error (lexer, _("expecting integer"));
702 /* If the current token is an integer in the range MIN...MAX (inclusive), does
703 nothing and returns true. Otherwise, reports an error and returns false.
704 If NAME is nonnull, then it is used in the error message. */
706 lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
708 bool is_integer = lex_is_integer (lexer);
709 bool too_small = is_integer && lex_integer (lexer) < min;
710 bool too_big = is_integer && lex_integer (lexer) > max;
711 if (is_integer && !too_small && !too_big)
/* The error wording below is specialized by the shape of the range:
   empty range, single value, two values, bounded range, half-bounded
   range, or no usable bound at all. */
716 /* Weird, maybe a bug in the caller. Just report that we needed an
719 lex_error (lexer, _("Integer expected for %s."), name);
721 lex_error (lexer, _("Integer expected."));
726 lex_error (lexer, _("Expected %ld for %s."), min, name);
728 lex_error (lexer, _("Expected %ld."), min);
730 else if (min + 1 == max)
733 lex_error (lexer, _("Expected %ld or %ld for %s."), min, min + 1, name);
735 lex_error (lexer, _("Expected %ld or %ld."), min, min + 1);
/* A bound is mentioned only if it is "interesting": either the value
   actually violated it, or it is far from the extremes of long (the
   INT_MIN/2 and INT_MAX/2 tests), so callers passing LONG_MIN/LONG_MAX
   as "unbounded" do not clutter the message. */
739 bool report_lower_bound = (min > INT_MIN / 2) || too_small;
740 bool report_upper_bound = (max < INT_MAX / 2) || too_big;
742 if (report_lower_bound && report_upper_bound)
746 _("Expected integer between %ld and %ld for %s."),
749 lex_error (lexer, _("Expected integer between %ld and %ld."),
752 else if (report_lower_bound)
/* NOTE(review): the visible fragment suggests min == 0 yields the
   "non-negative" wording and min == 1 the "positive" wording — the
   discriminating condition is not visible here; confirm. */
757 lex_error (lexer, _("Expected non-negative integer for %s."),
760 lex_error (lexer, _("Expected non-negative integer."));
765 lex_error (lexer, _("Expected positive integer for %s."),
768 lex_error (lexer, _("Expected positive integer."));
771 else if (report_upper_bound)
775 _("Expected integer less than or equal to %ld for %s."),
778 lex_error (lexer, _("Expected integer less than or equal to %ld."),
784 lex_error (lexer, _("Integer expected for %s."), name);
786 lex_error (lexer, _("Integer expected."));
792 /* If the current token is a number, does nothing and returns true.
793 Otherwise, reports an error and returns false. */
795 lex_force_num (struct lexer *lexer)
797 if (lex_is_number (lexer))
800 lex_error (lexer, _("expecting number"));
804 /* If the current token is an identifier, does nothing and returns true.
805 Otherwise, reports an error and returns false. */
807 lex_force_id (struct lexer *lexer)
809 if (lex_token (lexer) == T_ID)
812 lex_error (lexer, _("expecting identifier"));
816 /* Token accessors. */
/* Each accessor below forwards to its lookahead counterpart with n == 0
   (the current token). */
818 /* Returns the type of LEXER's current token. */
820 lex_token (const struct lexer *lexer)
822 return lex_next_token (lexer, 0);
825 /* Returns the number in LEXER's current token.
827 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
828 tokens this function will always return zero. */
830 lex_tokval (const struct lexer *lexer)
832 return lex_next_tokval (lexer, 0);
835 /* Returns the null-terminated string in LEXER's current token, UTF-8 encoded.
837 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
838 this function will always return NULL.
840 The UTF-8 encoding of the returned string is correct for variable names and
841 other identifiers. Use filename_to_utf8() to use it as a filename. Use
842 data_in() to use it in a "union value". */
844 lex_tokcstr (const struct lexer *lexer)
846 return lex_next_tokcstr (lexer, 0);
849 /* Returns the string in LEXER's current token, UTF-8 encoded. The string is
850 null-terminated (but the null terminator is not included in the returned
851 substring's 'length').
853 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
854 this function will always return NULL.
856 The UTF-8 encoding of the returned string is correct for variable names and
857 other identifiers. Use filename_to_utf8() to use it as a filename. Use
858 data_in() to use it in a "union value". */
860 lex_tokss (const struct lexer *lexer)
862 return lex_next_tokss (lexer, 0);
867 A value of 0 for N as an argument to any of these functions refers to the
868 current token. Lookahead is limited to the current command. Any N greater
869 than the number of tokens remaining in the current command will be treated
870 as referring to a T_ENDCMD token. */
/* Returns the token N ahead within LEXER's current source.  The source
   lookup is not visible in this fragment; the stop_token below is
   presumably returned when no source remains — confirm. */
872 static const struct lex_token *
873 lex_next__ (const struct lexer *lexer_, int n)
875 struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
876 struct lex_source *src = lex_source__ (lexer);
879 return lex_source_next__ (src, n);
/* Shared sentinel returned when there is nothing left to read. */
882 static const struct lex_token stop_token = { .token = { .type = T_STOP } };
/* Returns the newest (most recently pushed) lookahead token in SRC. */
887 static const struct lex_token *
888 lex_source_front (const struct lex_source *src)
890 return &src->tokens[deque_front (&src->deque, 0)];
/* Returns the token N back from the oldest in SRC's deque, reading more
   tokens from the source as needed.  Reading stops at T_STOP or T_ENDCMD
   so that lookahead never crosses a command boundary. */
893 static const struct lex_token *
894 lex_source_next__ (const struct lex_source *src, int n)
896 while (deque_count (&src->deque) <= n)
898 if (!deque_is_empty (&src->deque))
900 const struct lex_token *front = lex_source_front (src);
901 if (front->token.type == T_STOP || front->token.type == T_ENDCMD)
905 lex_source_get (src);
908 return &src->tokens[deque_back (&src->deque, n)];
911 /* Returns the "struct token" of the token N after the current one in LEXER.
912 The returned pointer can be invalidated by pretty much any succeeding call
913 into the lexer, although the string pointer within the returned token is
914 only invalidated by consuming the token (e.g. with lex_get()). */
916 lex_next (const struct lexer *lexer, int n)
918 return &lex_next__ (lexer, n)->token;
921 /* Returns the type of the token N after the current one in LEXER. */
923 lex_next_token (const struct lexer *lexer, int n)
925 return lex_next (lexer, n)->type;
928 /* Returns the number in the token N after the current one in LEXER.
930 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
931 tokens this function will always return zero. */
933 lex_next_tokval (const struct lexer *lexer, int n)
935 const struct token *token = lex_next (lexer, n);
936 return token->number;
939 /* Returns the null-terminated string in the token N after the current one, in
942 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
943 this function will always return NULL.
945 The UTF-8 encoding of the returned string is correct for variable names and
946 other identifiers. Use filename_to_utf8() to use it as a filename. Use
947 data_in() to use it in a "union value". */
949 lex_next_tokcstr (const struct lexer *lexer, int n)
951 return lex_next_tokss (lexer, n).string;
954 /* Returns the string in the token N after the current one, in UTF-8 encoding.
955 The string is null-terminated (but the null terminator is not included in
956 the returned substring's 'length').
958 Only T_ID, T_MACRO_ID, T_STRING tokens have meaningful strings. For other
959 tokens this function will always return NULL.
961 The UTF-8 encoding of the returned string is correct for variable names and
962 other identifiers. Use filename_to_utf8() to use it as a filename. Use
963 data_in() to use it in a "union value". */
965 lex_next_tokss (const struct lexer *lexer, int n)
967 return lex_next (lexer, n)->string;
/* Returns the raw syntax that produced tokens N0 through N1 after the
   current one, as a substring of the source buffer. */
971 lex_next_representation (const struct lexer *lexer, int n0, int n1)
973 return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1);
/* Returns true if the token N ahead was produced by macro expansion rather
   than read directly from the source. */
977 lex_next_is_from_macro (const struct lexer *lexer, int n)
979 return lex_next__ (lexer, n)->from_macro;
983 lex_tokens_match (const struct token *actual, const struct token *expected)
985 if (actual->type != expected->type)
988 switch (actual->type)
992 return actual->number == expected->number;
995 return lex_id_match (expected->string, actual->string);
998 return (actual->string.length == expected->string.length
999 && !memcmp (actual->string.string, expected->string.string,
1000 actual->string.length));
1007 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
1008 skips it and returns true. Otherwise, returns false.
1010 S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
1011 "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their
1012 first three letters. */
1014 lex_match_phrase (struct lexer *lexer, const char *s)
1016 struct string_lexer slex;
1021 string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE);
1022 while (string_lexer_next (&slex, &token))
1023 if (token.type != SCAN_SKIP)
1025 bool match = lex_tokens_match (lex_next (lexer, i++), &token);
1026 token_uninit (&token);
/* Returns the 1-based line number where the token N ahead in SRC starts,
   or 0 if the source has no line numbers. */
1037 lex_source_get_first_line_number (const struct lex_source *src, int n)
1039 return lex_source_next__ (src, n)->first_line;
/* Counts '\n' bytes within the LENGTH bytes at S. */
1043 count_newlines (char *s, size_t length)
1048 while ((newline = memchr (s, '\n', length)) != NULL)
1051 length -= (newline + 1) - s;
/* Returns the line number just past the token N ahead in SRC (first_line
   plus any newlines embedded in the token's own text, plus 1), or 0 if the
   source has no line numbers. */
1059 lex_source_get_last_line_number (const struct lex_source *src, int n)
1061 const struct lex_token *token = lex_source_next__ (src, n);
1063 if (token->first_line == 0)
1067 char *token_str = &src->buffer[token->token_pos - src->tail];
1068 return token->first_line + count_newlines (token_str, token->token_len) + 1;
/* Returns the display width, in columns, of the LENGTH-byte UTF-8 text at
   S_, per uc_width() (CJK = 2, combining = 0), with tabs rounded up to the
   next 8-column stop. */
1073 count_columns (const char *s_, size_t length)
1075 const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
1081 for (ofs = 0; ofs < length; ofs += mblen)
1085 mblen = u8_mbtouc (&uc, s + ofs, length - ofs);
1088 int width = uc_width (uc, "UTF-8");
1093 columns = ROUND_UP (columns + 1, 8);
/* Returns the 1-based display column where the token N ahead in SRC starts,
   measured from the start of the token's line. */
1100 lex_source_get_first_column (const struct lex_source *src, int n)
1102 const struct lex_token *token = lex_source_next__ (src, n);
1103 return count_columns (&src->buffer[token->line_pos - src->tail],
1104 token->token_pos - token->line_pos);
/* Returns the 1-based display column just past the token N ahead in SRC,
   measured on the last line the token touches (hence the memrchr for the
   final newline within the token's span). */
1108 lex_source_get_last_column (const struct lex_source *src, int n)
1110 const struct lex_token *token = lex_source_next__ (src, n);
1111 char *start, *end, *newline;
1113 start = &src->buffer[token->line_pos - src->tail];
1114 end = &src->buffer[(token->token_pos + token->token_len) - src->tail];
1115 newline = memrchr (start, '\n', end - start);
1116 if (newline != NULL)
1117 start = newline + 1;
1118 return count_columns (start, end - start);
/* Public wrappers: each forwards to the lex_source_get_* helper above,
   returning 0 when LEXER has no current source. */
1121 /* Returns the 1-based line number of the start of the syntax that represents
1122 the token N after the current one in LEXER. Returns 0 for a T_STOP token or
1123 if the token is drawn from a source that does not have line numbers. */
1125 lex_get_first_line_number (const struct lexer *lexer, int n)
1127 const struct lex_source *src = lex_source__ (lexer);
1128 return src != NULL ? lex_source_get_first_line_number (src, n) : 0;
1131 /* Returns the 1-based line number of the end of the syntax that represents the
1132 token N after the current one in LEXER, plus 1. Returns 0 for a T_STOP
1133 token or if the token is drawn from a source that does not have line
1136 Most of the time, a single token is wholly within a single line of syntax,
1137 but there are two exceptions: a T_STRING token can be made up of multiple
1138 segments on adjacent lines connected with "+" punctuators, and a T_NEG_NUM
1139 token can consist of a "-" on one line followed by the number on the next.
1142 lex_get_last_line_number (const struct lexer *lexer, int n)
1144 const struct lex_source *src = lex_source__ (lexer);
1145 return src != NULL ? lex_source_get_last_line_number (src, n) : 0;
1148 /* Returns the 1-based column number of the start of the syntax that represents
1149 the token N after the current one in LEXER. Returns 0 for a T_STOP
1152 Column numbers are measured according to the width of characters as shown in
1153 a typical fixed-width font, in which CJK characters have width 2 and
1154 combining characters have width 0. */
1156 lex_get_first_column (const struct lexer *lexer, int n)
1158 const struct lex_source *src = lex_source__ (lexer);
1159 return src != NULL ? lex_source_get_first_column (src, n) : 0;
1162 /* Returns the 1-based column number of the end of the syntax that represents
1163 the token N after the current one in LEXER, plus 1. Returns 0 for a T_STOP
1166 Column numbers are measured according to the width of characters as shown in
1167 a typical fixed-width font, in which CJK characters have width 2 and
1168 combining characters have width 0. */
1170 lex_get_last_column (const struct lexer *lexer, int n)
1172 const struct lex_source *src = lex_source__ (lexer);
1173 return src != NULL ? lex_source_get_last_column (src, n) : 0;
1176 /* Returns the name of the syntax file from which the current command is drawn.
1177 Returns NULL for a T_STOP token or if the command's source does not have
1180 There is no version of this function that takes an N argument because
1181 lookahead only works to the end of a command and any given command is always
1182 within a single syntax file. */
1184 lex_get_file_name (const struct lexer *lexer)
1186 struct lex_source *src = lex_source__ (lexer);
1187 return src == NULL ? NULL : src->reader->file_name;
/* Returns the character encoding of the current source's reader, or NULL
   if there is no current source. */
1191 lex_get_encoding (const struct lexer *lexer)
1193 struct lex_source *src = lex_source__ (lexer);
1194 return src == NULL ? NULL : src->reader->encoding;
1197 /* Returns the syntax mode for the syntax file from which the current command
1198 is drawn. Returns SEG_MODE_AUTO for a T_STOP token or if the command's
1199 source does not have line numbers.
1201 There is no version of this function that takes an N argument because
1202 lookahead only works to the end of a command and any given command is always
1203 within a single syntax file. */
1205 lex_get_syntax_mode (const struct lexer *lexer)
1207 struct lex_source *src = lex_source__ (lexer);
1208 return src == NULL ? SEG_MODE_AUTO : src->reader->syntax;
1211 /* Returns the error mode for the syntax file from which the current command
1212 is drawn. Returns LEX_ERROR_TERMINAL for a T_STOP token or if the command's
1213 source does not have line numbers.
1215 There is no version of this function that takes an N argument because
1216 lookahead only works to the end of a command and any given command is always
1217 within a single syntax file. */
1219 lex_get_error_mode (const struct lexer *lexer)
1221 struct lex_source *src = lex_source__ (lexer);
1222 return src == NULL ? LEX_ERROR_TERMINAL : src->reader->error;
1225 /* If the source that LEXER is currently reading has error mode
1226 LEX_ERROR_TERMINAL, discards all buffered input and tokens, so that the next
1227 token to be read comes directly from whatever is next read from the stream.
1229 It makes sense to call this function after encountering an error in a
1230 command entered on the console, because usually the user would prefer not to
1231 have cascading errors. */
1233 lex_interactive_reset (struct lexer *lexer)
1235 struct lex_source *src = lex_source__ (lexer);
1236 if (src != NULL && src->reader->error == LEX_ERROR_TERMINAL)
/* Reset every buffer offset and the segmenter (keeping its mode), then
   drop all lookahead tokens and restore the initial T_ENDCMD state. */
1238 src->head = src->tail = 0;
1239 src->journal_pos = src->seg_pos = src->line_pos = 0;
1240 src->n_newlines = 0;
1241 src->suppress_next_newline = false;
1242 segmenter_init (&src->segmenter, segmenter_get_mode (&src->segmenter));
1243 while (!deque_is_empty (&src->deque))
1244 lex_source_pop__ (src);
1245 lex_source_push_endcmd__ (src);
1249 /* Advances past any tokens in LEXER up to a T_ENDCMD or T_STOP. */
1251 lex_discard_rest_of_command (struct lexer *lexer)
1253 while (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD)
1257 /* Discards all lookahead tokens in LEXER, then discards all input sources
1258 until it encounters one with error mode LEX_ERROR_TERMINAL or until it
1259 runs out of input sources. */
1261 lex_discard_noninteractive (struct lexer *lexer)
1263 struct lex_source *src = lex_source__ (lexer);
1267 while (!deque_is_empty (&src->deque))
1268 lex_source_pop__ (src);
1270 for (; src != NULL && src->reader->error != LEX_ERROR_TERMINAL;
1271 src = lex_source__ (lexer))
1272 lex_source_destroy (src);
1277 lex_source_max_tail__ (const struct lex_source *src)
1279 const struct lex_token *token;
1282 assert (src->seg_pos >= src->line_pos);
1283 max_tail = MIN (src->journal_pos, src->line_pos);
1285 /* Use the oldest token also. (We know that src->deque cannot be empty
1286 because we are in the process of adding a new token, which is already
1287 initialized enough to use here.) */
1288 token = &src->tokens[deque_back (&src->deque, 0)];
1289 assert (token->token_pos >= token->line_pos);
1290 max_tail = MIN (max_tail, token->line_pos);
1296 lex_source_expand__ (struct lex_source *src)
1298 if (src->head - src->tail >= src->allocated)
1300 size_t max_tail = lex_source_max_tail__ (src);
1301 if (max_tail > src->tail)
1303 /* Advance the tail, freeing up room at the head. */
1304 memmove (src->buffer, src->buffer + (max_tail - src->tail),
1305 src->head - max_tail);
1306 src->tail = max_tail;
1310 /* Buffer is completely full. Expand it. */
1311 src->buffer = x2realloc (src->buffer, &src->allocated);
1316 /* There's space available at the head of the buffer. Nothing to do. */
1321 lex_source_read__ (struct lex_source *src)
1325 lex_source_expand__ (src);
1327 size_t head_ofs = src->head - src->tail;
1328 size_t space = src->allocated - head_ofs;
1329 enum prompt_style prompt = segmenter_get_prompt (&src->segmenter);
1330 size_t n = src->reader->class->read (src->reader, &src->buffer[head_ofs],
1332 assert (n <= space);
1337 src->reader->eof = true;
1338 lex_source_expand__ (src);
1344 while (!memchr (&src->buffer[src->seg_pos - src->tail], '\n',
1345 src->head - src->seg_pos));
1348 static struct lex_source *
1349 lex_source__ (const struct lexer *lexer)
1351 return (ll_is_empty (&lexer->sources) ? NULL
1352 : ll_data (ll_head (&lexer->sources), struct lex_source, ll));
1355 static struct substring
1356 lex_tokens_get_syntax__ (const struct lex_source *src,
1357 const struct lex_token *token0,
1358 const struct lex_token *token1)
1360 size_t start = token0->token_pos;
1361 size_t end = token1->token_pos + token1->token_len;
1363 return ss_buffer (&src->buffer[start - src->tail], end - start);
1366 static struct substring
1367 lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
1369 return lex_tokens_get_syntax__ (src,
1370 lex_source_next__ (src, n0),
1371 lex_source_next__ (src, MAX (n0, n1)));
/* Copies up to OUT_SIZE - 1 bytes of IN into OUT as a null-terminated
   string, stopping at a line break or NUL and never splitting a UTF-8
   character, and appends "..." when IN did not fit entirely.
   NOTE(review): several lines (variable declarations and loop-exit
   statements) are elided in this view; comments below describe only the
   visible code. */
1375 lex_ellipsize__ (struct substring in, char *out, size_t out_size)
1381 assert (out_size >= 16);
1382 out_maxlen = out_size - 1;
/* Reserve room for the "..." suffix when IN cannot fit whole. */
1383 if (in.length > out_maxlen - 3)
/* Walk IN one multibyte character at a time. */
1386 for (out_len = 0; out_len < in.length; out_len += mblen)
/* Stop at a newline, a NUL, or a "\r\n" pair. */
1388 if (in.string[out_len] == '\n'
1389 || in.string[out_len] == '\0'
1390 || (in.string[out_len] == '\r'
1391 && out_len + 1 < in.length
1392 && in.string[out_len + 1] == '\n'))
/* Measure the next UTF-8 character so it is copied whole or not at all. */
1395 mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len),
1396 in.length - out_len);
/* Stop before overrunning the output limit. */
1401 if (out_len + mblen > out_maxlen)
1405 memcpy (out, in.string, out_len);
/* Append "..." only if some of IN was left out. */
1406 strcpy (&out[out_len], out_len < in.length ? "..." : "");
/* Builds and emits a syntax-error message for SRC's lookahead tokens N0
   through N1, formatting FORMAT with ARGS and prefixing a description of
   the offending syntax.
   NOTE(review): several lines (local declarations, braces, the msg-emitting
   call) are elided in this view; comments describe only the visible code. */
1410 lex_source_error_valist (struct lex_source *src, int n0, int n1,
1411 const char *format, va_list args)
1413 const struct lex_token *token;
/* Choose a prefix based on what the first offending token is. */
1418 token = lex_source_next__ (src, n0);
1419 if (token->token.type == T_ENDCMD)
1420 ds_put_cstr (&s, _("Syntax error at end of command"));
1421 else if (token->from_macro)
1423 /* XXX this isn't ideal, we should get the actual syntax */
1424 char *syntax = token_to_string (&token->token);
1426 ds_put_format (&s, _("Syntax error at `%s'"), syntax);
1428 ds_put_cstr (&s, _("Syntax error"));
/* Otherwise quote (an ellipsized copy of) the offending source text. */
1433 struct substring syntax = lex_source_get_syntax__ (src, n0, n1);
1434 if (!ss_is_empty (syntax))
1436 char syntax_cstr[64];
1438 lex_ellipsize__ (syntax, syntax_cstr, sizeof syntax_cstr);
1439 ds_put_format (&s, _("Syntax error at `%s'"), syntax_cstr);
1442 ds_put_cstr (&s, _("Syntax error"));
/* Append the caller's formatted detail text. */
1447 ds_put_cstr (&s, ": ");
1448 ds_put_vformat (&s, format, args);
/* Ensure the message ends with a period. */
1450 if (ds_last (&s) != '.')
1451 ds_put_byte (&s, '.');
/* Emit the message with the tokens' source location attached. */
1454 .category = MSG_C_SYNTAX,
1455 .severity = MSG_S_ERROR,
1456 .file_name = src->reader->file_name,
1457 .first_line = lex_source_get_first_line_number (src, n0),
1458 .last_line = lex_source_get_last_line_number (src, n1),
1459 .first_column = lex_source_get_first_column (src, n0),
1460 .last_column = lex_source_get_last_column (src, n1),
1461 .text = ds_steal_cstr (&s),
/* Reports a printf()-style formatted error at SRC's most recently added
   token, then removes that token from the deque.
   NOTE(review): the va_list declaration and va_end call are elided in this
   view. */
1466 static void PRINTF_FORMAT (2, 3)
1467 lex_get_error (struct lex_source *src, const char *format, ...)
1472 va_start (args, format);
/* The newest token sits at index deque_count - 1. */
1474 n = deque_count (&src->deque) - 1;
1475 lex_source_error_valist (src, n, n, format, args);
/* Discard the erroneous token so it is not handed to the parser. */
1476 lex_source_pop_front (src);
1481 /* Attempts to append an additional token into SRC's deque, reading more from
1482 the underlying lex_reader if necessary. Returns true if a new token was
1483 added to SRC's deque, false otherwise. */
/* NOTE(review): many structural lines (braces, loop headers, return
   statements, some case labels) are elided in this view; the comments below
   annotate only the visible code. */
1485 lex_source_try_get (struct lex_source *src)
1487 /* State maintained while scanning tokens. Usually we only need a single
1488 state, but scanner_push() can return SCAN_SAVE to indicate that the state
1489 needs to be saved and possibly restored later with SCAN_BACK. */
1492 struct segmenter segmenter;
1493 enum segment_type last_segment;
1494 int newlines; /* Number of newlines encountered so far. */
1495 /* Maintained here so we can update lex_source's similar members when we
1501 /* Initialize state. */
1502 struct state state =
1504 .segmenter = src->segmenter,
1506 .seg_pos = src->seg_pos,
1507 .line_pos = src->line_pos,
/* Snapshot for a possible SCAN_BACK restore. */
1509 struct state saved = state;
1511 /* Append a new token to SRC and initialize it. */
1512 struct lex_token *token = lex_push_token__ (src);
1513 struct scanner scanner;
1514 scanner_init (&scanner, &token->token);
1515 token->line_pos = src->line_pos;
1516 token->token_pos = src->seg_pos;
/* Record a 1-based line number only when the reader tracks lines. */
1517 if (src->reader->line_number > 0)
1518 token->first_line = src->reader->line_number + src->n_newlines;
1520 token->first_line = 0;
1522 /* Extract segments and pass them through the scanner until we obtain a
1526 /* Extract a segment. */
1527 const char *segment = &src->buffer[state.seg_pos - src->tail];
1528 size_t seg_maxlen = src->head - state.seg_pos;
1529 enum segment_type type;
1530 int seg_len = segmenter_push (&state.segmenter, segment, seg_maxlen,
1531 src->reader->eof, &type);
1534 /* The segmenter needs more input to produce a segment. */
1535 assert (!src->reader->eof);
1536 lex_source_read__ (src);
1540 /* Update state based on the segment. */
1541 state.last_segment = type;
1542 state.seg_pos += seg_len;
1543 if (type == SEG_NEWLINE)
1546 state.line_pos = state.seg_pos;
1549 /* Pass the segment into the scanner and try to get a token out. */
1550 enum scan_result result = scanner_push (&scanner, type,
1551 ss_buffer (segment, seg_len),
1553 if (result == SCAN_SAVE)
1555 else if (result == SCAN_BACK)
1560 else if (result == SCAN_DONE)
1564 /* If we've reached the end of a line, or the end of a command, then pass
1565 the line to the output engine as a syntax text item. */
1566 int n_lines = state.newlines;
1567 if (state.last_segment == SEG_END_COMMAND && !src->suppress_next_newline)
1570 src->suppress_next_newline = true;
1572 else if (n_lines > 0 && src->suppress_next_newline)
1575 src->suppress_next_newline = false;
1577 for (int i = 0; i < n_lines; i++)
1579 /* Beginning of line. */
1580 const char *line = &src->buffer[src->journal_pos - src->tail];
1582 /* Calculate line length, including \n or \r\n end-of-line if present.
1584 We use src->head even though that may be beyond what we've actually
1585 converted to tokens (which is only through state.line_pos). That's
1586 because, if we're emitting the line due to SEG_END_COMMAND, we want to
1587 take the whole line through the newline, not just through the '.'. */
1588 size_t max_len = src->head - src->journal_pos;
1589 const char *newline = memchr (line, '\n', max_len);
1590 size_t line_len = newline ? newline - line + 1 : max_len;
1592 /* Calculate line length excluding end-of-line. */
1593 size_t copy_len = line_len;
1594 if (copy_len > 0 && line[copy_len - 1] == '\n')
1596 if (copy_len > 0 && line[copy_len - 1] == '\r')
1599 /* Submit the line as syntax. */
1600 output_item_submit (text_item_create_nocopy (TEXT_ITEM_SYNTAX,
1601 xmemdup0 (line, copy_len),
1604 src->journal_pos += line_len;
/* Token length is the distance the segmenter advanced. */
1607 token->token_len = state.seg_pos - src->seg_pos;
/* Commit the scan state back into SRC. */
1609 src->segmenter = state.segmenter;
1610 src->seg_pos = state.seg_pos;
1611 src->line_pos = state.line_pos;
1612 src->n_newlines += state.newlines;
/* Post-process the scanned token, reporting scan errors. */
1614 switch (token->token.type)
1620 token->token.type = T_ENDCMD;
1624 case SCAN_BAD_HEX_LENGTH:
1625 lex_get_error (src, _("String of hex digits has %d characters, which "
1626 "is not a multiple of 2"),
1627 (int) token->token.number);
1630 case SCAN_BAD_HEX_DIGIT:
1631 case SCAN_BAD_UNICODE_DIGIT:
1632 lex_get_error (src, _("`%c' is not a valid hex digit"),
1633 (int) token->token.number);
1636 case SCAN_BAD_UNICODE_LENGTH:
1637 lex_get_error (src, _("Unicode string contains %d bytes, which is "
1638 "not in the valid range of 1 to 8 bytes"),
1639 (int) token->token.number);
1642 case SCAN_BAD_UNICODE_CODE_POINT:
1643 lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
1644 (int) token->token.number);
1647 case SCAN_EXPECTED_QUOTE:
1648 lex_get_error (src, _("Unterminated string constant"));
1651 case SCAN_EXPECTED_EXPONENT:
1652 lex_get_error (src, _("Missing exponent following `%s'"),
1653 token->token.string.string);
1656 case SCAN_UNEXPECTED_CHAR:
1659 lex_get_error (src, _("Bad character %s in input"),
1660 uc_name (token->token.number, c_name))
/* Skip-token case: drop the placeholder token that was pushed above. */
1665 lex_source_pop_front (src);
/* Adds one new token at the back of SRC's deque.
   NOTE(review): most of this function is elided in this view; only its name
   and the branch calling lex_source_try_get() are visible. */
1673 lex_source_get__ (struct lex_source *src)
1679 else if (lex_source_try_get (src))
/* Obtains the next token for SRC, expanding any macro invocation that
   begins at the newly fetched token when macro expansion (MEXPAND) is on.
   NOTE(review): many structural lines (braces, returns, the expander loop
   header) are elided in this view; comments annotate only the visible
   code. */
1685 lex_source_get (const struct lex_source *src_)
1687 struct lex_source *src = CONST_CAST (struct lex_source *, src_);
/* Remember how many tokens existed so newly fetched ones can be found. */
1689 size_t old_count = deque_count (&src->deque);
1690 if (!lex_source_get__ (src))
/* With MEXPAND off, no macro processing happens at all. */
1693 if (!settings_get_mexpand ())
/* Try to start a macro expansion at the front token. */
1696 struct macro_expander *me;
1697 int retval = macro_expander_create (src->lexer->macros,
1698 &lex_source_front (src)->token,
/* Keep feeding tokens into the expander until it decides. */
1702 if (!lex_source_get__ (src))
1704 /* This should not be reachable because we always get a T_ENDCMD at
1705 the end of an input file (transformed from T_STOP by
1706 lex_source_try_get()) and the macro_expander should always
1707 terminate expansion on T_ENDCMD. */
1711 const struct lex_token *front = lex_source_front (src);
1712 const struct macro_token mt = {
1713 .token = front->token,
1714 .representation = lex_tokens_get_syntax__ (src, front, front)
1716 retval = macro_expander_add (me, &mt);
1720 /* XXX handle case where there's a macro invocation starting from some
1721 later token we've already obtained */
1722 macro_expander_destroy (me);
1726 /* XXX handle case where the macro invocation doesn't use all the tokens */
/* Drop the tokens consumed by the macro call... */
1727 while (deque_count (&src->deque) > old_count)
1728 lex_source_pop_front (src);
/* ...and replace them by the macro's expansion. */
1730 struct macro_tokens expansion = { .n = 0 };
1731 macro_expander_get_expansion (me, &expansion);
1732 macro_expander_destroy (me);
/* With MPRINT on, log the expansion text. */
1734 if (settings_get_mprint ())
1736 struct string mprint = DS_EMPTY_INITIALIZER;
1737 macro_tokens_to_representation (&expansion, &mprint);
1738 output_item_submit (text_item_create (TEXT_ITEM_LOG, ds_cstr (&mprint),
1739 _("Macro Expansion")));
1740 ds_destroy (&mprint);
/* Append the expansion's tokens to the deque. */
1743 for (size_t i = 0; i < expansion.n; i++)
1745 *lex_push_token__ (src) = (struct lex_token) {
1746 .token = expansion.mts[i].token,
1751 ss_dealloc (&expansion.mts[i].representation); /* XXX should feed into lexer */
1753 free (expansion.mts);
1759 lex_source_push_endcmd__ (struct lex_source *src)
1761 *lex_push_token__ (src) = (struct lex_token) { .token = { .type = T_ENDCMD } };
/* Creates a new lex_source wrapping READER for LEXER.
   NOTE(review): some member assignments and the return statement are elided
   in this view. */
1764 static struct lex_source *
1765 lex_source_create (struct lexer *lexer, struct lex_reader *reader)
1767 struct lex_source *src;
/* xzalloc zero-fills, so unassigned members start at 0/NULL/false. */
1769 src = xzalloc (sizeof *src);
1770 src->reader = reader;
/* The segmenter starts in the reader's declared syntax mode. */
1771 segmenter_init (&src->segmenter, reader->syntax);
/* Token deque starts with capacity for 4 tokens and grows as needed. */
1773 src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens);
/* Seed the deque with T_ENDCMD so the lexer starts between commands. */
1775 lex_source_push_endcmd__ (src);
/* Destroys SRC: runs the reader's destroy callback, discards all buffered
   tokens, and unlinks SRC from its lexer's source list.
   NOTE(review): the free() calls for file_name, encoding, the buffer, and
   SRC itself are elided in this view. */
1781 lex_source_destroy (struct lex_source *src)
/* Save these before the reader's destroy callback may invalidate them. */
1783 char *file_name = src->reader->file_name;
1784 char *encoding = src->reader->encoding;
1785 if (src->reader->class->destroy != NULL)
1786 src->reader->class->destroy (src->reader);
1790 while (!deque_is_empty (&src->deque))
1791 lex_source_pop__ (src);
/* Unlink SRC from the lexer's list of sources. */
1793 ll_remove (&src->ll);
/* A lex_reader backed by a file (or stdin), read through a u8_istream. */
1797 struct lex_file_reader
1799 struct lex_reader reader;   /* Common lex_reader header; must come first. */
1800 struct u8_istream *istream; /* Underlying recoding input stream. */
/* Callback table for file readers; defined below. */
1803 static struct lex_reader_class lex_file_reader_class;
1805 /* Creates and returns a new lex_reader that will read from file FILE_NAME (or
1806 from stdin if FILE_NAME is "-"). The file is expected to be encoded with
1807 ENCODING, which should take one of the forms accepted by
1808 u8_istream_for_file(). SYNTAX and ERROR become the syntax mode and error
1809 mode of the new reader, respectively.
1811 Returns a null pointer if FILE_NAME cannot be opened. */
1813 lex_reader_for_file (const char *file_name, const char *encoding,
1814 enum segmenter_mode syntax,
1815 enum lex_error_mode error)
1817 struct lex_file_reader *r;
1818 struct u8_istream *istream;
1820 istream = (!strcmp(file_name, "-")
1821 ? u8_istream_for_fd (encoding, STDIN_FILENO)
1822 : u8_istream_for_file (encoding, file_name, O_RDONLY));
1823 if (istream == NULL)
1825 msg (ME, _("Opening `%s': %s."), file_name, strerror (errno));
1829 r = xmalloc (sizeof *r);
1830 lex_reader_init (&r->reader, &lex_file_reader_class);
1831 r->reader.syntax = syntax;
1832 r->reader.error = error;
1833 r->reader.file_name = xstrdup (file_name);
1834 r->reader.encoding = xstrdup_if_nonnull (encoding);
1835 r->reader.line_number = 1;
1836 r->istream = istream;
1841 static struct lex_file_reader *
1842 lex_file_reader_cast (struct lex_reader *r)
1844 return UP_CAST (r, struct lex_file_reader, reader);
1848 lex_file_read (struct lex_reader *r_, char *buf, size_t n,
1849 enum prompt_style prompt_style UNUSED)
1851 struct lex_file_reader *r = lex_file_reader_cast (r_);
1852 ssize_t n_read = u8_istream_read (r->istream, buf, n);
1855 msg (ME, _("Error reading `%s': %s."), r_->file_name, strerror (errno));
1862 lex_file_close (struct lex_reader *r_)
1864 struct lex_file_reader *r = lex_file_reader_cast (r_);
1866 if (u8_istream_fileno (r->istream) != STDIN_FILENO)
1868 if (u8_istream_close (r->istream) != 0)
1869 msg (ME, _("Error closing `%s': %s."), r_->file_name, strerror (errno));
1872 u8_istream_free (r->istream);
1877 static struct lex_reader_class lex_file_reader_class =
/* A lex_reader backed by an in-memory string.
   NOTE(review): the members holding the string and the read offset are
   elided in this view. */
1883 struct lex_string_reader
1885 struct lex_reader reader; /* Common lex_reader header; must come first. */
/* Callback table for string readers; defined below. */
1890 static struct lex_reader_class lex_string_reader_class;
1892 /* Creates and returns a new lex_reader for the contents of S, which must be
1893 encoded in the given ENCODING. The new reader takes ownership of S and will free it
1894 with ss_dealloc() when it is closed. */
1896 lex_reader_for_substring_nocopy (struct substring s, const char *encoding)
1898 struct lex_string_reader *r;
1900 r = xmalloc (sizeof *r);
1901 lex_reader_init (&r->reader, &lex_string_reader_class);
1902 r->reader.syntax = SEG_MODE_AUTO;
1903 r->reader.encoding = xstrdup_if_nonnull (encoding);
1910 /* Creates and returns a new lex_reader for a copy of null-terminated string S,
1911 which must be encoded in ENCODING. The caller retains ownership of S. */
1913 lex_reader_for_string (const char *s, const char *encoding)
1915 struct substring ss;
1916 ss_alloc_substring (&ss, ss_cstr (s));
1917 return lex_reader_for_substring_nocopy (ss, encoding);
/* Formats FORMAT as a printf()-like format string and creates and returns a
   new lex_reader for the formatted result. */
struct lex_reader *
lex_reader_for_format (const char *format, const char *encoding, ...)
{
  va_list args;
  va_start (args, encoding);
  /* xvasprintf allocates the formatted string; the nocopy reader takes
     ownership of it. */
  struct lex_reader *r
    = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)),
                                       encoding);
  va_end (args);

  return r;
}
1935 static struct lex_string_reader *
1936 lex_string_reader_cast (struct lex_reader *r)
1938 return UP_CAST (r, struct lex_string_reader, reader);
1942 lex_string_read (struct lex_reader *r_, char *buf, size_t n,
1943 enum prompt_style prompt_style UNUSED)
1945 struct lex_string_reader *r = lex_string_reader_cast (r_);
1948 chunk = MIN (n, r->s.length - r->offset);
1949 memcpy (buf, r->s.string + r->offset, chunk);
1956 lex_string_close (struct lex_reader *r_)
1958 struct lex_string_reader *r = lex_string_reader_cast (r_);
1964 static struct lex_reader_class lex_string_reader_class =