1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/lexer.h"
33 #include "language/command.h"
34 #include "language/lexer/macro.h"
35 #include "language/lexer/scan.h"
36 #include "language/lexer/segment.h"
37 #include "language/lexer/token.h"
38 #include "libpspp/assertion.h"
39 #include "libpspp/cast.h"
40 #include "libpspp/deque.h"
41 #include "libpspp/i18n.h"
42 #include "libpspp/ll.h"
43 #include "libpspp/message.h"
44 #include "libpspp/misc.h"
45 #include "libpspp/str.h"
46 #include "libpspp/u8-istream.h"
47 #include "output/journal.h"
48 #include "output/output-item.h"
50 #include "gl/c-ctype.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
53 #include "gl/xmemdup0.h"
56 #define _(msgid) gettext (msgid)
57 #define N_(msgid) msgid
/* NOTE(review): this span is an elided listing fragment of "struct lex_token";
   the struct's opening lines (and the embedded "struct token token;" member
   implied by ".token" uses below) are not visible here — confirm against the
   upstream file before editing. */
59 /* A token within a lex_source. */
62 /* The regular token information. */
65 /* Location of token in terms of the lex_source's buffer.
66 src->tail <= line_pos <= token_pos <= src->head. */
67 size_t token_pos; /* Start of token. */
68 size_t token_len; /* Length of source for token in bytes. */
69 size_t line_pos; /* Start of line containing token_pos. */
70 int first_line; /* Line number at token_pos. */
/* NOTE(review): elided listing fragment of "struct lex_source"; the opening
   "struct lex_source {", the "char *buffer;" member (implied by src->buffer
   uses below), and the closing brace are not visible here — confirm against
   the upstream file. */
74 /* A source of tokens, corresponding to a syntax file.
76 This is conceptually a lex_reader wrapped with everything needed to convert
77 its UTF-8 bytes into tokens. */
80 struct ll ll; /* In lexer's list of sources. */
81 struct lex_reader *reader;
83 struct segmenter segmenter;
84 bool eof; /* True if T_STOP was read from 'reader'. */
86 /* Buffer of UTF-8 bytes. */
88 size_t allocated; /* Number of bytes allocated. */
89 size_t tail; /* &buffer[0] offset into UTF-8 source. */
90 size_t head; /* &buffer[head - tail] offset into source. */
92 /* Positions in source file, tail <= pos <= head for each member here. */
93 size_t journal_pos; /* First byte not yet output to journal. */
94 size_t seg_pos; /* First byte not yet scanned as token. */
95 size_t line_pos; /* First byte of line containing seg_pos. */
97 int n_newlines; /* Number of new-lines up to seg_pos. */
98 bool suppress_next_newline;
101 struct deque deque; /* Indexes into 'tokens'. */
102 struct lex_token *tokens; /* Lookahead tokens for parser. */
/* NOTE(review): elided fragment mixing file-scope forward declarations with
   two members of "struct lexer" (lines 112-113); the struct's opening and
   closing braces are elided from this listing. */
105 static struct lex_source *lex_source_create (struct lexer *,
106 struct lex_reader *);
107 static void lex_source_destroy (struct lex_source *);
112 struct ll_list sources; /* Contains "struct lex_source"s. */
113 struct macro_set *macros;
116 static struct lex_source *lex_source__ (const struct lexer *);
117 static const struct lex_token *lex_next__ (const struct lexer *, int n);
118 static void lex_source_push_endcmd__ (struct lex_source *);
120 static void lex_source_pop__ (struct lex_source *);
121 static bool lex_source_get (const struct lex_source *);
122 static void lex_source_error_valist (struct lex_source *, int n0, int n1,
123 const char *format, va_list)
124 PRINTF_FORMAT (4, 0);
125 static const struct lex_token *lex_source_next__ (const struct lex_source *,
128 /* Initializes READER with the specified CLASS and otherwise some reasonable
129 defaults. The caller should fill in the others members as desired. */
131 lex_reader_init (struct lex_reader *reader,
132 const struct lex_reader_class *class)
134 reader->class = class;
135 reader->syntax = SEG_MODE_AUTO;
136 reader->error = LEX_ERROR_CONTINUE;
137 reader->file_name = NULL;
138 reader->encoding = NULL;
139 reader->line_number = 0;
143 /* Frees any file name already in READER and replaces it by a copy of
144 FILE_NAME, or if FILE_NAME is null then clears any existing name. */
146 lex_reader_set_file_name (struct lex_reader *reader, const char *file_name)
148 free (reader->file_name);
149 reader->file_name = xstrdup_if_nonnull (file_name);
152 /* Creates and returns a new lexer. */
156 struct lexer *lexer = xmalloc (sizeof *lexer);
157 *lexer = (struct lexer) {
158 .sources = LL_INITIALIZER (lexer->sources),
159 .macros = macro_set_create (),
164 /* Destroys LEXER. */
166 lex_destroy (struct lexer *lexer)
170 struct lex_source *source, *next;
172 ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources)
173 lex_source_destroy (source);
174 macro_set_destroy (lexer->macros);
179 /* Adds M to LEXER's set of macros. M replaces any existing macro with the
180 same name. Takes ownership of M. */
182 lex_define_macro (struct lexer *lexer, struct macro *m)
184 macro_set_add (lexer->macros, m);
187 /* Inserts READER into LEXER so that the next token read by LEXER comes from
188 READER. Before the caller, LEXER must either be empty or at a T_ENDCMD
191 lex_include (struct lexer *lexer, struct lex_reader *reader)
193 assert (ll_is_empty (&lexer->sources) || lex_token (lexer) == T_ENDCMD);
194 ll_push_head (&lexer->sources, &lex_source_create (lexer, reader)->ll);
197 /* Appends READER to LEXER, so that it will be read after all other current
198 readers have already been read. */
200 lex_append (struct lexer *lexer, struct lex_reader *reader)
202 ll_push_tail (&lexer->sources, &lex_source_create (lexer, reader)->ll);
207 static struct lex_token *
208 lex_push_token__ (struct lex_source *src)
210 struct lex_token *token;
212 if (deque_is_full (&src->deque))
213 src->tokens = deque_expand (&src->deque, src->tokens, sizeof *src->tokens);
215 token = &src->tokens[deque_push_front (&src->deque)];
216 token->token = (struct token) { .type = T_STOP };
221 lex_source_pop__ (struct lex_source *src)
223 token_uninit (&src->tokens[deque_pop_back (&src->deque)].token);
227 lex_source_pop_front (struct lex_source *src)
229 token_uninit (&src->tokens[deque_pop_front (&src->deque)].token);
232 /* Advances LEXER to the next token, consuming the current token. */
234 lex_get (struct lexer *lexer)
236 struct lex_source *src;
238 src = lex_source__ (lexer);
242 if (!deque_is_empty (&src->deque))
243 lex_source_pop__ (src);
245 while (deque_is_empty (&src->deque))
246 if (!lex_source_get (src))
248 lex_source_destroy (src);
249 src = lex_source__ (lexer);
/* Issuing errors. */

/* Prints a syntax error message containing the current token and
   given message MESSAGE (if non-null). */
void
lex_error (struct lexer *lexer, const char *format, ...)
{
  va_list args;

  va_start (args, format);
  lex_next_error_valist (lexer, 0, 0, format, args);
  va_end (args);
}

/* Prints a syntax error message containing the current token and
   given message MESSAGE (if non-null). */
void
lex_error_valist (struct lexer *lexer, const char *format, va_list args)
{
  lex_next_error_valist (lexer, 0, 0, format, args);
}

/* Prints a syntax error message containing the token N0 through N1 ahead of
   the current one and given message MESSAGE (if non-null). */
void
lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...)
{
  va_list args;

  va_start (args, format);
  lex_next_error_valist (lexer, n0, n1, format, args);
  va_end (args);
}

/* Prints a syntax error message saying that one of the strings provided as
   varargs, up to the first NULL, is expected.  (The parenthesized name
   suppresses any function-like macro of the same name.) */
void
(lex_error_expecting) (struct lexer *lexer, ...)
{
  va_list args;

  va_start (args, lexer);
  lex_error_expecting_valist (lexer, args);
  va_end (args);
}
/* Prints a syntax error message saying that one of the options provided in
   ARGS, up to the first NULL, is expected.  At most MAX_OPTIONS options are
   consumed; any beyond that are ignored. */
void
lex_error_expecting_valist (struct lexer *lexer, va_list args)
{
  enum { MAX_OPTIONS = 9 };
  const char *options[MAX_OPTIONS];
  int n = 0;
  while (n < MAX_OPTIONS)
    {
      const char *option = va_arg (args, const char *);
      if (!option)
        break;

      options[n++] = option;
    }
  lex_error_expecting_array (lexer, options, n);
}
/* Prints a syntax error message saying that one of the N strings in OPTIONS
   is expected.  With zero options (or more than this function knows how to
   format) it falls back to a plain syntax error message. */
void
lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n)
{
  switch (n)
    {
    case 0:
      lex_error (lexer, NULL);
      break;

    case 1:
      lex_error (lexer, _("expecting %s"), options[0]);
      break;

    case 2:
      lex_error (lexer, _("expecting %s or %s"), options[0], options[1]);
      break;

    case 3:
      lex_error (lexer, _("expecting %s, %s, or %s"), options[0], options[1],
                 options[2]);
      break;

    case 4:
      lex_error (lexer, _("expecting %s, %s, %s, or %s"),
                 options[0], options[1], options[2], options[3]);
      break;

    case 5:
      lex_error (lexer, _("expecting %s, %s, %s, %s, or %s"),
                 options[0], options[1], options[2], options[3], options[4]);
      break;

    case 6:
      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, or %s"),
                 options[0], options[1], options[2], options[3], options[4],
                 options[5]);
      break;

    case 7:
      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, or %s"),
                 options[0], options[1], options[2], options[3], options[4],
                 options[5], options[6]);
      break;

    case 8:
      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, %s, or %s"),
                 options[0], options[1], options[2], options[3], options[4],
                 options[5], options[6], options[7]);
      break;

    default:
      lex_error (lexer, NULL);
      break;
    }
}
375 /* Reports an error to the effect that subcommand SBC may only be specified
378 This function does not take a lexer as an argument or use lex_error(),
379 because the result would ordinarily just be redundant: "Syntax error at
380 SUBCOMMAND: Subcommand SUBCOMMAND may only be specified once.", which does
381 not help the user find the error. */
383 lex_sbc_only_once (const char *sbc)
385 msg (SE, _("Subcommand %s may only be specified once."), sbc);
388 /* Reports an error to the effect that subcommand SBC is missing.
390 This function does not take a lexer as an argument or use lex_error(),
391 because a missing subcommand can normally be detected only after the whole
392 command has been parsed, and so lex_error() would always report "Syntax
393 error at end of command", which does not help the user find the error. */
395 lex_sbc_missing (const char *sbc)
397 msg (SE, _("Required subcommand %s was not specified."), sbc);
400 /* Reports an error to the effect that specification SPEC may only be specified
401 once within subcommand SBC. */
403 lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec)
405 lex_error (lexer, _("%s may only be specified once within subcommand %s"),
409 /* Reports an error to the effect that specification SPEC is missing within
412 lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec)
414 lex_error (lexer, _("Required %s specification missing from %s subcommand"),
418 /* Prints a syntax error message containing the current token and
419 given message MESSAGE (if non-null). */
421 lex_next_error_valist (struct lexer *lexer, int n0, int n1,
422 const char *format, va_list args)
424 struct lex_source *src = lex_source__ (lexer);
427 lex_source_error_valist (src, n0, n1, format, args);
433 ds_put_format (&s, _("Syntax error at end of input"));
436 ds_put_cstr (&s, ": ");
437 ds_put_vformat (&s, format, args);
439 ds_put_byte (&s, '.');
440 msg (SE, "%s", ds_cstr (&s));
445 /* Checks that we're at end of command.
446 If so, returns a successful command completion code.
447 If not, flags a syntax error and returns an error command
450 lex_end_of_command (struct lexer *lexer)
452 if (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_STOP)
454 lex_error (lexer, _("expecting end of command"));
/* Token testing functions. */

/* Returns true if the current token is a number. */
bool
lex_is_number (const struct lexer *lexer)
{
  return lex_next_is_number (lexer, 0);
}

/* Returns true if the current token is a string. */
bool
lex_is_string (const struct lexer *lexer)
{
  return lex_next_is_string (lexer, 0);
}

/* Returns the value of the current token, which must be a
   floating point number. */
double
lex_number (const struct lexer *lexer)
{
  return lex_next_number (lexer, 0);
}

/* Returns true iff the current token is an integer. */
bool
lex_is_integer (const struct lexer *lexer)
{
  return lex_next_is_integer (lexer, 0);
}

/* Returns the value of the current token, which must be an
   integer. */
long
lex_integer (const struct lexer *lexer)
{
  return lex_next_integer (lexer, 0);
}
500 /* Token testing functions with lookahead.
502 A value of 0 for N as an argument to any of these functions refers to the
503 current token. Lookahead is limited to the current command. Any N greater
504 than the number of tokens remaining in the current command will be treated
505 as referring to a T_ENDCMD token. */
507 /* Returns true if the token N ahead of the current token is a number. */
509 lex_next_is_number (const struct lexer *lexer, int n)
511 enum token_type next_token = lex_next_token (lexer, n);
512 return next_token == T_POS_NUM || next_token == T_NEG_NUM;
515 /* Returns true if the token N ahead of the current token is a string. */
517 lex_next_is_string (const struct lexer *lexer, int n)
519 return lex_next_token (lexer, n) == T_STRING;
522 /* Returns the value of the token N ahead of the current token, which must be a
523 floating point number. */
525 lex_next_number (const struct lexer *lexer, int n)
527 assert (lex_next_is_number (lexer, n));
528 return lex_next_tokval (lexer, n);
531 /* Returns true if the token N ahead of the current token is an integer. */
533 lex_next_is_integer (const struct lexer *lexer, int n)
537 if (!lex_next_is_number (lexer, n))
540 value = lex_next_tokval (lexer, n);
541 return value > LONG_MIN && value <= LONG_MAX && floor (value) == value;
544 /* Returns the value of the token N ahead of the current token, which must be
547 lex_next_integer (const struct lexer *lexer, int n)
549 assert (lex_next_is_integer (lexer, n));
550 return lex_next_tokval (lexer, n);
553 /* Token matching functions. */
555 /* If the current token has the specified TYPE, skips it and returns true.
556 Otherwise, returns false. */
558 lex_match (struct lexer *lexer, enum token_type type)
560 if (lex_token (lexer) == type)
569 /* If the current token matches IDENTIFIER, skips it and returns true.
570 IDENTIFIER may be abbreviated to its first three letters. Otherwise,
573 IDENTIFIER must be an ASCII string. */
575 lex_match_id (struct lexer *lexer, const char *identifier)
577 return lex_match_id_n (lexer, identifier, 3);
580 /* If the current token is IDENTIFIER, skips it and returns true. IDENTIFIER
581 may be abbreviated to its first N letters. Otherwise, returns false.
583 IDENTIFIER must be an ASCII string. */
585 lex_match_id_n (struct lexer *lexer, const char *identifier, size_t n)
587 if (lex_token (lexer) == T_ID
588 && lex_id_match_n (ss_cstr (identifier), lex_tokss (lexer), n))
597 /* If the current token is integer X, skips it and returns true. Otherwise,
600 lex_match_int (struct lexer *lexer, int x)
602 if (lex_is_integer (lexer) && lex_integer (lexer) == x)
611 /* Forced matches. */
613 /* If this token is IDENTIFIER, skips it and returns true. IDENTIFIER may be
614 abbreviated to its first 3 letters. Otherwise, reports an error and returns
617 IDENTIFIER must be an ASCII string. */
619 lex_force_match_id (struct lexer *lexer, const char *identifier)
621 if (lex_match_id (lexer, identifier))
625 lex_error_expecting (lexer, identifier);
630 /* If the current token has the specified TYPE, skips it and returns true.
631 Otherwise, reports an error and returns false. */
633 lex_force_match (struct lexer *lexer, enum token_type type)
635 if (lex_token (lexer) == type)
642 const char *type_string = token_type_to_string (type);
645 char *s = xasprintf ("`%s'", type_string);
646 lex_error_expecting (lexer, s);
650 lex_error_expecting (lexer, token_type_to_name (type));
656 /* If the current token is a string, does nothing and returns true.
657 Otherwise, reports an error and returns false. */
659 lex_force_string (struct lexer *lexer)
661 if (lex_is_string (lexer))
665 lex_error (lexer, _("expecting string"));
670 /* If the current token is a string or an identifier, does nothing and returns
671 true. Otherwise, reports an error and returns false.
673 This is meant for use in syntactic situations where we want to encourage the
674 user to supply a quoted string, but for compatibility we also accept
675 identifiers. (One example of such a situation is file names.) Therefore,
676 the error message issued when the current token is wrong only says that a
677 string is expected and doesn't mention that an identifier would also be
680 lex_force_string_or_id (struct lexer *lexer)
682 return lex_token (lexer) == T_ID || lex_force_string (lexer);
685 /* If the current token is an integer, does nothing and returns true.
686 Otherwise, reports an error and returns false. */
688 lex_force_int (struct lexer *lexer)
690 if (lex_is_integer (lexer))
694 lex_error (lexer, _("expecting integer"));
/* If the current token is an integer in the range MIN...MAX (inclusive), does
   nothing and returns true.  Otherwise, reports an error and returns false.
   If NAME is nonnull, then it is used in the error message. */
bool
lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
{
  bool is_integer = lex_is_integer (lexer);
  bool too_small = is_integer && lex_integer (lexer) < min;
  bool too_big = is_integer && lex_integer (lexer) > max;
  if (is_integer && !too_small && !too_big)
    return true;

  if (min > max)
    {
      /* Weird, maybe a bug in the caller.  Just report that we needed an
         integer. */
      if (name)
        lex_error (lexer, _("Integer expected for %s."), name);
      else
        lex_error (lexer, _("Integer expected."));
    }
  else if (min == max)
    {
      if (name)
        lex_error (lexer, _("Expected %ld for %s."), min, name);
      else
        lex_error (lexer, _("Expected %ld."), min);
    }
  else if (min + 1 == max)
    {
      if (name)
        lex_error (lexer, _("Expected %ld or %ld for %s."), min, min + 1, name);
      else
        lex_error (lexer, _("Expected %ld or %ld."), min, min + 1);
    }
  else
    {
      /* Only mention a bound if it is "interesting", i.e. not so extreme
         that it is effectively unbounded, or if the actual value violated
         it. */
      bool report_lower_bound = (min > INT_MIN / 2) || too_small;
      bool report_upper_bound = (max < INT_MAX / 2) || too_big;

      if (report_lower_bound && report_upper_bound)
        {
          if (name)
            lex_error (lexer,
                       _("Expected integer between %ld and %ld for %s."),
                       min, max, name);
          else
            lex_error (lexer, _("Expected integer between %ld and %ld."),
                       min, max);
        }
      else if (report_lower_bound)
        {
          if (min == 0)
            {
              if (name)
                lex_error (lexer, _("Expected non-negative integer for %s."),
                           name);
              else
                lex_error (lexer, _("Expected non-negative integer."));
            }
          else if (min == 1)
            {
              if (name)
                lex_error (lexer, _("Expected positive integer for %s."),
                           name);
              else
                lex_error (lexer, _("Expected positive integer."));
            }
        }
      else if (report_upper_bound)
        {
          if (name)
            lex_error (lexer,
                       _("Expected integer less than or equal to %ld for %s."),
                       max, name);
          else
            lex_error (lexer, _("Expected integer less than or equal to %ld."),
                       max);
        }
      else
        {
          if (name)
            lex_error (lexer, _("Integer expected for %s."), name);
          else
            lex_error (lexer, _("Integer expected."));
        }
    }
  return false;
}
789 /* If the current token is a number, does nothing and returns true.
790 Otherwise, reports an error and returns false. */
792 lex_force_num (struct lexer *lexer)
794 if (lex_is_number (lexer))
797 lex_error (lexer, _("expecting number"));
801 /* If the current token is an identifier, does nothing and returns true.
802 Otherwise, reports an error and returns false. */
804 lex_force_id (struct lexer *lexer)
806 if (lex_token (lexer) == T_ID)
809 lex_error (lexer, _("expecting identifier"));
813 /* Token accessors. */
815 /* Returns the type of LEXER's current token. */
817 lex_token (const struct lexer *lexer)
819 return lex_next_token (lexer, 0);
822 /* Returns the number in LEXER's current token.
824 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
825 tokens this function will always return zero. */
827 lex_tokval (const struct lexer *lexer)
829 return lex_next_tokval (lexer, 0);
832 /* Returns the null-terminated string in LEXER's current token, UTF-8 encoded.
834 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
835 this functions this function will always return NULL.
837 The UTF-8 encoding of the returned string is correct for variable names and
838 other identifiers. Use filename_to_utf8() to use it as a filename. Use
839 data_in() to use it in a "union value". */
841 lex_tokcstr (const struct lexer *lexer)
843 return lex_next_tokcstr (lexer, 0);
846 /* Returns the string in LEXER's current token, UTF-8 encoded. The string is
847 null-terminated (but the null terminator is not included in the returned
848 substring's 'length').
850 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
851 this functions this function will always return NULL.
853 The UTF-8 encoding of the returned string is correct for variable names and
854 other identifiers. Use filename_to_utf8() to use it as a filename. Use
855 data_in() to use it in a "union value". */
857 lex_tokss (const struct lexer *lexer)
859 return lex_next_tokss (lexer, 0);
864 A value of 0 for N as an argument to any of these functions refers to the
865 current token. Lookahead is limited to the current command. Any N greater
866 than the number of tokens remaining in the current command will be treated
867 as referring to a T_ENDCMD token. */
869 static const struct lex_token *
870 lex_next__ (const struct lexer *lexer_, int n)
872 struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
873 struct lex_source *src = lex_source__ (lexer);
876 return lex_source_next__ (src, n);
879 static const struct lex_token stop_token = { .token = { .type = T_STOP } };
884 static const struct lex_token *
885 lex_source_front (const struct lex_source *src)
887 return &src->tokens[deque_front (&src->deque, 0)];
890 static const struct lex_token *
891 lex_source_next__ (const struct lex_source *src, int n)
893 while (deque_count (&src->deque) <= n)
895 if (!deque_is_empty (&src->deque))
897 const struct lex_token *front = lex_source_front (src);
898 if (front->token.type == T_STOP || front->token.type == T_ENDCMD)
902 lex_source_get (src);
905 return &src->tokens[deque_back (&src->deque, n)];
908 /* Returns the "struct token" of the token N after the current one in LEXER.
909 The returned pointer can be invalidated by pretty much any succeeding call
910 into the lexer, although the string pointer within the returned token is
911 only invalidated by consuming the token (e.g. with lex_get()). */
913 lex_next (const struct lexer *lexer, int n)
915 return &lex_next__ (lexer, n)->token;
918 /* Returns the type of the token N after the current one in LEXER. */
920 lex_next_token (const struct lexer *lexer, int n)
922 return lex_next (lexer, n)->type;
925 /* Returns the number in the tokn N after the current one in LEXER.
927 Only T_NEG_NUM and T_POS_NUM tokens have meaningful values. For other
928 tokens this function will always return zero. */
930 lex_next_tokval (const struct lexer *lexer, int n)
932 const struct token *token = lex_next (lexer, n);
933 return token->number;
936 /* Returns the null-terminated string in the token N after the current one, in
939 Only T_ID and T_STRING tokens have meaningful strings. For other tokens
940 this functions this function will always return NULL.
942 The UTF-8 encoding of the returned string is correct for variable names and
943 other identifiers. Use filename_to_utf8() to use it as a filename. Use
944 data_in() to use it in a "union value". */
946 lex_next_tokcstr (const struct lexer *lexer, int n)
948 return lex_next_tokss (lexer, n).string;
951 /* Returns the string in the token N after the current one, in UTF-8 encoding.
952 The string is null-terminated (but the null terminator is not included in
953 the returned substring's 'length').
955 Only T_ID, T_MACRO_ID, T_STRING tokens have meaningful strings. For other
956 tokens this functions this function will always return NULL.
958 The UTF-8 encoding of the returned string is correct for variable names and
959 other identifiers. Use filename_to_utf8() to use it as a filename. Use
960 data_in() to use it in a "union value". */
962 lex_next_tokss (const struct lexer *lexer, int n)
964 return lex_next (lexer, n)->string;
968 lex_tokens_match (const struct token *actual, const struct token *expected)
970 if (actual->type != expected->type)
973 switch (actual->type)
977 return actual->number == expected->number;
980 return lex_id_match (expected->string, actual->string);
983 return (actual->string.length == expected->string.length
984 && !memcmp (actual->string.string, expected->string.string,
985 actual->string.length));
992 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
993 skips it and returns true. Otherwise, returns false.
995 S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
996 "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their
997 first three letters. */
999 lex_match_phrase (struct lexer *lexer, const char *s)
1001 struct string_lexer slex;
1006 string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE);
1007 while (string_lexer_next (&slex, &token))
1008 if (token.type != SCAN_SKIP)
1010 bool match = lex_tokens_match (lex_next (lexer, i++), &token);
1011 token_uninit (&token);
1022 lex_source_get_first_line_number (const struct lex_source *src, int n)
1024 return lex_source_next__ (src, n)->first_line;
/* Returns the number of new-line characters in the LENGTH bytes starting at
   S.  S need not be null-terminated. */
static int
count_newlines (char *s, size_t length)
{
  int n_newlines = 0;
  char *newline;

  while ((newline = memchr (s, '\n', length)) != NULL)
    {
      n_newlines++;
      length -= (newline + 1) - s;
      s = newline + 1;
    }

  return n_newlines;
}
1044 lex_source_get_last_line_number (const struct lex_source *src, int n)
1046 const struct lex_token *token = lex_source_next__ (src, n);
1048 if (token->first_line == 0)
1052 char *token_str = &src->buffer[token->token_pos - src->tail];
1053 return token->first_line + count_newlines (token_str, token->token_len) + 1;
1058 count_columns (const char *s_, size_t length)
1060 const uint8_t *s = CHAR_CAST (const uint8_t *, s_);
1066 for (ofs = 0; ofs < length; ofs += mblen)
1070 mblen = u8_mbtouc (&uc, s + ofs, length - ofs);
1073 int width = uc_width (uc, "UTF-8");
1078 columns = ROUND_UP (columns + 1, 8);
1085 lex_source_get_first_column (const struct lex_source *src, int n)
1087 const struct lex_token *token = lex_source_next__ (src, n);
1088 return count_columns (&src->buffer[token->line_pos - src->tail],
1089 token->token_pos - token->line_pos);
1093 lex_source_get_last_column (const struct lex_source *src, int n)
1095 const struct lex_token *token = lex_source_next__ (src, n);
1096 char *start, *end, *newline;
1098 start = &src->buffer[token->line_pos - src->tail];
1099 end = &src->buffer[(token->token_pos + token->token_len) - src->tail];
1100 newline = memrchr (start, '\n', end - start);
1101 if (newline != NULL)
1102 start = newline + 1;
1103 return count_columns (start, end - start);
/* Returns the 1-based line number of the start of the syntax that represents
   the token N after the current one in LEXER.  Returns 0 for a T_STOP token
   or if the token is drawn from a source that does not have line numbers. */
int
lex_get_first_line_number (const struct lexer *lexer, int n)
{
  const struct lex_source *src = lex_source__ (lexer);
  return src != NULL ? lex_source_get_first_line_number (src, n) : 0;
}

/* Returns the 1-based line number of the end of the syntax that represents
   the token N after the current one in LEXER, plus 1.  Returns 0 for a T_STOP
   token or if the token is drawn from a source that does not have line
   numbers.

   Most of the time, a single token is wholly within a single line of syntax,
   but there are two exceptions: a T_STRING token can be made up of multiple
   segments on adjacent lines connected with "+" punctuators, and a T_NEG_NUM
   token can consist of a "-" on one line followed by the number on the next.
 */
int
lex_get_last_line_number (const struct lexer *lexer, int n)
{
  const struct lex_source *src = lex_source__ (lexer);
  return src != NULL ? lex_source_get_last_line_number (src, n) : 0;
}

/* Returns the 1-based column number of the start of the syntax that
   represents the token N after the current one in LEXER.  Returns 0 for a
   T_STOP token.

   Column numbers are measured according to the width of characters as shown
   in a typical fixed-width font, in which CJK characters have width 2 and
   combining characters have width 0. */
int
lex_get_first_column (const struct lexer *lexer, int n)
{
  const struct lex_source *src = lex_source__ (lexer);
  return src != NULL ? lex_source_get_first_column (src, n) : 0;
}

/* Returns the 1-based column number of the end of the syntax that represents
   the token N after the current one in LEXER, plus 1.  Returns 0 for a T_STOP
   token.

   Column numbers are measured according to the width of characters as shown
   in a typical fixed-width font, in which CJK characters have width 2 and
   combining characters have width 0. */
int
lex_get_last_column (const struct lexer *lexer, int n)
{
  const struct lex_source *src = lex_source__ (lexer);
  return src != NULL ? lex_source_get_last_column (src, n) : 0;
}
1161 /* Returns the name of the syntax file from which the current command is drawn.
1162 Returns NULL for a T_STOP token or if the command's source does not have
1165 There is no version of this function that takes an N argument because
1166 lookahead only works to the end of a command and any given command is always
1167 within a single syntax file. */
1169 lex_get_file_name (const struct lexer *lexer)
1171 struct lex_source *src = lex_source__ (lexer);
1172 return src == NULL ? NULL : src->reader->file_name;
1176 lex_get_encoding (const struct lexer *lexer)
1178 struct lex_source *src = lex_source__ (lexer);
1179 return src == NULL ? NULL : src->reader->encoding;
1183 /* Returns the syntax mode for the syntax file from which the current drawn is
1184 drawn. Returns SEG_MODE_AUTO for a T_STOP token or if the command's source
1185 does not have line numbers.
1187 There is no version of this function that takes an N argument because
1188 lookahead only works to the end of a command and any given command is always
1189 within a single syntax file. */
1191 lex_get_syntax_mode (const struct lexer *lexer)
1193 struct lex_source *src = lex_source__ (lexer);
1194 return src == NULL ? SEG_MODE_AUTO : src->reader->syntax;
1197 /* Returns the error mode for the syntax file from which the current drawn is
1198 drawn. Returns LEX_ERROR_TERMINAL for a T_STOP token or if the command's
1199 source does not have line numbers.
1201 There is no version of this function that takes an N argument because
1202 lookahead only works to the end of a command and any given command is always
1203 within a single syntax file. */
1205 lex_get_error_mode (const struct lexer *lexer)
1207 struct lex_source *src = lex_source__ (lexer);
1208 return src == NULL ? LEX_ERROR_TERMINAL : src->reader->error;
1211 /* If the source that LEXER is currently reading has error mode
1212 LEX_ERROR_TERMINAL, discards all buffered input and tokens, so that the next
1213 token to be read comes directly from whatever is next read from the stream.
1215 It makes sense to call this function after encountering an error in a
1216 command entered on the console, because usually the user would prefer not to
1217 have cascading errors. */
1219 lex_interactive_reset (struct lexer *lexer)
1221 struct lex_source *src = lex_source__ (lexer);
1222 if (src != NULL && src->reader->error == LEX_ERROR_TERMINAL)
1224 src->head = src->tail = 0;
1225 src->journal_pos = src->seg_pos = src->line_pos = 0;
1226 src->n_newlines = 0;
1227 src->suppress_next_newline = false;
1228 segmenter_init (&src->segmenter, segmenter_get_mode (&src->segmenter));
1229 while (!deque_is_empty (&src->deque))
1230 lex_source_pop__ (src);
1231 lex_source_push_endcmd__ (src);
1235 /* Advances past any tokens in LEXER up to a T_ENDCMD or T_STOP. */
1237 lex_discard_rest_of_command (struct lexer *lexer)
1239 while (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD)
1243 /* Discards all lookahead tokens in LEXER, then discards all input sources
1244 until it encounters one with error mode LEX_ERROR_TERMINAL or until it
1245 runs out of input sources. */
1247 lex_discard_noninteractive (struct lexer *lexer)
1249 struct lex_source *src = lex_source__ (lexer);
/* First drop every token buffered for the current source... */
1253 while (!deque_is_empty (&src->deque))
1254 lex_source_pop__ (src);
/* ...then destroy sources until one with LEX_ERROR_TERMINAL is reached or
   none remain.  lex_source__() re-fetches the new head source after each
   destruction. */
1256 for (; src != NULL && src->reader->error != LEX_ERROR_TERMINAL;
1257 src = lex_source__ (lexer))
1258 lex_source_destroy (src);
/* Returns the greatest buffer position that SRC's tail may advance to without
   discarding bytes still needed: everything at or after the journal position,
   the current line start, and the oldest buffered token's line must be kept. */
1263 lex_source_max_tail__ (const struct lex_source *src)
1265 const struct lex_token *token;
1268 assert (src->seg_pos >= src->line_pos);
1269 max_tail = MIN (src->journal_pos, src->line_pos);
1271 /* Use the oldest token also. (We know that src->deque cannot be empty
1272 because we are in the process of adding a new token, which is already
1273 initialized enough to use here.) */
1274 token = &src->tokens[deque_back (&src->deque, 0)];
1275 assert (token->token_pos >= token->line_pos);
1276 max_tail = MIN (max_tail, token->line_pos);
/* Ensures that SRC's buffer has room for more input, either by advancing the
   tail past bytes that are no longer needed or, failing that, by growing the
   buffer. */
1282 lex_source_expand__ (struct lex_source *src)
1284 if (src->head - src->tail >= src->allocated)
1286 size_t max_tail = lex_source_max_tail__ (src);
1287 if (max_tail > src->tail)
1289 /* Advance the tail, freeing up room at the head. */
1290 memmove (src->buffer, src->buffer + (max_tail - src->tail),
1291 src->head - max_tail);
1292 src->tail = max_tail;
1296 /* Buffer is completely full. Expand it. */
1297 src->buffer = x2realloc (src->buffer, &src->allocated);
1302 /* There's space available at the head of the buffer. Nothing to do. */
/* Reads more input from SRC's reader into its buffer, looping until the
   not-yet-segmented part of the buffer contains a complete line (or end of
   input is reached). */
1307 lex_source_read__ (struct lex_source *src)
1311 lex_source_expand__ (src);
1313 size_t head_ofs = src->head - src->tail;
1314 size_t space = src->allocated - head_ofs;
1315 enum prompt_style prompt = segmenter_get_prompt (&src->segmenter);
1316 size_t n = src->reader->class->read (src->reader, &src->buffer[head_ofs],
1318 assert (n <= space);
/* A zero-byte read marks end of input on this reader. */
1323 src->reader->eof = true;
1324 lex_source_expand__ (src);
/* Loop condition of the enclosing do-while: keep reading until a newline
   appears past the segmenter position (loop opening is elided here). */
1330 while (!memchr (&src->buffer[src->seg_pos - src->tail], '\n',
1331 src->head - src->seg_pos));
1334 static struct lex_source *
/* Returns the source LEXER is currently reading from (the head of its source
   list), or NULL if there are no sources. */
1335 lex_source__ (const struct lexer *lexer)
1337 return (ll_is_empty (&lexer->sources) ? NULL
1338 : ll_data (ll_head (&lexer->sources), struct lex_source, ll));
1341 static struct substring
/* Returns the raw syntax text in SRC's buffer that spans lookahead tokens N0
   through N1 inclusive.  The returned substring aliases SRC's buffer; it is
   not a copy and is invalidated when the buffer moves. */
1342 lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
1344 const struct lex_token *token0 = lex_source_next__ (src, n0);
1345 const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1));
1346 size_t start = token0->token_pos;
1347 size_t end = token1->token_pos + token1->token_len;
1349 return ss_buffer (&src->buffer[start - src->tail], end - start);
/* Copies UTF-8 text IN into OUT (which has OUT_SIZE bytes, at least 16),
   stopping at the first new-line, NUL, or CR-LF and when OUT would overflow,
   truncating only on UTF-8 character boundaries, and appending "..." when
   anything was cut off. */
1353 lex_ellipsize__ (struct substring in, char *out, size_t out_size)
1359 assert (out_size >= 16);
1360 out_maxlen = out_size - 1;
1361 if (in.length > out_maxlen - 3)
/* Reserve room for the "..." suffix (body of this 'if' is elided here). */
1364 for (out_len = 0; out_len < in.length; out_len += mblen)
/* Stop at end-of-line or an embedded null byte. */
1366 if (in.string[out_len] == '\n'
1367 || in.string[out_len] == '\0'
1368 || (in.string[out_len] == '\r'
1369 && out_len + 1 < in.length
1370 && in.string[out_len + 1] == '\n'))
/* Advance by whole multibyte characters so truncation never splits one. */
1373 mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len),
1374 in.length - out_len);
1379 if (out_len + mblen > out_maxlen)
1383 memcpy (out, in.string, out_len);
1384 strcpy (&out[out_len], out_len < in.length ? "..." : "");
/* Reports a syntax error covering lookahead tokens N0 through N1 in SRC,
   formatting FORMAT with ARGS as the error detail and attaching file name and
   line/column attribution from the tokens' positions. */
1388 lex_source_error_valist (struct lex_source *src, int n0, int n1,
1389 const char *format, va_list args)
1391 const struct lex_token *token;
1396 token = lex_source_next__ (src, n0);
1397 if (token->token.type == T_ENDCMD)
1398 ds_put_cstr (&s, _("Syntax error at end of command"));
1399 else if (token->from_macro)
1401 /* XXX this isn't ideal, we should get the actual syntax */
1402 char *syntax = token_to_string (&token->token);
1404 ds_put_format (&s, _("Syntax error at `%s'"), syntax);
1406 ds_put_cstr (&s, _("Syntax error"));
/* Ordinary tokens: quote an ellipsized copy of the offending syntax. */
1411 struct substring syntax = lex_source_get_syntax__ (src, n0, n1);
1412 if (!ss_is_empty (syntax))
1414 char syntax_cstr[64];
1416 lex_ellipsize__ (syntax, syntax_cstr, sizeof syntax_cstr);
1417 ds_put_format (&s, _("Syntax error at `%s'"), syntax_cstr);
1420 ds_put_cstr (&s, _("Syntax error"));
1425 ds_put_cstr (&s, ": ");
1426 ds_put_vformat (&s, format, args);
/* Ensure the message ends with a period. */
1428 if (ds_last (&s) != '.')
1429 ds_put_byte (&s, '.');
/* Emit the message (struct msg initializer; surrounding lines are elided in
   this listing).  ds_steal_cstr() transfers ownership of the text. */
1432 .category = MSG_C_SYNTAX,
1433 .severity = MSG_S_ERROR,
1434 .file_name = src->reader->file_name,
1435 .first_line = lex_source_get_first_line_number (src, n0),
1436 .last_line = lex_source_get_last_line_number (src, n1),
1437 .first_column = lex_source_get_first_column (src, n0),
1438 .last_column = lex_source_get_last_column (src, n1),
1439 .text = ds_steal_cstr (&s),
1444 static void PRINTF_FORMAT (2, 3)
/* Reports a printf()-formatted error attributed to the newest token in SRC
   (the one currently being scanned), then removes that token from the
   deque. */
1445 lex_get_error (struct lex_source *src, const char *format, ...)
1450 va_start (args, format);
/* The newest token is at lookahead index count - 1. */
1452 n = deque_count (&src->deque) - 1;
1453 lex_source_error_valist (src, n, n, format, args);
1454 lex_source_pop_front (src);
1459 /* Attempts to append an additional token into SRC's deque, reading more from
1460 the underlying lex_reader if necessary. Returns true if a new token was
1461 added to SRC's deque, false otherwise. */
1463 lex_source_try_get (struct lex_source *src)
1465 /* State maintained while scanning tokens. Usually we only need a single
1466 state, but scanner_push() can return SCAN_SAVE to indicate that the state
1467 needs to be saved and possibly restored later with SCAN_BACK. */
1470 struct segmenter segmenter;
1471 enum segment_type last_segment;
1472 int newlines; /* Number of newlines encountered so far. */
1473 /* Maintained here so we can update lex_source's similar members when we
1479 /* Initialize state. */
1480 struct state state =
1482 .segmenter = src->segmenter,
1484 .seg_pos = src->seg_pos,
1485 .line_pos = src->line_pos,
/* 'saved' backs up 'state' for SCAN_SAVE / SCAN_BACK round trips. */
1487 struct state saved = state;
1489 /* Append a new token to SRC and initialize it. */
1490 struct lex_token *token = lex_push_token__ (src);
1491 struct scanner scanner;
1492 scanner_init (&scanner, &token->token);
1493 token->line_pos = src->line_pos;
1494 token->token_pos = src->seg_pos;
1495 if (src->reader->line_number > 0)
1496 token->first_line = src->reader->line_number + src->n_newlines;
/* NOTE(review): the 'else' for this assignment is on an elided line. */
1498 token->first_line = 0;
1500 /* Extract segments and pass them through the scanner until we obtain a
1504 /* Extract a segment. */
1505 const char *segment = &src->buffer[state.seg_pos - src->tail];
1506 size_t seg_maxlen = src->head - state.seg_pos;
1507 enum segment_type type;
1508 int seg_len = segmenter_push (&state.segmenter, segment, seg_maxlen,
1509 src->reader->eof, &type);
1512 /* The segmenter needs more input to produce a segment. */
1513 assert (!src->reader->eof);
1514 lex_source_read__ (src);
1518 /* Update state based on the segment. */
1519 state.last_segment = type;
1520 state.seg_pos += seg_len;
1521 if (type == SEG_NEWLINE)
1524 state.line_pos = state.seg_pos;
1527 /* Pass the segment into the scanner and try to get a token out. */
1528 enum scan_result result = scanner_push (&scanner, type,
1529 ss_buffer (segment, seg_len),
1531 if (result == SCAN_SAVE)
1533 else if (result == SCAN_BACK)
1538 else if (result == SCAN_DONE)
1542 /* If we've reached the end of a line, or the end of a command, then pass
1543 the line to the output engine as a syntax text item. */
1544 int n_lines = state.newlines;
1545 if (state.last_segment == SEG_END_COMMAND && !src->suppress_next_newline)
1548 src->suppress_next_newline = true;
1550 else if (n_lines > 0 && src->suppress_next_newline)
1553 src->suppress_next_newline = false;
1555 for (int i = 0; i < n_lines; i++)
1557 /* Beginning of line. */
1558 const char *line = &src->buffer[src->journal_pos - src->tail];
1560 /* Calculate line length, including \n or \r\n end-of-line if present.
1562 We use src->head even though that may be beyond what we've actually
1563 converted to tokens (which is only through state.line_pos). That's
1564 because, if we're emitting the line due to SEG_END_COMMAND, we want to
1565 take the whole line through the newline, not just through the '.'. */
1566 size_t max_len = src->head - src->journal_pos;
1567 const char *newline = memchr (line, '\n', max_len);
1568 size_t line_len = newline ? newline - line + 1 : max_len;
1570 /* Calculate line length excluding end-of-line. */
1571 size_t copy_len = line_len;
1572 if (copy_len > 0 && line[copy_len - 1] == '\n')
1574 if (copy_len > 0 && line[copy_len - 1] == '\r')
1577 /* Submit the line as syntax. */
1578 output_item_submit (text_item_create_nocopy (TEXT_ITEM_SYNTAX,
1579 xmemdup0 (line, copy_len),
1582 src->journal_pos += line_len;
/* Record the token's source length and copy scanning state back to SRC. */
1585 token->token_len = state.seg_pos - src->seg_pos;
1587 src->segmenter = state.segmenter;
1588 src->seg_pos = state.seg_pos;
1589 src->line_pos = state.line_pos;
1590 src->n_newlines += state.newlines;
/* Post-process the scanned token: some types are rewritten, and scanner
   error codes are reported via lex_get_error(), which also pops the bad
   token. */
1592 switch (token->token.type)
1598 token->token.type = T_ENDCMD;
1602 case SCAN_BAD_HEX_LENGTH:
1603 lex_get_error (src, _("String of hex digits has %d characters, which "
1604 "is not a multiple of 2"),
1605 (int) token->token.number);
1608 case SCAN_BAD_HEX_DIGIT:
1609 case SCAN_BAD_UNICODE_DIGIT:
1610 lex_get_error (src, _("`%c' is not a valid hex digit"),
1611 (int) token->token.number);
1614 case SCAN_BAD_UNICODE_LENGTH:
1615 lex_get_error (src, _("Unicode string contains %d bytes, which is "
1616 "not in the valid range of 1 to 8 bytes"),
1617 (int) token->token.number);
1620 case SCAN_BAD_UNICODE_CODE_POINT:
1621 lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
1622 (int) token->token.number);
1625 case SCAN_EXPECTED_QUOTE:
1626 lex_get_error (src, _("Unterminated string constant"));
1629 case SCAN_EXPECTED_EXPONENT:
1630 lex_get_error (src, _("Missing exponent following `%s'"),
1631 token->token.string.string);
1634 case SCAN_UNEXPECTED_CHAR:
1637 lex_get_error (src, _("Bad character %s in input"),
1638 uc_name (token->token.number, c_name));
/* NOTE(review): which case this pop belongs to is not visible in this
   elided listing — confirm against the full source. */
1643 lex_source_pop_front (src);
/* Obtains one more token for SRC; falls back to lex_source_try_get() when
   needed.  NOTE(review): most of this function's body, including the other
   branches of this conditional, is elided from this listing. */
1651 lex_source_get__ (struct lex_source *src)
1657 else if (lex_source_try_get (src))
/* Obtains the next token for SRC, running the macro expander over newly
   scanned tokens when macro expansion is enabled (settings_get_mexpand()).
   NOTE(review): several lines, including the return statements, are elided
   from this listing. */
1663 lex_source_get (const struct lex_source *src_)
1665 struct lex_source *src = CONST_CAST (struct lex_source *, src_);
1667 size_t old_count = deque_count (&src->deque);
1668 if (!lex_source_get__ (src))
1671 if (!settings_get_mexpand ())
/* Try to start a macro expansion from the newest token. */
1674 struct macro_expander *me;
1675 int retval = macro_expander_create (src->lexer->macros,
1676 &lex_source_front (src)->token, &me);
/* Feed further tokens to the expander until it accepts or rejects. */
1679 if (!lex_source_get__ (src))
1681 /* This should not be reachable because we always get a T_STOP at the
1682 end of input and the macro_expander should always terminate
1683 expansion on T_STOP. */
1687 retval = macro_expander_add (me, &lex_source_front (src)->token);
1691 /* XXX handle case where there's a macro invocation starting from some
1692 later token we've already obtained */
1693 macro_expander_destroy (me);
1697 /* XXX handle case where the macro invocation doesn't use all the tokens */
/* Discard the tokens consumed by the invocation... */
1698 while (deque_count (&src->deque) > old_count)
1699 lex_source_pop_front (src);
/* ...and replace them with the macro's expansion. */
1701 struct tokens expansion = { .tokens = NULL };
1702 macro_expander_get_expansion (me, &expansion);
1703 macro_expander_destroy (me);
1705 for (size_t i = 0; i < expansion.n; i++)
1707 *lex_push_token__ (src) = (struct lex_token) {
1708 .token = expansion.tokens[i],
1713 free (expansion.tokens);
/* Pushes a sentinel T_ENDCMD token, with zeroed position information, onto
   SRC's deque.  Every new or reset source starts with one of these. */
1719 lex_source_push_endcmd__ (struct lex_source *src)
1721 struct lex_token *token = lex_push_token__ (src);
1722 token->token.type = T_ENDCMD;
1723 token->token_pos = 0;
1724 token->token_len = 0;
1725 token->line_pos = 0;
1726 token->first_line = 0;
1729 static struct lex_source *
/* Creates and returns a new lex_source that wraps READER for LEXER, with its
   segmenter initialized to the reader's syntax mode and a sentinel T_ENDCMD
   token already queued. */
1730 lex_source_create (struct lexer *lexer, struct lex_reader *reader)
1732 struct lex_source *src;
1734 src = xzalloc (sizeof *src);
1735 src->reader = reader;
1736 segmenter_init (&src->segmenter, reader->syntax);
1738 src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens);
1740 lex_source_push_endcmd__ (src);
/* Destroys SRC: invokes its reader's destroy method, drops all buffered
   tokens, and unlinks SRC from its lexer's source list. */
1746 lex_source_destroy (struct lex_source *src)
1748 char *file_name = src->reader->file_name;
1749 char *encoding = src->reader->encoding;
1750 if (src->reader->class->destroy != NULL)
1751 src->reader->class->destroy (src->reader);
/* NOTE(review): file_name and encoding are captured above presumably so
   they can be freed after the reader is destroyed, on lines elided from
   this listing — confirm against the full source. */
1755 while (!deque_is_empty (&src->deque))
1756 lex_source_pop__ (src);
1758 ll_remove (&src->ll);
/* A lex_reader that reads syntax from a file (or stdin) through an
   encoding-converting u8_istream. */
1762 struct lex_file_reader
1764 struct lex_reader reader; /* Common lex_reader header. */
1765 struct u8_istream *istream; /* Underlying UTF-8-converting stream. */
1768 static struct lex_reader_class lex_file_reader_class;
1770 /* Creates and returns a new lex_reader that will read from file FILE_NAME (or
1771 from stdin if FILE_NAME is "-"). The file is expected to be encoded with
1772 ENCODING, which should take one of the forms accepted by
1773 u8_istream_for_file(). SYNTAX and ERROR become the syntax mode and error
1774 mode of the new reader, respectively.
1776 Returns a null pointer if FILE_NAME cannot be opened. */
1778 lex_reader_for_file (const char *file_name, const char *encoding,
1779 enum segmenter_mode syntax,
1780 enum lex_error_mode error)
1782 struct lex_file_reader *r;
1783 struct u8_istream *istream;
/* "-" means stdin, by convention. */
1785 istream = (!strcmp(file_name, "-")
1786 ? u8_istream_for_fd (encoding, STDIN_FILENO)
1787 : u8_istream_for_file (encoding, file_name, O_RDONLY));
1788 if (istream == NULL)
1790 msg (ME, _("Opening `%s': %s."), file_name, strerror (errno));
/* Open succeeded: build the reader around the stream. */
1794 r = xmalloc (sizeof *r);
1795 lex_reader_init (&r->reader, &lex_file_reader_class);
1796 r->reader.syntax = syntax;
1797 r->reader.error = error;
1798 r->reader.file_name = xstrdup (file_name);
1799 r->reader.encoding = xstrdup_if_nonnull (encoding);
1800 r->reader.line_number = 1;
1801 r->istream = istream;
1806 static struct lex_file_reader *
/* Converts abstract lex_reader R into its containing lex_file_reader. */
1807 lex_file_reader_cast (struct lex_reader *r)
1809 return UP_CAST (r, struct lex_file_reader, reader);
/* lex_reader 'read' method for file readers: reads up to N bytes into BUF
   from the underlying u8_istream, reporting read errors via msg().  The
   prompt style is irrelevant for files and is ignored. */
1813 lex_file_read (struct lex_reader *r_, char *buf, size_t n,
1814 enum prompt_style prompt_style UNUSED)
1816 struct lex_file_reader *r = lex_file_reader_cast (r_);
1817 ssize_t n_read = u8_istream_read (r->istream, buf, n);
/* NOTE(review): the error check guarding this msg() call is on an elided
   line (presumably n_read < 0). */
1820 msg (ME, _("Error reading `%s': %s."), r_->file_name, strerror (errno));
/* lex_reader 'destroy' method for file readers: closes the underlying stream
   unless it wraps stdin. */
1827 lex_file_close (struct lex_reader *r_)
1829 struct lex_file_reader *r = lex_file_reader_cast (r_);
/* Never close stdin itself; for real files, report close failures. */
1831 if (u8_istream_fileno (r->istream) != STDIN_FILENO)
1833 if (u8_istream_close (r->istream) != 0)
1834 msg (ME, _("Error closing `%s': %s."), r_->file_name, strerror (errno));
/* stdin case (the 'else' is on an elided line): free only the stream
   wrapper, leaving the file descriptor open. */
1837 u8_istream_free (r->istream);
/* Method table for file-based readers (initializer elided in this listing). */
1842 static struct lex_reader_class lex_file_reader_class =
/* A lex_reader that reads syntax from a string held in memory. */
1848 struct lex_string_reader
1850 struct lex_reader reader; /* Common lex_reader header. */
1855 static struct lex_reader_class lex_string_reader_class;
1857 /* Creates and returns a new lex_reader for the contents of S, which must be
1858 encoded in the given ENCODING. The new reader takes ownership of S and will free it
1859 with ss_dealloc() when it is closed. */
1861 lex_reader_for_substring_nocopy (struct substring s, const char *encoding)
1863 struct lex_string_reader *r;
1865 r = xmalloc (sizeof *r);
1866 lex_reader_init (&r->reader, &lex_string_reader_class);
1867 r->reader.syntax = SEG_MODE_AUTO;
1868 r->reader.encoding = xstrdup_if_nonnull (encoding);
/* NOTE(review): the assignments storing S and the initial read offset are
   on lines elided from this listing. */
1875 /* Creates and returns a new lex_reader for a copy of null-terminated string S,
1876 which must be encoded in ENCODING. The caller retains ownership of S. */
1878 lex_reader_for_string (const char *s, const char *encoding)
1880 struct substring ss;
/* Copy S so that the caller keeps ownership; the nocopy constructor takes
   ownership of the copy. */
1881 ss_alloc_substring (&ss, ss_cstr (s));
1882 return lex_reader_for_substring_nocopy (ss, encoding);
1885 /* Formats FORMAT as a printf()-like format string and creates and returns a
1886 new lex_reader for the formatted result. */
1888 lex_reader_for_format (const char *format, const char *encoding, ...)
1890 struct lex_reader *r;
1893 va_start (args, encoding);
/* xvasprintf() allocates the formatted string; the nocopy constructor takes
   ownership of it.  NOTE(review): the matching va_end() is presumably on an
   elided line — confirm against the full source. */
1894 r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)), encoding);
1900 static struct lex_string_reader *
/* Converts abstract lex_reader R into its containing lex_string_reader. */
1901 lex_string_reader_cast (struct lex_reader *r)
1903 return UP_CAST (r, struct lex_string_reader, reader);
/* lex_reader 'read' method for string readers: copies up to N bytes of the
   remaining string into BUF.  The prompt style is irrelevant for strings and
   is ignored.  NOTE(review): the offset advance and return are on elided
   lines. */
1907 lex_string_read (struct lex_reader *r_, char *buf, size_t n,
1908 enum prompt_style prompt_style UNUSED)
1910 struct lex_string_reader *r = lex_string_reader_cast (r_);
/* Never copy past the end of the string. */
1913 chunk = MIN (n, r->s.length - r->offset);
1914 memcpy (buf, r->s.string + r->offset, chunk);
/* lex_reader 'destroy' method for string readers: releases the owned string
   and the reader itself (the deallocation calls are on elided lines). */
1921 lex_string_close (struct lex_reader *r_)
1923 struct lex_string_reader *r = lex_string_reader_cast (r_);
1929 static struct lex_reader_class lex_string_reader_class =