1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/string-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
/* One frame in a singly linked list that macro_error() walks to describe the
   context of an error.  Only two members are visible in this excerpt: NEXT,
   which links to the following frame, and FILE_NAME, which macro_error()
   copies into the message it builds. */
44 struct macro_expansion_stack
46 const struct macro_expansion_stack *next;
48 const char *file_name;
/* Builds a message of category MSG_C_SYNTAX and severity MSG_S_ERROR whose
   text is FORMAT expanded with the printf-style arguments that follow it.

   For each frame reachable from STACK through its NEXT pointers, a msg_stack
   entry is allocated that records the frame's file name, its first and last
   line, and a translated description naming the macro being expanded.  When
   MT is nonnull and has a nonempty representation, a description can also
   quote an ellipsized copy of that representation. */
53 static void PRINTF_FORMAT (3, 4)
54 macro_error (const struct macro_expansion_stack *stack,
55 const struct macro_token *mt,
56 const char *format, ...)
58 struct msg_stack **ms = NULL;
59 size_t allocated_ms = 0;
/* One entry is produced per expansion-stack frame. */
62 for (const struct macro_expansion_stack *p = stack; p; p = p->next)
/* Grow the array of entries when it is full. */
64 if (n_ms >= allocated_ms)
65 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
67 /* TRANSLATORS: These strings are used for explaining the context of an
68 error. The "At" or "In the expansion" message appears first, followed by
69 zero or more of the "inside the expansion" messages. `innermost',
70 `next_inner', etc., are names of macros, and `foobar' is a piece of
73 foo.sps:12: At `foobar' in the expansion of `innermost',
74 foo.sps:23: inside the expansion of `next_inner',
75 foo.sps:34: inside the expansion of `next_inner2',
76 foo.sps:45: inside the expansion of `outermost',
77 foo.sps:76: This is the actual error message. */
81 if (mt && mt->representation.length)
84 lex_ellipsize (mt->representation, syntax, sizeof syntax);
85 description = xasprintf (_("At `%s' in the expansion of `%s',"),
89 description = xasprintf (_("In the expansion of `%s',"), p->name);
92 description = xasprintf (_("inside the expansion of `%s',"), p->name);
/* Each entry is heap-allocated and gets its own copy of the file name. */
94 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
95 *ms[n_ms] = (struct msg_stack) {
97 .file_name = xstrdup_if_nonnull (p->file_name),
98 .first_line = p->first_line,
99 .last_line = p->last_line,
101 .description = description,
/* Format the caller's message text. */
107 va_start (args, format);
108 char *s = xvasprintf (format, args);
111 struct msg *m = xmalloc (sizeof *m);
113 .category = MSG_C_SYNTAX,
114 .severity = MSG_S_ERROR,
/* Initializes DST as a copy of SRC: the token is duplicated with token_copy()
   and the representation with ss_alloc_substring(). */
123 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
125 token_copy (&dst->token, &src->token);
126 ss_alloc_substring (&dst->representation, src->representation);
/* Releases what MT holds: its token, via token_uninit(), and its
   representation, via ss_dealloc(). */
130 macro_token_uninit (struct macro_token *mt)
132 token_uninit (&mt->token);
133 ss_dealloc (&mt->representation);
/* Appends MT's representation to S. */
137 macro_token_to_representation (struct macro_token *mt, struct string *s)
139 ds_put_substring (s, mt->representation);
/* Returns whether S, after ss_ltrim() has been applied to it with "!" as the
   trim set, is a member of a static string set of macro keywords.  The set is
   filled from the KWS table the first time the function finds it empty. */
143 is_macro_keyword (struct substring s)
145 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
/* Lazy one-time initialization of the keyword set. */
146 if (stringi_set_is_empty (&keywords))
148 static const char *kws[] = {
169 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
170 stringi_set_insert (&keywords, kws[i]);
/* S is passed by value, so trimming it here does not affect the caller. */
173 ss_ltrim (&s, ss_cstr ("!"));
174 return stringi_set_contains_len (&keywords, s.string, s.length);
/* Initializes DST as a copy of SRC: allocates an array with room for SRC->n
   macro tokens and copies each element with macro_token_copy(). */
178 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
180 *dst = (struct macro_tokens) {
181 .mts = xmalloc (src->n * sizeof *dst->mts),
185 for (size_t i = 0; i < src->n; i++)
186 macro_token_copy (&dst->mts[i], &src->mts[i]);
/* Calls macro_token_uninit() on each of the MTS->n tokens in MTS. */
190 macro_tokens_uninit (struct macro_tokens *mts)
192 for (size_t i = 0; i < mts->n; i++)
193 macro_token_uninit (&mts->mts[i]);
/* Reserves the next element of MTS and returns a pointer to it, first growing
   the array with x2nrealloc() if it is full.  The element count is
   incremented, but the element's contents are not set here; the caller is
   expected to fill it in. */
198 macro_tokens_add_uninit (struct macro_tokens *mts)
200 if (mts->n >= mts->allocated)
201 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
202 return &mts->mts[mts->n++];
/* Appends a copy of MT to MTS. */
206 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
208 macro_token_copy (macro_tokens_add_uninit (mts), mt);
/* Tokenizes SRC and appends the resulting macro tokens to MTS.

   The text is split into segments by a segmenter initialized with MODE, and
   each segment is pushed into a scanner until a token is complete.  Each macro
   token's representation is the slice of the input text that the token was
   scanned from.  A token whose type is a scan type other than SCAN_SKIP is
   turned into an error string with scan_token_to_error() and reported through
   macro_error(), with STACK as the expansion context. */
212 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
213 enum segmenter_mode mode,
214 const struct macro_expansion_stack *stack)
218 struct segmenter segmenter;
219 struct substring body;
222 struct state state = {
223 .segmenter = segmenter_init (mode, true),
/* SAVED is a snapshot of the scanning state.  NOTE(review): presumably it is
   what SCAN_SAVE updates and SCAN_BACK restores; the lines that use it are
   not visible in this excerpt. */
226 struct state saved = state;
228 while (state.body.length > 0)
/* The representation starts where the remaining input starts; its length is
   filled in once the token is complete. */
230 struct macro_token mt = {
231 .token = { .type = T_STOP },
232 .representation = { .string = state.body.string },
234 struct token *token = &mt.token;
236 struct scanner scanner;
237 scanner_init (&scanner, token);
241 enum segment_type type;
242 int seg_len = segmenter_push (&state.segmenter, state.body.string,
243 state.body.length, true, &type);
244 assert (seg_len >= 0);
/* Split the segment off the front of the remaining input. */
246 struct substring segment = ss_head (state.body, seg_len);
247 ss_advance (&state.body, seg_len);
249 enum scan_result result = scanner_push (&scanner, type, segment, token);
250 if (result == SCAN_SAVE)
252 else if (result == SCAN_BACK)
257 else if (result == SCAN_DONE)
261 /* We have a token in 'token'. */
262 mt.representation.length = state.body.string - mt.representation.string;
263 if (is_scan_type (token->type))
265 if (token->type != SCAN_SKIP)
267 char *s = scan_token_to_error (token);
270 mt.token.type = T_STRING;
271 macro_error (stack, &mt, "%s", s);
/* macro_tokens_add() stores a copy, so the local token is released after. */
279 macro_tokens_add (mts, &mt);
280 token_uninit (token);
/* Tokenizes SRC in segmenter mode MODE and appends the tokens to MTS.  This
   is macro_tokens_from_string__() with a null expansion stack. */
285 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
286 enum segmenter_mode mode)
288 macro_tokens_from_string__ (mts, src, mode, NULL);
/* Calls token_print() on STREAM for each token in MTS, in order. */
292 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
294 for (size_t i = 0; i < mts->n; i++)
295 token_print (&mts->mts[i].token, stream);
/* Token classes that needs_space() compares to decide whether two adjacent
   tokens must be separated when they are written out as text. */
300 TC_ENDCMD, /* No space before or after (new-line after). */
301 TC_BINOP, /* Space on both sides. */
302 TC_COMMA, /* Space afterward. */
303 TC_ID, /* Don't need spaces except sequentially. */
304 TC_PUNCT, /* Don't need spaces except sequentially. */
/* Decides whether a space is needed between a token of class PREV and a
   following token of class NEXT.  The tests are applied in order: end of
   command, binary operator, comma, and finally the TC_ID/TC_PUNCT case. */
308 needs_space (enum token_class prev, enum token_class next)
310 /* Don't need a space before or after the end of a command.
311 (A new-line is needed afterward as a special case.) */
312 if (prev == TC_ENDCMD || next == TC_ENDCMD)
315 /* Binary operators always have a space on both sides. */
316 if (prev == TC_BINOP || next == TC_BINOP)
319 /* A comma always has a space afterward. */
320 if (prev == TC_COMMA)
323 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
324 two of them in a row. */
/* Maps token TYPE to the token_class used for spacing decisions.  (The body
   of this function is not visible in this excerpt.) */
328 static enum token_class
329 classify_token (enum token_type type)
/* Appends to S the representation of each token in MTS, in order.

   Between token I - 1 and token I a separator may be written: a new-line when
   the earlier token is T_ENDCMD, and a space when needs_space() says the two
   tokens' classes require one.

   OFS and LEN must be both null or both nonnull.  OFS[I] receives the length
   of S just before token I's representation is appended, and LEN[I] receives
   the number of bytes that the representation added. */
382 macro_tokens_to_representation (struct macro_tokens *mts, struct string *s,
383 size_t *ofs, size_t *len)
385 assert ((ofs != NULL) == (len != NULL));
390 for (size_t i = 0; i < mts->n; i++)
394 enum token_type prev = mts->mts[i - 1].token.type;
395 enum token_type next = mts->mts[i].token.type;
397 if (prev == T_ENDCMD)
398 ds_put_byte (s, '\n');
401 enum token_class pc = classify_token (prev);
402 enum token_class nc = classify_token (next);
403 if (needs_space (pc, nc))
404 ds_put_byte (s, ' ');
/* Record where this token's text starts in S, then how long it is. */
409 ofs[i] = s->ss.length;
410 macro_token_to_representation (&mts->mts[i], s);
412 len[i] = s->ss.length - ofs[i];
/* Releases the resources held by macro M.  For each parameter this
   uninitializes its default tokens (DEF), its CHAREND token and its two
   ENCLOSE tokens, and afterward the macro body's tokens.
   NOTE(review): the conditions that select which per-parameter members are
   released are not visible in this excerpt. */
417 macro_destroy (struct macro *m)
424 for (size_t i = 0; i < m->n_params; i++)
426 struct macro_param *p = &m->params[i];
429 macro_tokens_uninit (&p->def);
437 token_uninit (&p->charend);
441 token_uninit (&p->enclose[0]);
442 token_uninit (&p->enclose[1]);
450 macro_tokens_uninit (&m->body);
/* Allocates a new macro set whose hash map of macros is initialized with
   HMAP_INITIALIZER. */
455 macro_set_create (void)
457 struct macro_set *set = xmalloc (sizeof *set);
458 *set = (struct macro_set) {
459 .macros = HMAP_INITIALIZER (set->macros),
/* Destroys SET: every macro is removed from SET's hash map and passed to
   macro_destroy(), and then the hash map itself is destroyed.  The iteration
   uses HMAP_FOR_EACH_SAFE, which keeps a separate NEXT cursor, presumably so
   that deleting the current node does not disturb the traversal. */
465 macro_set_destroy (struct macro_set *set)
470 struct macro *macro, *next;
471 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
/* Unlink the node before the macro that embeds it is destroyed. */
473 hmap_delete (&set->macros, &macro->hmap_node);
474 macro_destroy (macro);
476 hmap_destroy (&set->macros);
/* Returns the hash value under which a macro named NAME is stored, computed
   by utf8_hash_case_string() with a basis of 0. */
481 hash_macro_name (const char *name)
483 return utf8_hash_case_string (name, 0);
/* Searches SET for a macro named NAME.  Only macros in the hash bucket chain
   for hash_macro_name (NAME) are examined; a macro matches when
   utf8_strcasecmp() reports its name equal to NAME. */
486 static struct macro *
487 macro_set_find__ (struct macro_set *set, const char *name)
490 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
491 hash_macro_name (name), &set->macros)
492 if (!utf8_strcasecmp (macro->name, name))
/* Looks up NAME in SET.  This is a wrapper around macro_set_find__() that
   accepts a const set by casting the qualifier away for the lookup. */
499 macro_set_find (const struct macro_set *set, const char *name)
501 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
/* Implementation outline: an existing macro with M's name is looked up as
   VICTIM; the victim is unlinked from the hash map and destroyed; then M is
   inserted under the hash of its own name. */
504 /* Adds M to SET. M replaces any existing macro with the same name. Takes
507 macro_set_add (struct macro_set *set, struct macro *m)
509 struct macro *victim = macro_set_find__ (set, m->name);
512 hmap_delete (&set->macros, &victim->hmap_node);
513 macro_destroy (victim);
516 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
524 /* Accumulating tokens in me->params toward the end of any type of
528 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
531 /* Expecting a keyword for a keyword argument. */
534 /* Expecting an equal sign for a keyword argument. */
/* State for collecting the arguments of one macro call.  Members visible in
   this excerpt:

   MACROS: the macro set the expander was created with.
   MACRO: the macro being called.
   ARGS: one slot per parameter of MACRO; a slot stays null until an argument
   has been stored for that parameter.
   PARAM: the parameter whose argument is currently being collected. */
539 struct macro_expander
541 const struct macro_set *macros;
546 const struct macro *macro;
547 struct macro_tokens **args;
548 const struct macro_param *param;
/* Called when argument collection for ME is complete.  Iterates over the
   macro's parameters and gives parameters a freshly allocated argument that is
   a copy of the parameter's default tokens.
   NOTE(review): the condition that selects which parameters get the default
   (presumably those with no argument yet) and the return value are not
   visible in this excerpt. */
552 me_finished (struct macro_expander *me)
554 for (size_t i = 0; i < me->macro->n_params; i++)
557 me->args[i] = xmalloc (sizeof *me->args[i]);
558 macro_tokens_copy (me->args[i], &me->macro->params[i].def);
/* Advances ME after an argument has been completed.

   Visible behavior: in one branch the macro is asserted to have no
   parameters and collection finishes.  When the current parameter is
   positional, collection finishes once ME->param has reached the end of the
   parameter array; otherwise the next state is chosen from the parameter
   (ME_KEYWORD for a non-positional parameter, ME_ENCLOSE for an ARG_ENCLOSE
   one).  In the remaining branch the parameters are scanned, and the state
   becomes ME_KEYWORD or collection finishes.
   NOTE(review): several conditions are missing from this excerpt, so the
   description above is an outline only. */
564 me_next_arg (struct macro_expander *me)
568 assert (!me->macro->n_params);
569 return me_finished (me);
571 else if (me->param->positional)
574 if (me->param >= &me->macro->params[me->macro->n_params])
575 return me_finished (me);
578 me->state = (!me->param->positional ? ME_KEYWORD
579 : me->param->arg_type == ARG_ENCLOSE ? ME_ENCLOSE
586 for (size_t i = 0; i < me->macro->n_params; i++)
589 me->state = ME_KEYWORD;
592 return me_finished (me);
/* Puts ME into the ME_ERROR state.  (The return value is not visible in this
   excerpt; callers return whatever this function returns.) */
597 me_error (struct macro_expander *me)
599 me->state = ME_ERROR;
/* Handles token MT while ME is accumulating tokens for the current
   parameter's argument.

   An end-of-command or stop token is an error unless the parameter's argument
   type is ARG_CMDEND.  Otherwise the token is handled by argument type:

   ARG_N_TOKENS: the token is added, and the argument is complete once it
   holds at least the parameter's N_TOKENS tokens.

   ARG_CMDEND: an end-of-command or stop token completes the argument; any
   other token is added.

   Other types: a token equal to the terminator (CHAREND for ARG_CHAREND,
   otherwise the closing ENCLOSE token) completes the argument; any other
   token is added. */
604 me_add_arg (struct macro_expander *me, const struct macro_token *mt)
606 const struct macro_param *p = me->param;
608 const struct token *token = &mt->token;
609 if ((token->type == T_ENDCMD || token->type == T_STOP)
610 && p->arg_type != ARG_CMDEND)
612 msg (SE, _("Unexpected end of command reading argument %s "
613 "to macro %s."), me->param->name, me->macro->name);
615 return me_error (me);
/* The argument slot is indexed by the parameter's position in the macro. */
620 struct macro_tokens **argp = &me->args[p - me->macro->params];
622 *argp = xzalloc (sizeof **argp);
623 struct macro_tokens *arg = *argp;
624 if (p->arg_type == ARG_N_TOKENS)
626 macro_tokens_add (arg, mt);
627 if (arg->n >= p->n_tokens)
628 return me_next_arg (me);
631 else if (p->arg_type == ARG_CMDEND)
633 if (token->type == T_ENDCMD || token->type == T_STOP)
634 return me_next_arg (me);
635 macro_tokens_add (arg, mt);
640 const struct token *end
641 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
642 if (token_equal (token, end))
643 return me_next_arg (me);
644 macro_tokens_add (arg, mt);
/* Reports that token ACTUAL was found where token EXPECTED was required while
   reading the current argument, then puts ME into the error state.  When
   ACTUAL has an empty representation, the translated text "<end of input>" is
   shown in its place. */
650 me_expected (struct macro_expander *me, const struct macro_token *actual,
651 const struct token *expected)
653 const struct substring actual_s
654 = (actual->representation.length ? actual->representation
655 : ss_cstr (_("<end of input>")));
656 char *expected_s = token_to_string (expected);
/* ACTUAL_S is a substring, so it is printed with an explicit length. */
657 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
659 (int) actual_s.length, actual_s.string, expected_s,
660 me->param->name, me->macro->name);
663 return me_error (me);
/* Handles token MT while ME expects the opening delimiter of an ARG_ENCLOSE
   argument.  The token is compared with the current parameter's opening
   ENCLOSE token; a mismatch is reported through me_expected().  (What happens
   on a match is not visible in this excerpt.) */
667 me_enclose (struct macro_expander *me, const struct macro_token *mt)
669 const struct token *token = &mt->token;
672 if (token_equal (&me->param->enclose[0], token))
678 return me_expected (me, mt, &me->param->enclose[0]);
/* Searches the parameters of macro M for one named NAME.

   ss_ltrim() is applied to NAME with "!" as the trim set, and the first
   character of each parameter's stored name is skipped, before the two are
   compared with utf8_strncasecmp().
   NOTE(review): skipping the first character assumes every stored parameter
   name begins with a one-byte prefix (presumably "!") -- confirm. */
681 static const struct macro_param *
682 macro_find_parameter_by_name (const struct macro *m, struct substring name)
684 ss_ltrim (&name, ss_cstr ("!"));
686 for (size_t i = 0; i < m->n_params; i++)
688 const struct macro_param *p = &m->params[i];
689 struct substring p_name = ss_cstr (p->name + 1);
690 if (!utf8_strncasecmp (p_name.string, p_name.length,
691 name.string, name.length))
/* Handles token MT while ME expects the keyword of a keyword argument.

   A token that is not an identifier ends argument collection.  Otherwise the
   parameter is looked up by name; if that parameter already has an argument,
   an error is reported, else the expander moves to ME_EQUALS.  The final
   visible path ends argument collection (presumably when no parameter
   matches; the condition is not visible in this excerpt). */
698 me_keyword (struct macro_expander *me, const struct macro_token *mt)
700 const struct token *token = &mt->token;
701 if (token->type != T_ID)
702 return me_finished (me);
704 const struct macro_param *p = macro_find_parameter_by_name (me->macro,
708 size_t arg_index = p - me->macro->params;
/* A nonnull slot means this keyword was already given in the call. */
710 if (me->args[arg_index])
713 _("Argument %s multiply specified in call to macro %s."),
714 p->name, me->macro->name);
715 return me_error (me);
719 me->state = ME_EQUALS;
723 return me_finished (me);
/* Handles token MT while ME expects the equals sign of a keyword argument.
   Any other token is reported through me_expected() against a T_EQUALS token.
   (What happens when the equals sign is found is not visible in this
   excerpt.) */
727 me_equals (struct macro_expander *me, const struct macro_token *mt)
729 const struct token *token = &mt->token;
732 if (token->type == T_EQUALS)
738 return me_expected (me, mt, &(struct token) { .type = T_EQUALS });
/* Tries to start a macro call at TOKEN.

   Early exits are taken when MACROS is empty and when TOKEN is neither T_ID
   nor T_MACRO_ID.  Otherwise the macro named by TOKEN's string is looked up in
   MACROS and a new macro_expander is allocated for it.  For a macro that has
   parameters, the initial state is chosen from the first parameter
   (ME_KEYWORD if it is not positional, ME_ENCLOSE if its type is
   ARG_ENCLOSE), a zeroed array with one argument slot per parameter is
   allocated, and PARAM points at the first parameter.
   NOTE(review): the return values and the use of MEP are not visible in this
   excerpt. */
742 macro_expander_create (const struct macro_set *macros,
743 const struct token *token,
744 struct macro_expander **mep)
747 if (macro_set_is_empty (macros))
749 if (token->type != T_ID && token->type != T_MACRO_ID)
752 const struct macro *macro = macro_set_find (macros, token->string.string);
756 struct macro_expander *me = xmalloc (sizeof *me);
757 *me = (struct macro_expander) {
764 if (!macro->n_params)
768 me->state = (!macro->params[0].positional ? ME_KEYWORD
769 : macro->params[0].arg_type == ARG_ENCLOSE ? ME_ENCLOSE
/* xcalloc() zeroes the slots, so every argument starts out null. */
771 me->args = xcalloc (macro->n_params, sizeof *me->args);
772 me->param = macro->params;
/* Releases ME.  The argument slots, one per parameter of ME's macro, are
   visited and their tokens uninitialized with macro_tokens_uninit().
   NOTE(review): null checks and the freeing of the containers are not visible
   in this excerpt. */
778 macro_expander_destroy (struct macro_expander *me)
783 for (size_t i = 0; i < me->macro->n_params; i++)
786 macro_tokens_uninit (me->args[i]);
793 /* Adds TOKEN to the collection of tokens in ME that potentially need to be
796 Returns -1 if the tokens added do not actually invoke a macro. The caller
797 should consume the first token without expanding it.
799 Returns 0 if the macro expander needs more tokens, for macro arguments or to
800 decide whether this is actually a macro invocation. The caller should call
801 macro_expander_add() again with the next token.
803 Returns a positive number to indicate that the returned number of tokens
804 invoke a macro. The number returned might be less than the number of tokens
805 added because it can take a few tokens of lookahead to determine whether the
806 macro invocation is finished. The caller should call
807 macro_expander_get_expansion() to obtain the expansion. */
809 macro_expander_add (struct macro_expander *me, const struct macro_token *mt)
/* The token is handed to one of four handlers: argument accumulation,
   opening delimiter, keyword, or equals sign.  NOTE(review): the dispatch is
   presumably on ME's state; the selecting lines are not visible here. */
817 return me_add_arg (me, mt);
820 return me_enclose (me, mt);
823 return me_keyword (me, mt);
826 return me_equals (me, mt);
833 /* Each argument to a macro function is one of:
835 - A quoted string or other single literal token.
837 - An argument to the macro being expanded, e.g. !1 or a named argument.
841 - A function invocation.
843 Each function invocation yields a character sequence to be turned into a
844 sequence of tokens. The case where that character sequence is a single
845 quoted string is an important special case.
/* Context passed to the macro function parsers.  Members visible in this
   excerpt:

   INPUT: the tokens to parse (functions are matched at INPUT[0]).
   NESTING_COUNTDOWN: forwarded to macro_expand(), reduced by one, for !EVAL.
   SEGMENTER_MODE: the mode used when strings are tokenized or unquoted.
   MACROS: the set of defined macros.
   ME: the expander for the macro call in progress, whose arguments a function
   argument may refer to.
   STACK: the expansion context used for error messages.
   VARS: a string map that function arguments are looked up in by token
   string. */
847 struct parse_macro_function_ctx
849 const struct macro_token *input;
851 int nesting_countdown;
852 enum segmenter_mode segmenter_mode;
853 const struct macro_set *macros;
854 const struct macro_expander *me;
855 const struct macro_expansion_stack *stack;
856 struct string_map *vars;
/* Forward declaration: macro_expand() is called by functions defined before
   its own definition (for example by the !EVAL handling and by
   macro_expand_if()). */
861 macro_expand (const struct macro_tokens *, int nesting_countdown,
862 enum segmenter_mode segmenter_mode, const struct macro_set *,
863 const struct macro_expander *, struct string_map *vars,
864 const struct macro_expansion_stack *stack,
865 bool *expand, bool *break_,
866 struct macro_tokens *exp);
/* Forward declaration: parse_function_arg() calls expand_macro_function() for
   nested function calls before the latter is defined. */
869 expand_macro_function (struct parse_macro_function_ctx *ctx,
870 struct string *output, size_t *input_consumed);
/* Implementation note: the first token of the pair must be a T_MACRO_ID whose
   string is exactly "!", and the second must be T_ASTERISK. */
872 /* Returns true if the pair of tokens starting at offset OFS within MTS are !*,
875 is_bang_star (const struct macro_token *mts, size_t n, size_t ofs)
878 && mts[ofs].token.type == T_MACRO_ID
879 && ss_equals (mts[ofs].token.string, ss_cstr ("!"))
880 && mts[ofs + 1].token.type == T_ASTERISK);
/* Parses one macro function argument that starts at token I of CTX's input and
   appends its text to FARG.

   For a T_MACRO_ID token, the visible alternatives are:

   - The name of a parameter of the macro being expanded: the representations
   of that argument's tokens are appended.

   - The token pair recognized by is_bang_star(): the representations of the
   tokens of the arguments are appended, skipping or selecting parameters by
   their POSITIONAL flag.

   - A name found in CTX's variable map: the variable's value is appended.

   - A nested macro function call, expanded through expand_macro_function() on
   a sub-context that starts at token I; the number of tokens it consumed is
   returned.

   Otherwise the token's own representation is appended.
   NOTE(review): the separator conditions, the other return values, and the
   exact branch conditions are not visible in this excerpt. */
884 parse_function_arg (struct parse_macro_function_ctx *ctx,
885 size_t i, struct string *farg)
887 const struct macro_token *tokens = ctx->input;
888 const struct token *token = &tokens[i].token;
889 if (token->type == T_MACRO_ID)
891 const struct macro_param *param = macro_find_parameter_by_name (
892 ctx->me->macro, token->string);
/* The argument slot has the same index as the parameter. */
895 size_t param_idx = param - ctx->me->macro->params;
896 const struct macro_tokens *marg = ctx->me->args[param_idx];
897 for (size_t i = 0; i < marg->n; i++)
900 ds_put_byte (farg, ' ');
901 ds_put_substring (farg, marg->mts[i].representation);
906 if (is_bang_star (ctx->input, ctx->n_input, i))
908 for (size_t i = 0; i < ctx->me->macro->n_params; i++)
910 if (!ctx->me->macro->params[i].positional)
913 const struct macro_tokens *marg = ctx->me->args[i];
914 for (size_t j = 0; j < marg->n; j++)
917 ds_put_byte (farg, ' ');
918 ds_put_substring (farg, marg->mts[j].representation);
926 const char *value = string_map_find__ (ctx->vars,
927 token->string.string,
928 token->string.length);
931 ds_put_cstr (farg, value);
/* Try the token as the start of a nested macro function call. */
936 struct parse_macro_function_ctx subctx = {
937 .input = &ctx->input[i],
938 .n_input = ctx->n_input - i,
939 .nesting_countdown = ctx->nesting_countdown,
940 .segmenter_mode = ctx->segmenter_mode,
941 .macros = ctx->macros,
945 .expand = ctx->expand,
947 size_t subinput_consumed;
948 if (expand_macro_function (&subctx, farg, &subinput_consumed))
949 return subinput_consumed;
952 ds_put_substring (farg, tokens[i].representation);
/* Tries to parse a call to the macro function named FUNCTION at the start of
   CTX's input.

   The first token must be a T_MACRO_ID whose string matches FUNCTION according
   to ss_equals_case(), and the second token must be a left parenthesis.  The
   arguments are parsed one at a time with parse_function_arg() and collected
   in ARGS; they are separated by commas and ended by a right parenthesis.  At
   the right parenthesis, *INPUT_CONSUMED is set to the number of tokens used
   and the argument count is checked against MIN_ARGS and MAX_ARGS.

   Problems are reported through macro_error(), and ARGS is destroyed on the
   visible cleanup path. */
957 parse_macro_function (struct parse_macro_function_ctx *ctx,
958 struct string_array *args,
959 struct substring function,
960 int min_args, int max_args,
961 size_t *input_consumed)
963 const struct macro_token *tokens = ctx->input;
964 size_t n_tokens = ctx->n_input;
967 || tokens[0].token.type != T_MACRO_ID
968 || !ss_equals_case (tokens[0].token.string, function)) /* XXX abbrevs allowed */
971 if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
973 macro_error (ctx->stack, n_tokens > 1 ? &tokens[1] : NULL,
974 _("`(' expected following %s."), function.string);
978 string_array_init (args);
/* Token 0 is the function name and token 1 the left parenthesis, so the
   arguments start at token 2. */
980 for (size_t i = 2;; )
984 if (tokens[i].token.type == T_RPAREN)
986 *input_consumed = i + 1;
987 if (args->n < min_args || args->n > max_args)
989 macro_error (ctx->stack, &tokens[i],
990 _("Wrong number of arguments to macro function %s."),
997 struct string s = DS_EMPTY_INITIALIZER;
998 i += parse_function_arg (ctx, i, &s);
1002 goto unexpected_end;
/* ARGS takes over the string's storage; it is not copied. */
1004 string_array_append_nocopy (args, ds_steal_cstr (&s));
1006 if (tokens[i].token.type == T_COMMA)
1008 else if (tokens[i].token.type != T_RPAREN)
1010 macro_error (ctx->stack, &tokens[i],
1011 _("`,' or `)' expected in call to macro function %s."),
1018 macro_error (ctx->stack, NULL, _("Missing `)' in call to macro function %s."),
1022 string_array_destroy (args);
/* Checks whether S consists of exactly one string token when it is lexed in
   SEGMENTER_MODE and, if so, appends the string's contents to CONTENT.

   The checks are made in order: a first token must be obtainable, it must
   have type T_STRING, and no second token may follow it.  Tokens obtained
   along the way are uninitialized before leaving.
   NOTE(review): one visible caller (the !QUOTE handling) passes a null
   CONTENT; whether the append is guarded against that is not visible in this
   excerpt. */
1027 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1028 struct string *content)
1030 struct string_lexer slex;
1031 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1033 struct token token1;
1034 if (!string_lexer_next (&slex, &token1))
1037 if (token1.type != T_STRING)
1039 token_uninit (&token1);
/* A second token means S is more than a single quoted string. */
1043 struct token token2;
1044 if (string_lexer_next (&slex, &token2))
1046 token_uninit (&token1);
1047 token_uninit (&token2);
1051 ds_put_substring (content, token1.string);
1052 token_uninit (&token1);
/* Returns the unquoted form of S if unquote_string() accepts it, and S itself
   otherwise.  TMP is initialized as an empty string in either case and holds
   the unquoted text when there is any, so the returned pointer may point into
   TMP; the callers visible in this file destroy TMP after using the result. */
1057 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1060 ds_init_empty (tmp);
1061 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
/* Parses S as a base-10 integer with strtol() and stores the result in *NP,
   clamped to the range of int.  Returns true only if nothing but characters
   from CC_SPACES follows the number, strtol() did not report ERANGE, and the
   value fit in an int without clamping.
   NOTE(review): the check that at least one digit was consumed, and the reset
   of errno before the call, are not visible in this excerpt. */
1065 parse_integer (const char *s, int *np)
1070 long int n = strtol (s, &tail, 10);
1071 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
/* Trailing white space is allowed. */
1072 tail += strspn (tail, CC_SPACES);
1073 return *tail == '\0' && errno != ERANGE && n == *np;
/* Tries each macro function in turn at the start of CTX's input and, for the
   one that parses, appends its result to OUTPUT.  *INPUT_CONSUMED is set by
   parse_macro_function() (or to 1 for !NULL) to the number of tokens used.

   Functions handled here: !LENGTH, !BLANKS, !CONCAT, !HEAD, !INDEX, !QUOTE,
   !SUBSTR, !TAIL, !UNQUOTE, !UPCASE, !EVAL and !NULL.  Invalid numeric
   arguments are reported through macro_error() and the argument array is
   destroyed before leaving. */
1077 expand_macro_function (struct parse_macro_function_ctx *ctx,
1078 struct string *output,
1079 size_t *input_consumed)
1081 struct string_array args;
1083 if (parse_macro_function (ctx, &args, ss_cstr ("!LENGTH"), 1, 1,
1085 ds_put_format (output, "%zu", strlen (args.strings[0]));
1086 else if (parse_macro_function (ctx, &args, ss_cstr ("!BLANKS"), 1, 1,
/* A negative count must be rejected here: it would otherwise be converted to
   a huge unsigned count when passed to ds_put_byte_multiple(). */
1090 if (!parse_integer (args.strings[0], &n) || n < 0)
1092 macro_error (ctx->stack, NULL,
1093 _("Argument to !BLANKS must be non-negative integer "
1094 "(not \"%s\")."), args.strings[0]);
1095 string_array_destroy (&args);
1099 ds_put_byte_multiple (output, ' ', n);
1101 else if (parse_macro_function (ctx, &args, ss_cstr ("!CONCAT"), 1, INT_MAX,
/* Each argument is appended unquoted if it is a single quoted string, and
   verbatim otherwise. */
1104 for (size_t i = 0; i < args.n; i++)
1105 if (!unquote_string (args.strings[i], ctx->segmenter_mode, output))
1106 ds_put_cstr (output, args.strings[i]);
1108 else if (parse_macro_function (ctx, &args, ss_cstr ("!HEAD"), 1, 1,
1112 const char *s = unquote_string_in_place (args.strings[0],
1113 ctx->segmenter_mode, &tmp);
1115 struct macro_tokens mts = { .n = 0 };
1116 macro_tokens_from_string__ (&mts, ss_cstr (s), ctx->segmenter_mode,
1119 ds_put_substring (output, mts.mts[0].representation);
1120 macro_tokens_uninit (&mts);
1123 else if (parse_macro_function (ctx, &args, ss_cstr ("!INDEX"), 2, 2,
1126 const char *haystack = args.strings[0];
1127 const char *needle = strstr (haystack, args.strings[1]);
/* The result is a 1-based position, or 0 if the needle is absent.  The casts
   make the argument a size_t, as the %zu conversion requires. */
1128 ds_put_format (output, "%zu", needle ? (size_t) (needle - haystack + 1) : (size_t) 0);
1130 else if (parse_macro_function (ctx, &args, ss_cstr ("!QUOTE"), 1, 1,
/* An argument that is already a single quoted string is passed through. */
1133 if (unquote_string (args.strings[0], ctx->segmenter_mode, NULL))
1134 ds_put_cstr (output, args.strings[0]);
1137 ds_extend (output, strlen (args.strings[0]) + 2);
1138 ds_put_byte (output, '\'');
1139 for (const char *p = args.strings[0]; *p; p++)
1142 ds_put_byte (output, '\'');
1143 ds_put_byte (output, *p);
1145 ds_put_byte (output, '\'');
1148 else if (parse_macro_function (ctx, &args, ss_cstr ("!SUBSTR"), 2, 3,
1152 if (!parse_integer (args.strings[1], &start) || start < 1)
1154 macro_error (ctx->stack, NULL,
1155 _("Second argument of !SUBSTR must be "
1156 "positive integer (not \"%s\")."),
1158 string_array_destroy (&args);
/* With no third argument, the substring extends to the end of the string. */
1162 int count = INT_MAX;
1163 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1165 macro_error (ctx->stack, NULL,
1166 _("Third argument of !SUBSTR must be "
1167 "non-negative integer (not \"%s\")."),
1169 string_array_destroy (&args);
/* START is 1-based in the macro language; ss_substr() takes a 0-based one. */
1173 struct substring s = ss_cstr (args.strings[0]);
1174 ds_put_substring (output, ss_substr (s, start - 1, count));
1176 else if (parse_macro_function (ctx, &args, ss_cstr ("!TAIL"), 1, 1,
1180 const char *s = unquote_string_in_place (args.strings[0],
1181 ctx->segmenter_mode, &tmp);
1183 struct macro_tokens mts = { .n = 0 };
1184 macro_tokens_from_string__ (&mts, ss_cstr (s), ctx->segmenter_mode,
/* TAIL is a view of every token after the first; it borrows MTS's array. */
1188 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1189 macro_tokens_to_representation (&tail, output, NULL, NULL);
1191 macro_tokens_uninit (&mts);
1194 else if (parse_macro_function (ctx, &args, ss_cstr ("!UNQUOTE"), 1, 1,
1197 if (!unquote_string (args.strings[0], ctx->segmenter_mode, output))
1198 ds_put_cstr (output, args.strings[0]);
1200 else if (parse_macro_function (ctx, &args, ss_cstr ("!UPCASE"), 1, 1,
1204 const char *s = unquote_string_in_place (args.strings[0],
1205 ctx->segmenter_mode, &tmp);
1206 char *upper = utf8_to_upper (s);
1207 ds_put_cstr (output, upper);
1211 else if (parse_macro_function (ctx, &args, ss_cstr ("!EVAL"), 1, 1,
/* The argument is tokenized, macro-expanded with the nesting countdown
   reduced by one, and the expansion is converted back to text. */
1214 struct macro_tokens mts = { .n = 0 };
1215 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1216 ctx->segmenter_mode, ctx->stack);
1217 struct macro_tokens exp = { .n = 0 };
1218 macro_expand (&mts, ctx->nesting_countdown - 1, ctx->segmenter_mode,
1219 ctx->macros, ctx->me, ctx->vars,
1220 &(struct macro_expansion_stack) {
1223 }, ctx->expand, NULL, &exp);
1224 macro_tokens_to_representation (&exp, output, NULL, NULL);
1225 macro_tokens_uninit (&exp);
1226 macro_tokens_uninit (&mts);
/* !NULL takes no parentheses or arguments, so it is matched directly. */
1228 else if (ctx->n_input > 0
1229 && ctx->input[0].token.type == T_MACRO_ID
1230 && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!NULL")))
1232 *input_consumed = 1;
1238 string_array_destroy (&args);
/* Members of the context shared by the macro expression evaluators
   (macro_evaluate_or() and the functions it calls).  They are filled in by
   macro_evaluate_expression() and copied into a parse_macro_function_ctx by
   macro_evaluate_literal(). */
1244 int nesting_countdown;
1245 enum segmenter_mode segmenter_mode;
1246 const struct macro_set *macros;
1247 const struct macro_expander *me;
1248 const struct macro_expansion_stack *stack;
1249 struct string_map *vars;
/* Forward declaration: macro_evaluate_literal() calls macro_evaluate_or() for
   parenthesized subexpressions before the latter is defined. */
1253 static char *macro_evaluate_or (const struct expr_context *ctx,
1254 const struct macro_token **tokens,
1255 const struct macro_token *end);
/* Evaluates the innermost level of a macro expression, starting at *TOKENS
   and not reading at or beyond END.

   A left parenthesis starts a subexpression that is evaluated with
   macro_evaluate_or() and must be followed by a right parenthesis.  A right
   parenthesis where an operand is required is an error.  Anything else is
   parsed as a function argument with parse_function_arg(); if the resulting
   text is a single quoted string, the unquoted contents replace it.

   On the visible success path, *TOKENS is advanced past the tokens consumed
   and the value is returned as a newly allocated string. */
1258 macro_evaluate_literal (const struct expr_context *ctx,
1259 const struct macro_token **tokens,
1260 const struct macro_token *end)
1262 const struct macro_token *p = *tokens;
1265 if (p->token.type == T_LPAREN)
1268 char *value = macro_evaluate_or (ctx, &p, end);
1271 if (p >= end || p->token.type != T_RPAREN)
1274 macro_error (ctx->stack, p < end ? p : NULL,
1275 _("Expecting ')' in macro expression."));
1282 else if (p->token.type == T_RPAREN)
1284 macro_error (ctx->stack, p, _("Expecting literal or function invocation "
1285 "in macro expression."));
1289 struct parse_macro_function_ctx fctx = {
1292 .nesting_countdown = ctx->nesting_countdown,
1293 .segmenter_mode = ctx->segmenter_mode,
1294 .macros = ctx->macros,
1296 .stack = ctx->stack,
1298 .expand = ctx->expand,
1300 struct string function_output = DS_EMPTY_INITIALIZER;
1301 size_t function_consumed = parse_function_arg (&fctx, 0, &function_output);
/* If the text is one quoted string, keep its unquoted contents instead. */
1302 struct string unquoted = DS_EMPTY_INITIALIZER;
1303 if (unquote_string (ds_cstr (&function_output), ctx->segmenter_mode,
1306 ds_swap (&function_output, &unquoted);
1307 ds_destroy (&unquoted);
1309 *tokens = p + function_consumed;
1310 return ds_steal_cstr (&function_output);
1313 /* Returns true if MT is valid as a macro operator. Only operators written as
1314 symbols (e.g. <>) are usable in macro expressions, not operators written as
1315 letters (e.g. EQ). */
1317 is_macro_operator (const struct macro_token *mt)
/* The test looks only at the first byte of the token's representation. */
1319 return (mt->representation.length > 0
1320 && !c_isalpha (mt->representation.string[0]));
/* Returns the relational operator that token MT stands for.

   Visible behavior: for the token types handled by the first visible return,
   the token's own type is returned if is_macro_operator() accepts MT, and
   T_STOP otherwise.  A token whose string matches !EQ, !NE, !LT, !GT, !LE or
   !GE according to ss_equals_case() yields the corresponding T_* operator
   type.
   NOTE(review): the case labels and the default result are not visible in
   this excerpt. */
1323 static enum token_type
1324 parse_relational_op (const struct macro_token *mt)
1326 switch (mt->token.type)
1336 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1339 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1340 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1341 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1342 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1343 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1344 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
/* Evaluates an optional relational comparison in a macro expression.

   The left operand is evaluated with macro_evaluate_literal().  If a
   relational operator follows, the right operand is evaluated the same way,
   both operands are unquoted when they are single quoted strings, and they
   are compared as strings with strcmp().  The visible result of a comparison
   is a newly allocated "1" or "0".
   NOTE(review): what is returned when no operator follows (presumably the
   left operand itself) is not visible in this excerpt. */
1353 macro_evaluate_relational (const struct expr_context *ctx,
1354 const struct macro_token **tokens,
1355 const struct macro_token *end)
1357 const struct macro_token *p = *tokens;
1358 char *lhs = macro_evaluate_literal (ctx, &p, end);
/* T_STOP stands for "no operator" when the input is exhausted. */
1362 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1370 char *rhs = macro_evaluate_literal (ctx, &p, end);
1377 struct string lhs_tmp, rhs_tmp;
1378 int cmp = strcmp (unquote_string_in_place (lhs, ctx->segmenter_mode,
1380 unquote_string_in_place (rhs, ctx->segmenter_mode,
1382 ds_destroy (&lhs_tmp);
1383 ds_destroy (&rhs_tmp);
1388 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1390 : op == T_LT ? cmp < 0
1391 : op == T_GT ? cmp > 0
1392 : op == T_LE ? cmp <= 0
1393 : /* T_GE */ cmp >= 0);
1396 return xstrdup (b ? "1" : "0");
/* Evaluates a macro expression operand that may be preceded by negation
   operators.

   Tokens whose representation is !NOT (compared with ss_equals_case()) or ~
   are counted as negations, and then the operand is evaluated with
   macro_evaluate_relational().  If evaluation failed or there were no
   negations, the early exit is taken (presumably returning the operand as
   is; the line is not visible in this excerpt).  Otherwise the result is a
   newly allocated "1" or "0": the operand counts as true unless it is the
   string "0", and an odd number of negations inverts that. */
1400 macro_evaluate_not (const struct expr_context *ctx,
1401 const struct macro_token **tokens,
1402 const struct macro_token *end)
1404 const struct macro_token *p = *tokens;
1406 unsigned int negations = 0;
1408 && (ss_equals_case (p->representation, ss_cstr ("!NOT"))
1409 || ss_equals (p->representation, ss_cstr ("~"))))
1415 char *operand = macro_evaluate_relational (ctx, &p, end);
1416 if (!operand || !negations)
/* strcmp() only promises a negative, zero or positive result, so it must be
   reduced to 0 or 1 before being XORed with the parity of the negations. */
1422 bool b = (strcmp (operand, "0") != 0) ^ (negations & 1);
1425 return xstrdup (b ? "1" : "0");
/* Evaluates a chain of macro expression operands joined by AND operators.

   The first operand is evaluated with macro_evaluate_not().  While the next
   token's representation is !AND (compared with ss_equals_case()) or &, the
   following operand is evaluated the same way and the running value becomes a
   newly allocated "1" if neither side is the string "0", and "0" otherwise. */
1429 macro_evaluate_and (const struct expr_context *ctx,
1430 const struct macro_token **tokens,
1431 const struct macro_token *end)
1433 const struct macro_token *p = *tokens;
1434 char *lhs = macro_evaluate_not (ctx, &p, end);
1439 && (ss_equals_case (p->representation, ss_cstr ("!AND"))
1440 || ss_equals (p->representation, ss_cstr ("&"))))
1443 char *rhs = macro_evaluate_not (ctx, &p, end);
/* Any string other than "0" counts as true. */
1450 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1453 lhs = xstrdup (b ? "1" : "0");
/* Evaluates a chain of macro expression operands joined by OR operators.

   The first operand is evaluated with macro_evaluate_and().  While the next
   token's representation is !OR (compared with ss_equals_case()) or |, the
   following operand is evaluated the same way and the running value becomes a
   newly allocated "1" if either side is not the string "0", and "0"
   otherwise. */
1460 macro_evaluate_or (const struct expr_context *ctx,
1461 const struct macro_token **tokens,
1462 const struct macro_token *end)
1464 const struct macro_token *p = *tokens;
1465 char *lhs = macro_evaluate_and (ctx, &p, end);
1470 && (ss_equals_case (p->representation, ss_cstr ("!OR"))
1471 || ss_equals (p->representation, ss_cstr ("|"))))
1474 char *rhs = macro_evaluate_and (ctx, &p, end);
/* Any string other than "0" counts as true. */
1481 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1484 lhs = xstrdup (b ? "1" : "0");
/* Evaluates the macro expression that starts at *TOKENS and extends for at
   most N_TOKENS tokens.  The remaining parameters are packed into an
   expr_context, and evaluation starts at the lowest-precedence level,
   macro_evaluate_or(), whose result is returned.  *TOKENS is passed through to
   the evaluator, which advances it. */
1491 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1492 int nesting_countdown,
1493 enum segmenter_mode segmenter_mode,
1494 const struct macro_set *macros,
1495 const struct macro_expander *me,
1496 const struct macro_expansion_stack *stack,
1497 struct string_map *vars, bool *expand)
1499 const struct expr_context ctx = {
1500 .nesting_countdown = nesting_countdown,
1501 .segmenter_mode = segmenter_mode,
/* The end pointer is computed from the starting position before evaluation
   moves *TOKENS. */
1508 return macro_evaluate_or (&ctx, tokens, *tokens + n_tokens);
/* Evaluates a macro expression as macro_evaluate_expression() does and
   requires the result to be a number.

   The result string is tokenized; unless it is exactly one token and
   token_is_number() accepts that token, an error is reported through
   macro_error().  Otherwise the token's numeric value is stored in *NUMBER.
   The temporary tokens are uninitialized on both paths.
   NOTE(review): the return values are not visible in this excerpt. */
1512 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1513 int nesting_countdown,
1514 enum segmenter_mode segmenter_mode,
1515 const struct macro_set *macros,
1516 const struct macro_expander *me,
1517 const struct macro_expansion_stack *stack,
1518 struct string_map *vars,
1519 bool *expand, double *number)
1521 char *s = macro_evaluate_expression (tokens, n_tokens, nesting_countdown,
1522 segmenter_mode, macros, me, stack, vars,
1527 struct macro_tokens mts = { .n = 0 };
1528 macro_tokens_from_string__ (&mts, ss_cstr (s), segmenter_mode, stack);
1529 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
1531 macro_error (stack, mts.n > 0 ? &mts.mts[0] : NULL,
1532 _("Macro expression must evaluate to "
1533 "a number (not \"%s\")."), s);
1535 macro_tokens_uninit (&mts);
1539 *number = token_number (&mts.mts[0].token);
1541 macro_tokens_uninit (&mts);
/* Scans the tokens from P up to but not including END for the token that ends
   the current clause of an !IF construct.  Only T_MACRO_ID tokens are
   examined; !IF, !IFEND and !ELSE are recognized with ss_equals_case(), and
   !ELSE counts only when NESTING is zero.
   NOTE(review): NESTING is presumably raised by !IF and lowered by !IFEND so
   that inner constructs are skipped; those lines and the return statements
   are not visible in this excerpt. */
1545 static const struct macro_token *
1546 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1549 for (; p < end; p++)
1551 if (p->token.type != T_MACRO_ID)
1554 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1556 else if (ss_equals_case (p->token.string, ss_cstr ("!IFEND")))
1562 else if (ss_equals_case (p->token.string, ss_cstr ("!ELSE")) && !nesting)
/* Expands an !IF construct that starts at TOKENS[0], appending the expansion
   of the selected branch to EXP.

   The steps are: check that the first token is !IF; evaluate the condition
   with macro_evaluate_expression() (any result other than the string "0" is
   true); require !THEN; locate the end of the THEN clause with
   find_ifend_clause(); if that clause ends at !ELSE, locate the !IFEND that
   ends the ELSE clause.  The selected clause's tokens are then expanded with
   macro_expand().

   On the visible success path, the return value is the number of tokens that
   the construct occupies, through !IFEND.  Problems are reported through
   macro_error(). */
1569 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1570 int nesting_countdown, enum segmenter_mode segmenter_mode,
1571 const struct macro_set *macros,
1572 const struct macro_expander *me,
1573 const struct macro_expansion_stack *stack,
1574 struct string_map *vars,
1575 bool *expand, bool *break_, struct macro_tokens *exp)
1577 const struct macro_token *p = tokens;
1578 const struct macro_token *end = tokens + n_tokens;
1580 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
1584 char *result = macro_evaluate_expression (&p, end - p,
1585 nesting_countdown, segmenter_mode,
1587 stack, vars, expand);
/* Any result other than "0" selects the THEN clause. */
1590 bool b = strcmp (result, "0");
1594 || p->token.type != T_MACRO_ID
1595 || !ss_equals_case (p->token.string, ss_cstr ("!THEN")))
1597 macro_error (stack, p < end ? p : NULL,
1598 _("!THEN expected in macro !IF construct."));
1602 const struct macro_token *start_then = p + 1;
1603 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1606 macro_error (stack, NULL,
1607 _("!ELSE or !IFEND expected in macro !IF construct."));
1611 const struct macro_token *start_else, *end_if;
1612 if (ss_equals_case (end_then->token.string, ss_cstr ("!ELSE")))
1614 start_else = end_then + 1;
1615 end_if = find_ifend_clause (start_else, end);
1617 || !ss_equals_case (end_if->token.string, ss_cstr ("!IFEND")))
1619 macro_error (stack, end_if ? end_if : NULL,
1620 _("!IFEND expected in macro !IF construct."));
1630 const struct macro_token *start;
1635 n = end_then - start_then;
1637 else if (start_else)
1640 n = end_if - start_else;
/* MTS is a view of the selected clause inside the caller's token array; the
   cast only removes const so that the pointer fits the struct member. */
1650 struct macro_tokens mts = {
1651 .mts = CONST_CAST (struct macro_token *, start),
1654 macro_expand (&mts, nesting_countdown, segmenter_mode, macros, me, vars,
1655 &(struct macro_expansion_stack) {
1659 expand, break_, exp);
1661 return (end_if + 1) - tokens;
/* Parses a "!LET !var = expression" construct that begins at TOKENS[0],
   where N_TOKENS tokens are available.

   The visible steps are:

     - Check that the first token is !LET.
     - Require a T_MACRO_ID token as the variable name.  The name is rejected
       if is_macro_keyword() accepts it or if it names a parameter of
       ME->macro.
     - Require a T_EQUALS token.
     - Evaluate the expression with macro_evaluate_expression(), which is
       handed &P so that it can advance past the expression.
     - Store the result in VARS under a copy of the variable name.

   Problems are reported through macro_error().

   NOTE(review): the return statements and the increments of P are not
   visible in this listing; the caller stores the result in a size_t, so
   presumably the return value is the number of tokens consumed, or 0 on
   failure -- confirm.

   NOTE(review): ME->macro is dereferenced unconditionally here, whereas
   macro_expand() tests ME for null before using it.  Confirm that this
   function cannot be reached with a null ME. */
1665 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1666 int nesting_countdown, enum segmenter_mode segmenter_mode,
1667 const struct macro_set *macros,
1668 const struct macro_expander *me,
1669 const struct macro_expansion_stack *stack,
1670 struct string_map *vars, bool *expand)
1672 const struct macro_token *p = tokens;
1673 const struct macro_token *end = tokens + n_tokens;
1675 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
/* The variable name must be a macro identifier. */
1679 if (p >= end || p->token.type != T_MACRO_ID)
1681 macro_error (stack, p < end ? p : NULL,
1682 _("Expected macro variable name following !LET."));
/* VAR_NAME is a substring that points into the token's own string. */
1685 const struct substring var_name = p->token.string;
1686 if (is_macro_keyword (var_name)
1687 || macro_find_parameter_by_name (me->macro, var_name))
1689 macro_error (stack, p < end ? p : NULL,
1690 _("Cannot use argument name or macro keyword "
1691 "\"%.*s\" as !LET variable."),
1692 (int) var_name.length, var_name.string);
1697 if (p >= end || p->token.type != T_EQUALS)
1699 macro_error (stack, p < end ? p : NULL,
1700 _("Expected `=' following !LET."));
1705 char *value = macro_evaluate_expression (&p, end - p, nesting_countdown,
1706 segmenter_mode, macros, me, stack,
/* The key is a fresh copy of the name.  VALUE is passed as-is; presumably
   the "_nocopy" variant takes ownership of both strings -- confirm. */
1711 string_map_replace_nocopy (vars, ss_xstrdup (var_name), value);
/* Scans the macro tokens in [P, END) for the !DOEND that closes a !DO loop
   and returns a pointer to a token in that range.

   Only tokens of type T_MACRO_ID are compared against the !DO and !DOEND
   keywords.  A "Missing !DOEND." error is reported against STACK, with no
   specific token attached.

   NOTE(review): the branch bodies and return statements are not visible in
   this listing.  Presumably nested !DO...!DOEND pairs are counted and
   skipped, and null is returned after the error is reported -- confirm. */
1715 static const struct macro_token *
1716 find_doend (const struct macro_expansion_stack *stack,
1717 const struct macro_token *p, const struct macro_token *end)
1720 for (; p < end; p++)
/* Tokens that are not macro identifiers cannot be !DO or !DOEND. */
1722 if (p->token.type != T_MACRO_ID)
1725 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1727 else if (ss_equals_case (p->token.string, ss_cstr ("!DOEND")))
1734 macro_error (stack, NULL, _("Missing !DOEND."));
/* Handles a !DO loop that begins at TOKENS[0], where N_TOKENS tokens are
   available, appending the expansion of each iteration to EXP.

   After !DO comes a loop variable, which must be a T_MACRO_ID token that is
   neither accepted by is_macro_keyword() nor the name of a parameter of
   ME->macro.  Two loop forms follow:

     - "!DO !var !IN (list)": the list expression is evaluated and split
       into tokens, and the body is expanded once per token with the loop
       variable set to a copy of that token's representation.

     - "!DO !var = first !TO last [!BY step]": FIRST, LAST and the optional
       step are evaluated with macro_evaluate_number().  A zero step is an
       error.  The body is expanded for each index from FIRST toward LAST,
       with the loop variable set to the index formatted by c_dtoastr().

   Both forms locate the loop body with find_doend(), compare an iteration
   count against settings_get_miterate(), give each iteration its own
   `break_' flag, and evaluate under a "!DO" stack frame.

   On the visible success paths, returns the number of tokens from TOKENS up
   to and including !DOEND.

   NOTE(review): the error returns, the increments of P, and the
   declarations of `first', `last', `by' and the numeric iteration counter
   are not visible in this listing.  Presumably errors return 0 -- confirm.

   NOTE(review): ME->macro is dereferenced unconditionally here, whereas
   macro_expand() tests ME for null before using it.  Confirm that this
   function cannot be reached with a null ME. */
1739 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1740 int nesting_countdown, enum segmenter_mode segmenter_mode,
1741 const struct macro_set *macros,
1742 const struct macro_expander *me,
1743 const struct macro_expansion_stack *stack,
1744 struct string_map *vars,
1745 bool *expand, struct macro_tokens *exp)
1747 const struct macro_token *p = tokens;
1748 const struct macro_token *end = tokens + n_tokens;
1750 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
/* The loop variable must be a macro identifier. */
1754 if (p >= end || p->token.type != T_MACRO_ID)
1756 macro_error (stack, p < end ? p : NULL,
1757 _("Expected macro variable name following !DO."));
1760 const struct substring var_name = p->token.string;
1761 if (is_macro_keyword (var_name)
1762 || macro_find_parameter_by_name (me->macro, var_name))
1764 macro_error (stack, p, _("Cannot use argument name or macro "
1765 "keyword as !DO variable."));
/* Stack frame passed to expression evaluation and body expansion. */
1770 struct macro_expansion_stack next_stack = {
1771 .name = "!DO", .next = stack,
/* Limit that the iteration counts below are compared against. */
1773 int miterate = settings_get_miterate ();
/* List form: !DO !var !IN (list). */
1774 if (p < end && p->token.type == T_MACRO_ID
1775 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1778 char *list = macro_evaluate_expression (&p, end - p, nesting_countdown,
1779 segmenter_mode, macros, me,
1780 &next_stack, vars, expand);
/* Split the evaluated list into tokens; each one is a loop value. */
1784 struct macro_tokens items = { .n = 0 };
1785 macro_tokens_from_string__ (&items, ss_cstr (list), segmenter_mode,
1789 const struct macro_token *do_end = find_doend (stack, p, end);
1792 macro_tokens_uninit (&items);
/* The loop body is the token range [P, DO_END). */
1796 const struct macro_tokens inner = {
1797 .mts = CONST_CAST (struct macro_token *, p),
1800 for (size_t i = 0; i < items.n; i++)
1804 macro_error (stack, NULL,
1805 _("!DO loop over list exceeded "
1806 "maximum number of iterations %d. "
1807 "(Use SET MITERATE to change the limit.)"),
/* Bind the loop variable to a copy of this item's text. */
1811 string_map_replace_nocopy (vars, ss_xstrdup (var_name),
1812 ss_xstrdup (items.mts[i].representation));
/* BREAK_ is local to this iteration and is handed to the body expansion. */
1814 bool break_ = false;
1815 macro_expand (&inner, nesting_countdown, segmenter_mode, macros,
1816 me, vars, &next_stack, expand, &break_, exp);
/* NOTE(review): the only visible macro_tokens_uninit (&items) is the one
   above, before the loop.  No release of `items' is visible before this
   return, so it looks as though the list tokens leak on the normal path --
   confirm. */
1820 return do_end - tokens + 1;
/* Numeric form: !DO !var = first !TO last [!BY step]. */
1822 else if (p < end && p->token.type == T_EQUALS)
1826 if (!macro_evaluate_number (&p, end - p, nesting_countdown,
1827 segmenter_mode, macros, me, &next_stack,
1828 vars, expand, &first))
1831 if (p >= end || p->token.type != T_MACRO_ID
1832 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1834 macro_error (stack, p < end ? p : NULL,
1835 _("Expected !TO in numerical !DO loop."));
1841 if (!macro_evaluate_number (&p, end - p, nesting_countdown,
1842 segmenter_mode, macros, me, &next_stack,
1843 vars, expand, &last))
/* The step is optional and only parsed when !BY is present. */
1847 if (p < end && p->token.type == T_MACRO_ID
1848 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1851 if (!macro_evaluate_number (&p, end - p, nesting_countdown,
1852 segmenter_mode, macros, me, &next_stack,
1858 macro_error (stack, NULL, _("!BY value cannot be zero."));
1863 const struct macro_token *do_end = find_doend (stack, p, end);
/* The loop body is the token range [P, DO_END). */
1866 const struct macro_tokens inner = {
1867 .mts = CONST_CAST (struct macro_token *, p),
/* Run the loop only if the sign of the step moves FIRST toward LAST. */
1871 if ((by > 0 && first <= last) || (by < 0 && first >= last))
1874 for (double index = first;
1875 by > 0 ? (index <= last) : (index >= last);
1880 macro_error (stack, NULL,
1881 _("Numerical !DO loop exceeded "
1882 "maximum number of iterations %d. "
1883 "(Use SET MITERATE to change the limit.)"),
/* Format the index as text and bind the loop variable to it. */
1888 char index_s[DBL_BUFSIZE_BOUND];
1889 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1890 string_map_replace_nocopy (vars, ss_xstrdup (var_name),
1893 bool break_ = false;
1894 macro_expand (&inner, nesting_countdown, segmenter_mode, macros,
1895 me, vars, &next_stack, expand, &break_, exp);
1901 return do_end - tokens + 1;
/* Neither !IN nor `=' followed the loop variable. */
1905 macro_error (stack, p < end ? p : NULL,
1906 _("Expected `=' or !IN in !DO loop."));
/* Expands the macro tokens in MTS and appends the result to EXP.

   NESTING_COUNTDOWN is the remaining nesting budget.  When it is zero or
   negative, an error is reported and the tokens of MTS are copied to EXP
   as-is.  Expanding the body of a nested macro call passes
   NESTING_COUNTDOWN - 1.

   ME, if nonnull, supplies the macro whose parameters may be referenced and
   the arguments bound to them.  VARS holds !LET and !DO variables; a local
   map `own_vars' exists for the case where the caller has none.  STACK
   describes the expansion context for error messages.  *EXPAND is consulted
   before an argument is itself expanded.  BREAK_ may be null; if it is
   nonnull, the loop stops once *BREAK_ becomes true.

   Visible handling of each token, in order:

     - A T_MACRO_ID that names a parameter of ME->macro is replaced by its
       argument, expanded recursively if *EXPAND and the parameter's
       `expand_arg' are both set, otherwise copied.
     - is_bang_star() triggers a loop over all the macro's parameters that
       tests each one's `positional' flag and substitutes the argument the
       same way.
     - macro_expand_if() is tried.
     - A T_MACRO_ID found in VARS is replaced by its value, re-tokenized.
     - A macro call is recognized by macro_expander_create() and
       macro_expander_add(), and its body is expanded.
     - Tokens that are not T_MACRO_ID are copied.
     - Otherwise: !break, macro functions via expand_macro_function(),
       macro_parse_let(), macro_expand_do(), !onexpand and !offexpand.
       Anything left is copied.

   NOTE(review): many short lines (braces, `continue', `else', assignments)
   are not visible in this listing, so the exact conditions that link these
   steps cannot be confirmed from here. */
1912 macro_expand (const struct macro_tokens *mts, int nesting_countdown,
1913 enum segmenter_mode segmenter_mode,
1914 const struct macro_set *macros,
1915 const struct macro_expander *me, struct string_map *vars,
1916 const struct macro_expansion_stack *stack,
1917 bool *expand, bool *break_, struct macro_tokens *exp)
/* Nesting budget exhausted: report it and copy the input through as-is. */
1919 if (nesting_countdown <= 0)
1921 macro_error (stack, NULL, _("Maximum nesting level %d exceeded. "
1922 "(Use SET MNEST to change the limit.)"),
1923 settings_get_mnest ());
1924 for (size_t i = 0; i < mts->n; i++)
1925 macro_tokens_add (exp, &mts->mts[i]);
/* Local variable map; it is destroyed at the end only if VARS points to
   it. */
1929 struct string_map own_vars = STRING_MAP_INITIALIZER (own_vars);
/* The loop ends early once a caller-supplied *BREAK_ becomes true. */
1933 for (size_t i = 0; i < mts->n && (!break_ || !*break_); i++)
1935 const struct macro_token *mt = &mts->mts[i];
1936 const struct token *token = &mt->token;
/* Parameter references only make sense when a macro is being expanded. */
1937 if (token->type == T_MACRO_ID && me)
1939 const struct macro_param *param = macro_find_parameter_by_name (
1940 me->macro, token->string);
/* The parameter's position in the params array indexes its argument. */
1943 const struct macro_tokens *arg = me->args[param - me->macro->params];
1944 if (*expand && param->expand_arg)
1945 macro_expand (arg, nesting_countdown, segmenter_mode,
1947 &(struct macro_expansion_stack) {
1948 .name = param->name,
1950 }, expand, break_, exp);
1952 for (size_t i = 0; i < arg->n; i++)
1953 macro_tokens_add (exp, &arg->mts[i]);
/* is_bang_star() matched at position I: walk all the parameters. */
1957 if (is_bang_star (mts->mts, mts->n, i))
1959 for (size_t j = 0; j < me->macro->n_params; j++)
1961 const struct macro_param *param = &me->macro->params[j];
1962 if (!param->positional)
1965 const struct macro_tokens *arg = me->args[j];
1966 if (*expand && param->expand_arg)
1967 macro_expand (arg, nesting_countdown, segmenter_mode,
1969 &(struct macro_expansion_stack) {
1972 }, expand, break_, exp);
1974 for (size_t k = 0; k < arg->n; k++)
1975 macro_tokens_add (exp, &arg->mts[k]);
/* Try an !IF construct starting at this token. */
1981 size_t n = macro_expand_if (&mts->mts[i], mts->n - i,
1982 nesting_countdown, segmenter_mode,
1984 vars, expand, break_, exp);
/* A macro identifier found in VARS is replaced by its value, which is
   re-tokenized into EXP. */
1992 if (token->type == T_MACRO_ID && vars)
1994 const char *value = string_map_find__ (vars, token->string.string,
1995 token->string.length);
1998 macro_tokens_from_string__ (exp, ss_cstr (value), segmenter_mode,
/* Try a call to another macro: feed it the following tokens, substituting
   an end-of-command token once the input runs out. */
2006 struct macro_expander *subme;
2007 int retval = macro_expander_create (macros, token, &subme);
2008 for (size_t j = 1; !retval; j++)
2010 const struct macro_token endcmd = { .token = { .type = T_ENDCMD } };
2011 retval = macro_expander_add (
2012 subme, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
/* Expand the called macro's body with one less level of nesting budget, no
   inherited variables, and a stack frame describing the called macro. */
2017 macro_expand (&subme->macro->body, nesting_countdown - 1,
2018 segmenter_mode, macros, subme, NULL,
2019 &(struct macro_expansion_stack) {
2020 .name = subme->macro->name,
2021 .file_name = subme->macro->file_name,
2022 .first_line = subme->macro->first_line,
2023 .last_line = subme->macro->last_line,
2025 }, expand, break_, exp);
2026 macro_expander_destroy (subme);
2030 macro_expander_destroy (subme);
/* Anything that is not a macro identifier is copied to the output. */
2033 if (token->type != T_MACRO_ID)
2035 macro_tokens_add (exp, mt);
2039 if (ss_equals_case (token->string, ss_cstr ("!break")))
2042 macro_error (stack, mt, _("!BREAK outside !DO."));
/* Try a macro function call starting at this token. */
2050 struct parse_macro_function_ctx ctx = {
2051 .input = &mts->mts[i],
2052 .n_input = mts->n - i,
2053 .nesting_countdown = nesting_countdown,
2054 .segmenter_mode = segmenter_mode,
2061 struct string function_output = DS_EMPTY_INITIALIZER;
2062 size_t function_consumed;
2063 if (expand_macro_function (&ctx, &function_output, &function_consumed))
/* Skip the consumed tokens; the loop's own i++ accounts for one of them. */
2065 i += function_consumed - 1;
2067 macro_tokens_from_string__ (exp, function_output.ss, segmenter_mode,
2069 ds_destroy (&function_output);
/* Try !LET, then !DO. */
2074 size_t n = macro_parse_let (&mts->mts[i], mts->n - i,
2075 nesting_countdown, segmenter_mode,
2076 macros, me, stack, vars, expand);
2083 n = macro_expand_do (&mts->mts[i], mts->n - i,
2084 nesting_countdown, segmenter_mode,
2085 macros, me, stack, vars, expand, exp);
/* NOTE(review): the bodies of these two branches are not visible here;
   presumably they set *EXPAND to true and false respectively -- confirm. */
2092 if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
2094 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
2097 macro_tokens_add (exp, mt);
/* Release the local variable map only if it was the one in use. */
2099 if (vars == &own_vars)
2100 string_map_destroy (&own_vars);
/* Expands the body of ME's macro into EXP, tokenizing according to
   SEGMENTER_MODE.

   This starts a top-level expansion: the stack frame is filled in from the
   macro's own name, file name and line range, the nesting budget comes from
   settings_get_mnest(), and macro_expand() is called with no variable map
   and no break flag (both null).

   NOTE(review): the declaration of `expand' and the tail of the `stack'
   initializer are not visible in this listing. */
2104 macro_expander_get_expansion (struct macro_expander *me,
2105 enum segmenter_mode segmenter_mode,
2106 struct macro_tokens *exp)
/* Frame that identifies this macro in error messages. */
2109 struct macro_expansion_stack stack = {
2110 .name = me->macro->name,
2111 .file_name = me->macro->file_name,
2112 .first_line = me->macro->first_line,
2113 .last_line = me->macro->last_line,
2115 macro_expand (&me->macro->body, settings_get_mnest (), segmenter_mode,
2116 me->macros, me, NULL, &stack, &expand, NULL, exp);