1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list. Entries are not dynamically allocated;
46 they live on the program stack. */
47 struct macro_expansion_stack
49 /* Points to an outer stack entry, or NULL if this is the outermost. */
50 const struct macro_expansion_stack *next;
52 /* A macro name or !IF, !DO, etc. */
55 /* Location of the macro definition, if available. */
56 const char *file_name;
61 /* Reports an error during macro expansion. STACK is the stack for reporting
62 the location of the error, MT is the optional token at which the error was
63 detected, and FORMAT along with the varargs is the message to report. */
64 static void PRINTF_FORMAT (3, 4)
65 macro_error (const struct macro_expansion_stack *stack,
66 const struct macro_token *mt,
67 const char *format, ...)
69 struct msg_stack **ms = NULL;
70 size_t allocated_ms = 0;
73 for (const struct macro_expansion_stack *p = stack; p; p = p->next)
75 if (n_ms >= allocated_ms)
76 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
78 /* TRANSLATORS: These strings are used for explaining the context of an
79 error. The "At ... in the expansion of" or "In the expansion of" message
80 appears first, followed by zero or more "inside the expansion of" messages.
81 `innermost', `next_inner', etc., are names of macros, and `foobar' is a piece of
84 foo.sps:12: At `foobar' in the expansion of 'innermost',
85 foo.sps:23: inside the expansion of 'next_inner',
86 foo.sps:34: inside the expansion of 'next_inner2',
87 foo.sps:45: inside the expansion of 'outermost',
88 foo.sps:76: This is the actual error message. */
92 if (mt && mt->representation.length)
95 str_ellipsize (mt->representation, syntax, sizeof syntax);
96 description = xasprintf (_("At `%s' in the expansion of `%s',"),
100 description = xasprintf (_("In the expansion of `%s',"), p->name);
103 description = xasprintf (_("inside the expansion of `%s',"), p->name);
105 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
106 *ms[n_ms] = (struct msg_stack) {
108 .file_name = xstrdup_if_nonnull (p->file_name),
109 .first_line = p->first_line,
110 .last_line = p->last_line,
112 .description = description,
118 va_start (args, format);
119 char *s = xvasprintf (format, args);
122 struct msg *m = xmalloc (sizeof *m);
124 .category = MSG_C_SYNTAX,
125 .severity = MSG_S_ERROR,
/* Makes DST a copy of SRC: the token is copied with token_copy() and DST
   receives its own duplicate of SRC's representation through
   ss_alloc_substring(), so the two macro tokens do not share storage for it
   (macro_token_uninit() later releases it with ss_dealloc()). */
134 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
136 token_copy (&dst->token, &src->token);
137 ss_alloc_substring (&dst->representation, src->representation);
/* Releases what MT owns: uninitializes its token and deallocates its
   representation substring. */
141 macro_token_uninit (struct macro_token *mt)
143 token_uninit (&mt->token);
144 ss_dealloc (&mt->representation);
/* Appends MT's stored syntax representation to S. */
148 macro_token_to_representation (struct macro_token *mt, struct string *s)
150 ds_put_substring (s, mt->representation);
/* Reports whether S is found in the set of macro keywords.

   The keyword set is a function-local static, so it persists between calls;
   it is filled from the KWS table only while it is still empty, i.e. on the
   first call. */
153 is_macro_keyword (struct substring s)
155 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
156 if (stringi_set_is_empty (&keywords))
158 static const char *kws[] = {
179 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
180 stringi_set_insert (&keywords, kws[i]);
/* S is a by-value copy, so trimming the leading `!' here does not affect the
   caller's substring.  NOTE(review): the lookup goes through a stringi_set,
   which presumably compares case-insensitively -- confirm. */
183 ss_ltrim (&s, ss_cstr ("!"));
184 return stringi_set_contains_len (&keywords, s.string, s.length);
/* Initializes DST from SRC: allocates a token array with room for SRC->n
   elements and copies every token of SRC into it, one at a time, with
   macro_token_copy(). */
188 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
190 *dst = (struct macro_tokens) {
191 .mts = xmalloc (src->n * sizeof *dst->mts),
195 for (size_t i = 0; i < src->n; i++)
196 macro_token_copy (&dst->mts[i], &src->mts[i]);
/* Uninitializes each of the MTS->n tokens held in MTS with
   macro_token_uninit(). */
200 macro_tokens_uninit (struct macro_tokens *mts)
202 for (size_t i = 0; i < mts->n; i++)
203 macro_token_uninit (&mts->mts[i]);
/* Makes room for one more token at the end of MTS and returns a pointer to
   that slot, incrementing MTS->n.  When the array is already full
   (MTS->n >= MTS->allocated) it is first grown with x2nrealloc().  Nothing
   here writes to the returned slot, so the caller has to fill it in, as
   macro_tokens_add() does with macro_token_copy(). */
208 macro_tokens_add_uninit (struct macro_tokens *mts)
210 if (mts->n >= mts->allocated)
211 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
212 return &mts->mts[mts->n++];
/* Appends a copy of MT to MTS: obtains a fresh slot from
   macro_tokens_add_uninit() and copies MT into it with macro_token_copy(). */
216 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
218 macro_token_copy (macro_tokens_add_uninit (mts), mt);
221 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
222 if nonnull, for error reporting. */
224 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
225 enum segmenter_mode mode,
226 const struct macro_expansion_stack *stack)
230 struct segmenter segmenter;
231 struct substring body;
234 struct state state = {
235 .segmenter = segmenter_init (mode, true),
238 struct state saved = state;
240 while (state.body.length > 0)
242 struct macro_token mt = {
243 .token = { .type = T_STOP },
244 .representation = { .string = state.body.string },
246 struct token *token = &mt.token;
248 struct scanner scanner;
249 scanner_init (&scanner, token);
253 enum segment_type type;
254 int seg_len = segmenter_push (&state.segmenter, state.body.string,
255 state.body.length, true, &type);
256 assert (seg_len >= 0);
258 struct substring segment = ss_head (state.body, seg_len);
259 ss_advance (&state.body, seg_len);
261 enum scan_result result = scanner_push (&scanner, type, segment, token);
262 if (result == SCAN_SAVE)
264 else if (result == SCAN_BACK)
269 else if (result == SCAN_DONE)
273 /* We have a token in 'token'. */
274 mt.representation.length = state.body.string - mt.representation.string;
275 if (is_scan_type (token->type))
277 if (token->type != SCAN_SKIP)
279 char *s = scan_token_to_error (token);
282 mt.token.type = T_STRING;
283 macro_error (stack, &mt, "%s", s);
291 macro_tokens_add (mts, &mt);
292 token_uninit (token);
296 /* Tokenizes SRC according to MODE and appends the tokens to MTS. */
298 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
299 enum segmenter_mode mode)
301 macro_tokens_from_string__ (mts, src, mode, NULL);
/* Prints the token of every element of MTS to STREAM, in order, using
   token_print(). */
305 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
307 for (size_t i = 0; i < mts->n; i++)
308 token_print (&mts->mts[i].token, stream);
313 TC_ENDCMD, /* No space before or after (new-line after). */
314 TC_BINOP, /* Space on both sides. */
315 TC_COMMA, /* Space afterward. */
316 TC_ID, /* Don't need spaces except sequentially. */
317 TC_PUNCT, /* Don't need spaces except sequentially. */
321 needs_space (enum token_class prev, enum token_class next)
323 /* Don't need a space before or after the end of a command.
324 (A new-line is needed afterward as a special case.) */
325 if (prev == TC_ENDCMD || next == TC_ENDCMD)
328 /* Binary operators always have a space on both sides. */
329 if (prev == TC_BINOP || next == TC_BINOP)
332 /* A comma always has a space afterward. */
333 if (prev == TC_COMMA)
336 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
337 two of them in a row. */
341 static enum token_class
342 classify_token (enum token_type type)
394 /* Appends a syntax representation of the tokens in MTS to S. If OFS and LEN
395 are nonnull, sets OFS[i] to the offset within S of the start of token 'i' in
396 MTS and LEN[i] to its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1]
397 because some tokens are separated by white space. */
399 macro_tokens_to_representation (struct macro_tokens *mts, struct string *s,
400 size_t *ofs, size_t *len)
402 assert ((ofs != NULL) == (len != NULL));
407 for (size_t i = 0; i < mts->n; i++)
411 enum token_type prev = mts->mts[i - 1].token.type;
412 enum token_type next = mts->mts[i].token.type;
414 if (prev == T_ENDCMD)
415 ds_put_byte (s, '\n');
418 enum token_class pc = classify_token (prev);
419 enum token_class nc = classify_token (next);
420 if (needs_space (pc, nc))
421 ds_put_byte (s, ' ');
426 ofs[i] = s->ss.length;
427 macro_token_to_representation (&mts->mts[i], s);
429 len[i] = s->ss.length - ofs[i];
434 macro_destroy (struct macro *m)
441 for (size_t i = 0; i < m->n_params; i++)
443 struct macro_param *p = &m->params[i];
446 macro_tokens_uninit (&p->def);
454 token_uninit (&p->charend);
458 token_uninit (&p->enclose[0]);
459 token_uninit (&p->enclose[1]);
467 macro_tokens_uninit (&m->body);
472 macro_set_create (void)
474 struct macro_set *set = xmalloc (sizeof *set);
475 *set = (struct macro_set) {
476 .macros = HMAP_INITIALIZER (set->macros),
/* Tears down the macros held in SET: every macro is removed from SET's hash
   map and destroyed with macro_destroy(), after which the map itself is
   destroyed with hmap_destroy(). */
482 macro_set_destroy (struct macro_set *set)
487 struct macro *macro, *next;
488 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
490 hmap_delete (&set->macros, &macro->hmap_node);
491 macro_destroy (macro);
493 hmap_destroy (&set->macros);
/* Returns the hash of macro name NAME, computed by utf8_hash_case_string()
   with 0 as its second argument.  This is the hash used both when inserting
   into and when searching a macro set's hash map.
   NOTE(review): the "case" hash is presumably case-insensitive, to agree with
   the utf8_strcasecmp() comparison in macro_set_find__() -- confirm. */
498 hash_macro_name (const char *name)
500 return utf8_hash_case_string (name, 0);
/* Searches SET for a macro named NAME.

   An empty set is checked for up front.  Otherwise only the hash map entries
   whose hash equals hash_macro_name (NAME) are visited, and each candidate's
   name is compared against NAME with utf8_strcasecmp(); a comparison result
   of zero identifies the match. */
503 static struct macro *
504 macro_set_find__ (struct macro_set *set, const char *name)
506 if (macro_set_is_empty (set))
510 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
511 hash_macro_name (name), &set->macros)
512 if (!utf8_strcasecmp (macro->name, name))
/* Looks up NAME in SET and returns the result of macro_set_find__().  This
   wrapper exists so that callers can pass a const SET; the qualifier is cast
   away with CONST_CAST only to call the non-const helper. */
519 macro_set_find (const struct macro_set *set, const char *name)
521 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
524 /* Adds M to SET. M replaces any existing macro with the same name. Takes
527 macro_set_add (struct macro_set *set, struct macro *m)
529 struct macro *victim = macro_set_find__ (set, m->name);
532 hmap_delete (&set->macros, &victim->hmap_node);
533 macro_destroy (victim);
536 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
539 /* Macro call parsing. */
546 /* Accumulating tokens in mc->params toward the end of any type of
550 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
553 /* Expecting a keyword for a keyword argument. */
556 /* Expecting an equal sign for a keyword argument. */
559 /* Macro fully parsed and ready for expansion. */
563 /* Parsing macro calls. This is a FSM driven by macro_call_create() and
564 macro_call_add() to identify the macro being called and obtain its
565 arguments. 'state' identifies the FSM state. */
568 const struct macro_set *macros;
569 const struct macro *macro;
570 struct macro_tokens **args;
574 const struct macro_param *param; /* Parameter currently being parsed. */
577 /* Completes macro expansion by initializing arguments that weren't supplied to
580 mc_finished (struct macro_call *mc)
582 mc->state = MC_FINISHED;
583 for (size_t i = 0; i < mc->macro->n_params; i++)
585 mc->args[i] = &mc->macro->params[i].def;
590 mc_next_arg (struct macro_call *mc)
594 assert (!mc->macro->n_params);
595 return mc_finished (mc);
597 else if (mc->param->positional)
600 if (mc->param >= &mc->macro->params[mc->macro->n_params])
601 return mc_finished (mc);
604 mc->state = (!mc->param->positional ? MC_KEYWORD
605 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
612 for (size_t i = 0; i < mc->macro->n_params; i++)
615 mc->state = MC_KEYWORD;
618 return mc_finished (mc);
/* Puts the macro call parser MC into the MC_ERROR state.  The other mc_*()
   parsing functions return this function's result after reporting a problem
   with msg(). */
623 mc_error (struct macro_call *mc)
625 mc->state = MC_ERROR;
630 mc_add_arg (struct macro_call *mc, const struct macro_token *mt)
632 const struct macro_param *p = mc->param;
634 const struct token *token = &mt->token;
635 if ((token->type == T_ENDCMD || token->type == T_STOP)
636 && p->arg_type != ARG_CMDEND)
638 msg (SE, _("Unexpected end of command reading argument %s "
639 "to macro %s."), mc->param->name, mc->macro->name);
641 return mc_error (mc);
646 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
648 *argp = xzalloc (sizeof **argp);
649 struct macro_tokens *arg = *argp;
650 if (p->arg_type == ARG_N_TOKENS)
652 macro_tokens_add (arg, mt);
653 if (arg->n >= p->n_tokens)
654 return mc_next_arg (mc);
657 else if (p->arg_type == ARG_CMDEND)
659 if (token->type == T_ENDCMD || token->type == T_STOP)
660 return mc_next_arg (mc);
661 macro_tokens_add (arg, mt);
666 const struct token *end
667 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
668 if (token_equal (token, end))
669 return mc_next_arg (mc);
670 macro_tokens_add (arg, mt);
676 mc_expected (struct macro_call *mc, const struct macro_token *actual,
677 const struct token *expected)
679 const struct substring actual_s
680 = (actual->representation.length ? actual->representation
681 : ss_cstr (_("<end of input>")));
682 char *expected_s = token_to_string (expected);
683 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
685 (int) actual_s.length, actual_s.string, expected_s,
686 mc->param->name, mc->macro->name);
689 return mc_error (mc);
693 mc_enclose (struct macro_call *mc, const struct macro_token *mt)
695 const struct token *token = &mt->token;
698 if (token_equal (&mc->param->enclose[0], token))
704 return mc_expected (mc, mt, &mc->param->enclose[0]);
707 static const struct macro_param *
708 macro_find_parameter_by_name (const struct macro *m, struct substring name)
710 ss_ltrim (&name, ss_cstr ("!"));
712 for (size_t i = 0; i < m->n_params; i++)
714 const struct macro_param *p = &m->params[i];
715 struct substring p_name = ss_cstr (p->name + 1);
716 if (!utf8_strncasecmp (p_name.string, p_name.length,
717 name.string, name.length))
724 mc_keyword (struct macro_call *mc, const struct macro_token *mt)
726 const struct token *token = &mt->token;
727 if (token->type != T_ID)
728 return mc_finished (mc);
730 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
734 size_t arg_index = p - mc->macro->params;
736 if (mc->args[arg_index])
739 _("Argument %s multiply specified in call to macro %s."),
740 p->name, mc->macro->name);
741 return mc_error (mc);
745 mc->state = MC_EQUALS;
749 return mc_finished (mc);
753 mc_equals (struct macro_call *mc, const struct macro_token *mt)
755 const struct token *token = &mt->token;
758 if (token->type == T_EQUALS)
764 return mc_expected (mc, mt, &(struct token) { .type = T_EQUALS });
767 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
768 macro expander and initializes *MCP to it. Returns 0 if more tokens are needed
769 and should be added via macro_call_add() or 1 if the caller should next call
770 macro_call_get_expansion().
772 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to
775 macro_call_create (const struct macro_set *macros,
776 const struct token *token,
777 struct macro_call **mcp)
779 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
780 ? macro_set_find (macros, token->string.string)
788 struct macro_call *mc = xmalloc (sizeof *mc);
789 *mc = (struct macro_call) {
793 .state = (!macro->n_params ? MC_FINISHED
794 : !macro->params[0].positional ? MC_KEYWORD
795 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
797 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
798 .param = macro->params,
802 return mc->state == MC_FINISHED ? 1 : 0;
806 macro_call_destroy (struct macro_call *mc)
811 for (size_t i = 0; i < mc->macro->n_params; i++)
813 struct macro_tokens *a = mc->args[i];
814 if (a && a != &mc->macro->params[i].def)
816 macro_tokens_uninit (a);
824 /* Adds TOKEN to the collection of tokens in MC that potentially need to be
827 Returns -1 if the tokens added do not actually invoke a macro. The caller
828 should consume the first token without expanding it. (Later tokens might
829 invoke a macro so it's best to feed the second token into a new expander.)
831 Returns 0 if the macro expander needs more tokens, for macro arguments or to
832 decide whether this is actually a macro invocation. The caller should call
833 macro_call_add() again with the next token.
835 Returns a positive number to indicate that the returned number of tokens
836 invoke a macro. The number returned might be less than the number of tokens
837 added because it can take a few tokens of lookahead to determine whether the
838 macro invocation is finished. The caller should call
839 macro_call_get_expansion() to obtain the expansion. */
841 macro_call_add (struct macro_call *mc, const struct macro_token *mt)
849 return mc_add_arg (mc, mt);
852 return mc_enclose (mc, mt);
855 return mc_keyword (mc, mt);
858 return mc_equals (mc, mt);
865 /* Macro expansion. */
867 struct macro_expander
869 const struct macro_set *macros;
870 const struct macro *macro;
871 struct macro_tokens **args;
872 enum segmenter_mode segmenter_mode;
873 struct stringi_map *vars;
876 int nesting_countdown;
877 const struct macro_expansion_stack *stack;
880 /* Each argument to a macro function is one of:
882 - A quoted string or other single literal token.
884 - An argument to the macro being expanded, e.g. !1 or a named argument.
888 - A function invocation.
890 Each function invocation yields a character sequence to be turned into a
891 sequence of tokens. The case where that character sequence is a single
892 quoted string is an important special case.
894 struct parse_macro_function_ctx
896 const struct macro_token *input;
898 const struct macro_expander *me;
902 macro_expand (const struct macro_tokens *, const struct macro_expander *,
903 struct macro_tokens *);
906 expand_macro_function (struct parse_macro_function_ctx *ctx,
907 struct string *output, size_t *input_consumed);
909 /* Returns true if the pair of tokens starting at offset OFS within MTS are !*,
912 is_bang_star (const struct macro_token *mts, size_t n, size_t ofs)
915 && mts[ofs].token.type == T_MACRO_ID
916 && ss_equals (mts[ofs].token.string, ss_cstr ("!"))
917 && mts[ofs + 1].token.type == T_ASTERISK);
921 parse_function_arg (struct parse_macro_function_ctx *ctx,
922 size_t i, struct string *farg)
924 const struct macro_token *tokens = ctx->input;
925 const struct token *token = &tokens[i].token;
926 if (token->type == T_MACRO_ID && ctx->me->macro)
928 const struct macro_param *param = macro_find_parameter_by_name (
929 ctx->me->macro, token->string);
932 size_t param_idx = param - ctx->me->macro->params;
933 const struct macro_tokens *marg = ctx->me->args[param_idx];
934 for (size_t i = 0; i < marg->n; i++)
937 ds_put_byte (farg, ' ');
938 ds_put_substring (farg, marg->mts[i].representation);
943 if (is_bang_star (ctx->input, ctx->n_input, i))
945 for (size_t i = 0; i < ctx->me->macro->n_params; i++)
947 if (!ctx->me->macro->params[i].positional)
950 const struct macro_tokens *marg = ctx->me->args[i];
951 for (size_t j = 0; j < marg->n; j++)
954 ds_put_byte (farg, ' ');
955 ds_put_substring (farg, marg->mts[j].representation);
961 const char *value = stringi_map_find__ (ctx->me->vars,
962 token->string.string,
963 token->string.length);
966 ds_put_cstr (farg, value);
970 struct parse_macro_function_ctx subctx = {
971 .input = &ctx->input[i],
972 .n_input = ctx->n_input - i,
975 size_t subinput_consumed;
976 if (expand_macro_function (&subctx, farg, &subinput_consumed))
977 return subinput_consumed;
980 ds_put_substring (farg, tokens[i].representation);
985 parse_macro_function (struct parse_macro_function_ctx *ctx,
986 struct string_array *args,
987 struct substring function,
988 int min_args, int max_args,
989 size_t *input_consumed)
991 const struct macro_token *tokens = ctx->input;
992 size_t n_tokens = ctx->n_input;
995 || tokens[0].token.type != T_MACRO_ID
996 || !ss_equals_case (tokens[0].token.string, function)) /* XXX abbrevs allowed */
999 if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
1001 macro_error (ctx->me->stack, n_tokens > 1 ? &tokens[1] : NULL,
1002 _("`(' expected following %s."), function.string);
1006 string_array_init (args);
1008 for (size_t i = 2;; )
1011 goto unexpected_end;
1012 if (tokens[i].token.type == T_RPAREN)
1014 *input_consumed = i + 1;
1015 if (args->n < min_args || args->n > max_args)
1017 macro_error (ctx->me->stack, &tokens[i],
1018 _("Wrong number of arguments to macro function %s."),
1025 struct string s = DS_EMPTY_INITIALIZER;
1026 i += parse_function_arg (ctx, i, &s);
1030 goto unexpected_end;
1032 string_array_append_nocopy (args, ds_steal_cstr (&s));
1034 if (tokens[i].token.type == T_COMMA)
1036 else if (tokens[i].token.type != T_RPAREN)
1038 macro_error (ctx->me->stack, &tokens[i],
1039 _("`,' or `)' expected in call to macro function %s."),
1046 macro_error (ctx->me->stack, NULL, _("Missing `)' in call to macro function %s."),
1050 string_array_destroy (args);
/* Tries to interpret S as exactly one quoted string.

   S is tokenized with a string lexer in SEGMENTER_MODE.  The contents of the
   first token are appended to CONTENT only when that token has type T_STRING
   and the lexer does not produce a second token after it.  Every token that
   was obtained is uninitialized before leaving.

   Callers in this file treat a nonzero result as "S was a single quoted
   string whose contents were appended to CONTENT" and otherwise fall back to
   using S unchanged. */
1055 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1056 struct string *content)
1058 struct string_lexer slex;
1059 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1061 struct token token1;
1062 if (!string_lexer_next (&slex, &token1))
1065 if (token1.type != T_STRING)
1067 token_uninit (&token1);
1071 struct token token2;
1072 if (string_lexer_next (&slex, &token2))
1074 token_uninit (&token1);
1075 token_uninit (&token2);
1079 ds_put_substring (content, token1.string);
1080 token_uninit (&token1);
/* Returns the unquoted form of S if S is a single quoted string, otherwise S
   itself.

   TMP is initialized to an empty string and used as the destination for
   unquote_string(); on success the returned pointer is TMP's contents.  TMP
   is initialized in either case, so the caller is expected to ds_destroy() it
   once done with the returned string, as macro_evaluate_relational() does. */
1085 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1088 ds_init_empty (tmp);
1089 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
/* Parses S as a base-10 integer with strtol() and stores the value in *NP,
   clamped to the range [INT_MIN, INT_MAX].

   The result is true only if, after skipping any characters in CC_SPACES
   that follow the number, the end of S has been reached, errno is not ERANGE,
   and the parsed value did not need clamping to fit in an int.

   NOTE(review): the checks below never compare TAIL against S, so an input
   that contains no digits at all (e.g. an empty or all-blank string) appears
   to be accepted as 0 -- confirm whether that is intended. */
1093 parse_integer (const char *s, int *np)
1098 long int n = strtol (s, &tail, 10);
1099 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
1100 tail += strspn (tail, CC_SPACES);
1101 return *tail == '\0' && errno != ERANGE && n == *np;
1105 expand_macro_function (struct parse_macro_function_ctx *ctx,
1106 struct string *output,
1107 size_t *input_consumed)
1109 struct string_array args;
1111 if (parse_macro_function (ctx, &args, ss_cstr ("!LENGTH"), 1, 1,
1113 ds_put_format (output, "%zu", strlen (args.strings[0]));
1114 else if (parse_macro_function (ctx, &args, ss_cstr ("!BLANKS"), 1, 1,
1118 if (!parse_integer (args.strings[0], &n))
1120 macro_error (ctx->me->stack, NULL,
1121 _("Argument to !BLANKS must be non-negative integer "
1122 "(not \"%s\")."), args.strings[0]);
1123 string_array_destroy (&args);
1127 ds_put_byte_multiple (output, ' ', n);
1129 else if (parse_macro_function (ctx, &args, ss_cstr ("!CONCAT"), 1, INT_MAX,
1132 for (size_t i = 0; i < args.n; i++)
1133 if (!unquote_string (args.strings[i], ctx->me->segmenter_mode, output))
1134 ds_put_cstr (output, args.strings[i]);
1136 else if (parse_macro_function (ctx, &args, ss_cstr ("!HEAD"), 1, 1,
1140 const char *s = unquote_string_in_place (args.strings[0],
1141 ctx->me->segmenter_mode, &tmp);
1143 struct macro_tokens mts = { .n = 0 };
1144 macro_tokens_from_string__ (&mts, ss_cstr (s), ctx->me->segmenter_mode,
1147 ds_put_substring (output, mts.mts[0].representation);
1148 macro_tokens_uninit (&mts);
1151 else if (parse_macro_function (ctx, &args, ss_cstr ("!INDEX"), 2, 2,
1154 const char *haystack = args.strings[0];
1155 const char *needle = strstr (haystack, args.strings[1]);
1156 ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0);
1158 else if (parse_macro_function (ctx, &args, ss_cstr ("!QUOTE"), 1, 1,
1161 if (unquote_string (args.strings[0], ctx->me->segmenter_mode, NULL))
1162 ds_put_cstr (output, args.strings[0]);
1165 ds_extend (output, strlen (args.strings[0]) + 2);
1166 ds_put_byte (output, '\'');
1167 for (const char *p = args.strings[0]; *p; p++)
1170 ds_put_byte (output, '\'');
1171 ds_put_byte (output, *p);
1173 ds_put_byte (output, '\'');
1176 else if (parse_macro_function (ctx, &args, ss_cstr ("!SUBSTR"), 2, 3,
1180 if (!parse_integer (args.strings[1], &start) || start < 1)
1182 macro_error (ctx->me->stack, NULL,
1183 _("Second argument of !SUBSTR must be "
1184 "positive integer (not \"%s\")."),
1186 string_array_destroy (&args);
1190 int count = INT_MAX;
1191 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1193 macro_error (ctx->me->stack, NULL,
1194 _("Third argument of !SUBSTR must be "
1195 "non-negative integer (not \"%s\")."),
1197 string_array_destroy (&args);
1201 struct substring s = ss_cstr (args.strings[0]);
1202 ds_put_substring (output, ss_substr (s, start - 1, count));
1204 else if (parse_macro_function (ctx, &args, ss_cstr ("!TAIL"), 1, 1,
1208 const char *s = unquote_string_in_place (args.strings[0],
1209 ctx->me->segmenter_mode, &tmp);
1211 struct macro_tokens mts = { .n = 0 };
1212 macro_tokens_from_string__ (&mts, ss_cstr (s), ctx->me->segmenter_mode,
1216 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1217 macro_tokens_to_representation (&tail, output, NULL, NULL);
1219 macro_tokens_uninit (&mts);
1222 else if (parse_macro_function (ctx, &args, ss_cstr ("!UNQUOTE"), 1, 1,
1225 if (!unquote_string (args.strings[0], ctx->me->segmenter_mode, output))
1226 ds_put_cstr (output, args.strings[0]);
1228 else if (parse_macro_function (ctx, &args, ss_cstr ("!UPCASE"), 1, 1,
1232 const char *s = unquote_string_in_place (args.strings[0],
1233 ctx->me->segmenter_mode, &tmp);
1234 char *upper = utf8_to_upper (s);
1235 ds_put_cstr (output, upper);
1239 else if (parse_macro_function (ctx, &args, ss_cstr ("!EVAL"), 1, 1,
1242 struct macro_tokens mts = { .n = 0 };
1243 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1244 ctx->me->segmenter_mode, ctx->me->stack);
1245 struct macro_tokens exp = { .n = 0 };
1246 struct macro_expansion_stack stack = {
1248 .next = ctx->me->stack
1250 struct macro_expander subme = *ctx->me;
1251 subme.break_ = NULL;
1252 subme.stack = &stack;
1254 macro_expand (&mts, &subme, &exp);
1255 macro_tokens_to_representation (&exp, output, NULL, NULL);
1256 macro_tokens_uninit (&exp);
1257 macro_tokens_uninit (&mts);
1259 else if (ctx->n_input > 0
1260 && ctx->input[0].token.type == T_MACRO_ID
1261 && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!NULL")))
1263 *input_consumed = 1;
1269 string_array_destroy (&args);
1273 static char *macro_evaluate_or (const struct macro_expander *me,
1274 const struct macro_token **tokens,
1275 const struct macro_token *end);
1278 macro_evaluate_literal (const struct macro_expander *me,
1279 const struct macro_token **tokens,
1280 const struct macro_token *end)
1282 const struct macro_token *p = *tokens;
1285 if (p->token.type == T_LPAREN)
1288 char *value = macro_evaluate_or (me, &p, end);
1291 if (p >= end || p->token.type != T_RPAREN)
1294 macro_error (me->stack, p < end ? p : NULL,
1295 _("Expecting ')' in macro expression."));
1302 else if (p->token.type == T_RPAREN)
1304 macro_error (me->stack, p, _("Expecting literal or function invocation "
1305 "in macro expression."));
1309 struct parse_macro_function_ctx fctx = {
1314 struct string function_output = DS_EMPTY_INITIALIZER;
1315 size_t function_consumed = parse_function_arg (&fctx, 0, &function_output);
1316 struct string unquoted = DS_EMPTY_INITIALIZER;
1317 if (unquote_string (ds_cstr (&function_output), me->segmenter_mode,
1320 ds_swap (&function_output, &unquoted);
1321 ds_destroy (&unquoted);
1323 *tokens = p + function_consumed;
1324 return ds_steal_cstr (&function_output);
1327 /* Returns true if MT is valid as a macro operator. Only operators written as
1328 symbols (e.g. <>) are usable in macro expressions, not operators written as
1329 letters (e.g. EQ). */
1331 is_macro_operator (const struct macro_token *mt)
1333 return (mt->representation.length > 0
1334 && !c_isalpha (mt->representation.string[0]));
1337 static enum token_type
1338 parse_relational_op (const struct macro_token *mt)
1340 switch (mt->token.type)
1350 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1353 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1354 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1355 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1356 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1357 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1358 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
1367 macro_evaluate_relational (const struct macro_expander *me,
1368 const struct macro_token **tokens,
1369 const struct macro_token *end)
1371 const struct macro_token *p = *tokens;
1372 char *lhs = macro_evaluate_literal (me, &p, end);
1376 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1384 char *rhs = macro_evaluate_literal (me, &p, end);
1391 struct string lhs_tmp, rhs_tmp;
1392 int cmp = strcmp (unquote_string_in_place (lhs, me->segmenter_mode,
1394 unquote_string_in_place (rhs, me->segmenter_mode,
1396 ds_destroy (&lhs_tmp);
1397 ds_destroy (&rhs_tmp);
1402 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1404 : op == T_LT ? cmp < 0
1405 : op == T_GT ? cmp > 0
1406 : op == T_LE ? cmp <= 0
1407 : /* T_GE */ cmp >= 0);
1410 return xstrdup (b ? "1" : "0");
1414 macro_evaluate_not (const struct macro_expander *me,
1415 const struct macro_token **tokens,
1416 const struct macro_token *end)
1418 const struct macro_token *p = *tokens;
1420 unsigned int negations = 0;
1422 && (ss_equals_case (p->representation, ss_cstr ("!NOT"))
1423 || ss_equals (p->representation, ss_cstr ("~"))))
1429 char *operand = macro_evaluate_relational (me, &p, end);
1430 if (!operand || !negations)
1436 bool b = strcmp (operand, "0") ^ (negations & 1);
1439 return xstrdup (b ? "1" : "0");
1443 macro_evaluate_and (const struct macro_expander *me,
1444 const struct macro_token **tokens,
1445 const struct macro_token *end)
1447 const struct macro_token *p = *tokens;
1448 char *lhs = macro_evaluate_not (me, &p, end);
1453 && (ss_equals_case (p->representation, ss_cstr ("!AND"))
1454 || ss_equals (p->representation, ss_cstr ("&"))))
1457 char *rhs = macro_evaluate_not (me, &p, end);
1464 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1467 lhs = xstrdup (b ? "1" : "0");
1474 macro_evaluate_or (const struct macro_expander *me,
1475 const struct macro_token **tokens,
1476 const struct macro_token *end)
1478 const struct macro_token *p = *tokens;
1479 char *lhs = macro_evaluate_and (me, &p, end);
1484 && (ss_equals_case (p->representation, ss_cstr ("!OR"))
1485 || ss_equals (p->representation, ss_cstr ("|"))))
1488 char *rhs = macro_evaluate_and (me, &p, end);
1495 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1498 lhs = xstrdup (b ? "1" : "0");
/* Evaluates the macro expression that starts at *TOKENS and extends for at
   most N_TOKENS tokens, using ME as the expansion context.

   This is the entry point to the recursive-descent evaluator: it hands off to
   macro_evaluate_or(), which in turn descends through the !AND, !NOT,
   relational and literal levels.  The end-of-input pointer passed down is
   *TOKENS + N_TOKENS, and the result is whatever macro_evaluate_or()
   returns. */
1505 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1506 const struct macro_expander *me)
1508 return macro_evaluate_or (me, tokens, *tokens + n_tokens);
1512 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1513 const struct macro_expander *me,
1516 char *s = macro_evaluate_expression (tokens, n_tokens, me);
1520 struct macro_tokens mts = { .n = 0 };
1521 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, me->stack);
1522 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
1524 macro_error (me->stack, mts.n > 0 ? &mts.mts[0] : NULL,
1525 _("Macro expression must evaluate to "
1526 "a number (not \"%s\")."), s);
1528 macro_tokens_uninit (&mts);
1532 *number = token_number (&mts.mts[0].token);
1534 macro_tokens_uninit (&mts);
/* Scans the tokens in [P, END) for the token that ends an !IF clause.

   Only T_MACRO_ID tokens are examined.  !IF, !IFEND and !ELSE are recognized
   with ss_equals_case(), and an !ELSE is considered only while `nesting' is
   zero -- presumably so that an !ELSE belonging to a nested !IF is skipped
   (TODO confirm against the nesting bookkeeping).

   macro_expand_if() treats a null result as a missing !ELSE or !IFEND. */
1538 static const struct macro_token *
1539 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1542 for (; p < end; p++)
1544 if (p->token.type != T_MACRO_ID)
1547 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1549 else if (ss_equals_case (p->token.string, ss_cstr ("!IFEND")))
1555 else if (ss_equals_case (p->token.string, ss_cstr ("!ELSE")) && !nesting)
/* Expands a construct of the form !IF condition !THEN ... [!ELSE ...] !IFEND
   that begins at TOKENS, with N_TOKENS tokens available, appending the
   expansion of the selected branch to EXP.  ME supplies the expander state.

   On success the return value is the number of tokens from TOKENS through the
   closing !IFEND, inclusive.

   NOTE(review): the returns taken on the error paths are not visible in this
   excerpt; confirm what is returned when the input is not a well-formed !IF
   construct. */
1562 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1563 const struct macro_expander *me,
1564 struct macro_tokens *exp)
1566 const struct macro_token *p = tokens;
1567 const struct macro_token *end = tokens + n_tokens;
/* The construct must start with !IF. */
1569 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
/* Evaluate the condition; P is advanced by the evaluator. */
1573 char *result = macro_evaluate_expression (&p, end - p, me);
/* The condition is true unless the result is exactly the string "0". */
1576 bool b = strcmp (result, "0");
1580 || p->token.type != T_MACRO_ID
1581 || !ss_equals_case (p->token.string, ss_cstr ("!THEN")))
1583 macro_error (me->stack, p < end ? p : NULL,
1584 _("!THEN expected in macro !IF construct."));
/* The !THEN branch runs from the token after !THEN up to the clause keyword
   located by find_ifend_clause(). */
1588 const struct macro_token *start_then = p + 1;
1589 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1592 macro_error (me->stack, NULL,
1593 _("!ELSE or !IFEND expected in macro !IF construct."));
/* If the !THEN branch ended at !ELSE, the !ELSE branch follows it and must be
   closed by !IFEND. */
1597 const struct macro_token *start_else, *end_if;
1598 if (ss_equals_case (end_then->token.string, ss_cstr ("!ELSE")))
1600 start_else = end_then + 1;
1601 end_if = find_ifend_clause (start_else, end);
1603 || !ss_equals_case (end_if->token.string, ss_cstr ("!IFEND")))
1605 macro_error (me->stack, end_if ? end_if : NULL,
1606 _("!IFEND expected in macro !IF construct."));
/* START and N describe the token range of whichever branch is chosen. */
1616 const struct macro_token *start;
1621 n = end_then - start_then;
1623 else if (start_else)
1626 n = end_if - start_else;
/* Wrap the chosen branch's tokens (without copying them) and expand them with
   a copy of ME whose stack pointer refers to a new entry. */
1636 struct macro_tokens mts = {
1637 .mts = CONST_CAST (struct macro_token *, start),
1640 struct macro_expansion_stack stack = {
1644 struct macro_expander subme = *me;
1645 subme.stack = &stack;
1646 macro_expand (&mts, &subme, exp);
/* Tokens consumed: everything up to and including !IFEND. */
1648 return (end_if + 1) - tokens;
/* Parses a construct of the form !LET variable = expression that begins at
   TOKENS, with N_TOKENS tokens available, and stores the evaluated expression
   under the variable's name in ME->vars.

   The variable name must be a macro identifier that is neither a macro
   keyword nor, when ME->macro is non-null, the name of one of that macro's
   parameters.

   NOTE(review): the return statements are not visible in this excerpt.  The
   caller stores the result in a size_t, presumably a count of consumed
   tokens with zero meaning "not a !LET" or failure -- confirm. */
1652 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1653 const struct macro_expander *me)
1655 const struct macro_token *p = tokens;
1656 const struct macro_token *end = tokens + n_tokens;
/* The construct must start with !LET. */
1658 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
1662 if (p >= end || p->token.type != T_MACRO_ID)
1664 macro_error (me->stack, p < end ? p : NULL,
1665 _("Expected macro variable name following !LET."));
1668 const struct substring var_name = p->token.string;
/* Reject names that would clash with keywords or the macro's parameters. */
1669 if (is_macro_keyword (var_name)
1670 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1672 macro_error (me->stack, p < end ? p : NULL,
1673 _("Cannot use argument name or macro keyword "
1674 "\"%.*s\" as !LET variable."),
1675 (int) var_name.length, var_name.string);
1680 if (p >= end || p->token.type != T_EQUALS)
1682 macro_error (me->stack, p < end ? p : NULL,
1683 _("Expected `=' following !LET."));
/* Evaluate the right-hand side; P is advanced by the evaluator. */
1688 char *value = macro_evaluate_expression (&p, end - p, me);
/* Store the value under a duplicated copy of the name.  The "nocopy" variant
   presumably takes ownership of both strings -- confirm against
   stringi-map.h. */
1692 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens in [P, END) for the !DOEND that closes a !DO loop.

   Only T_MACRO_ID tokens are examined.  !DO is recognized as well as !DOEND,
   presumably so that nested loops can be skipped -- the nesting bookkeeping
   and the return statements are not visible in this excerpt.

   If the scan finishes without a match, the error "Missing !DOEND." is
   reported against STACK.  Callers test the result against null. */
1696 static const struct macro_token *
1697 find_doend (const struct macro_expansion_stack *stack,
1698 const struct macro_token *p, const struct macro_token *end)
1701 for (; p < end; p++)
/* Anything other than a macro identifier cannot be !DO or !DOEND. */
1703 if (p->token.type != T_MACRO_ID)
1706 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1708 else if (ss_equals_case (p->token.string, ss_cstr ("!DOEND")))
1715 macro_error (stack, NULL, _("Missing !DOEND."));
/* Expands a !DO loop that begins at TOKENS, with N_TOKENS tokens available,
   appending the expansion of each iteration of the loop body to EXP.  ME
   supplies the expander state; the loop variable is stored in ME->vars.

   Two forms are handled:

     !DO var !IN (list) ... !DOEND
       iterates over the tokens obtained by evaluating and tokenizing the
       list expression.

     !DO var = first !TO last [!BY step] ... !DOEND
       iterates numerically; a zero step is rejected.

   The body is expanded with a copy of ME that has its own `break_' flag, so
   that !BREAK ends only this loop.  The number of iterations is limited by
   settings_get_miterate().

   On success the return value is the number of tokens from TOKENS through
   !DOEND, inclusive.

   NOTE(review): the returns taken on the error paths are not visible in this
   excerpt; confirm what is returned on failure. */
1720 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1721 const struct macro_expander *me,
1722 struct macro_tokens *exp)
1724 const struct macro_token *p = tokens;
1725 const struct macro_token *end = tokens + n_tokens;
/* The construct must start with !DO. */
1727 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
1731 if (p >= end || p->token.type != T_MACRO_ID)
1733 macro_error (me->stack, p < end ? p : NULL,
1734 _("Expected macro variable name following !DO."));
1737 const struct substring var_name = p->token.string;
/* Reject names that would clash with keywords or the macro's parameters. */
1738 if (is_macro_keyword (var_name)
1739 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1741 macro_error (me->stack, p, _("Cannot use argument name or macro "
1742 "keyword as !DO variable."));
/* The loop body runs under a copy of ME with its own stack entry and its own
   break flag. */
1747 struct macro_expansion_stack substack = {
1751 bool break_ = false;
1752 struct macro_expander subme = *me;
1753 subme.break_ = &break_;
1754 subme.stack = &substack;
1756 int miterate = settings_get_miterate ();
/* List form: !DO var !IN (list). */
1757 if (p < end && p->token.type == T_MACRO_ID
1758 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1761 char *list = macro_evaluate_expression (&p, end - p, &subme);
/* Tokenize the evaluated list; each resulting token is one loop value. */
1765 struct macro_tokens items = { .n = 0 };
1766 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
1770 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1773 macro_tokens_uninit (&items);
/* INNER refers to the loop body's tokens in place, without copying them. */
1777 const struct macro_tokens inner = {
1778 .mts = CONST_CAST (struct macro_token *, p),
1782 for (size_t i = 0; i < items.n && !break_; i++)
1786 macro_error (&substack, NULL,
1787 _("!DO loop over list exceeded "
1788 "maximum number of iterations %d. "
1789 "(Use SET MITERATE to change the limit.)"),
1793 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1794 ss_xstrdup (items.mts[i].representation));
1796 macro_expand (&inner, &subme, exp);
/* Release the tokenized list on the success path too; previously it was
   released only when !DOEND was missing, so every completed !IN loop leaked
   it. */
macro_tokens_uninit (&items);
1798 return do_end - tokens + 1;
/* Numeric form: !DO var = first !TO last [!BY step]. */
1800 else if (p < end && p->token.type == T_EQUALS)
1804 if (!macro_evaluate_number (&p, end - p, &subme, &first))
1807 if (p >= end || p->token.type != T_MACRO_ID
1808 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1810 macro_error (subme.stack, p < end ? p : NULL,
1811 _("Expected !TO in numerical !DO loop."));
1817 if (!macro_evaluate_number (&p, end - p, &subme, &last))
1821 if (p < end && p->token.type == T_MACRO_ID
1822 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1825 if (!macro_evaluate_number (&p, end - p, &subme, &by))
1830 macro_error (subme.stack, NULL, _("!BY value cannot be zero."));
1835 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1838 const struct macro_tokens inner = {
1839 .mts = CONST_CAST (struct macro_token *, p),
/* Iterate only if the step moves FIRST toward LAST. */
1843 if ((by > 0 && first <= last) || (by < 0 && first >= last))
1846 for (double index = first;
/* The conditional must be parenthesized: `?:' binds more loosely than `&&',
   so without the parentheses the break flag was tested only for a negative
   step and !BREAK did not stop a loop with a positive step. */
1847 (by > 0 ? index <= last : index >= last) && !break_;
1852 macro_error (subme.stack, NULL,
1853 _("Numerical !DO loop exceeded "
1854 "maximum number of iterations %d. "
1855 "(Use SET MITERATE to change the limit.)"),
/* Format the current index as text and store it as the loop variable. */
1860 char index_s[DBL_BUFSIZE_BOUND];
1861 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1862 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1865 macro_expand (&inner, &subme, exp);
1869 return do_end - tokens + 1;
/* Neither `=' nor !IN followed the variable name. */
1873 macro_error (me->stack, p < end ? p : NULL,
1874 _("Expected `=' or !IN in !DO loop."));
/* Expands the macro tokens in MTS under expander state ME and appends the
   result to EXP.

   Each token is tried, in the order the code below shows, as: a reference to
   a parameter of the macro being expanded, the marker tested by
   is_bang_star(), a !IF construct, a macro variable stored in ME->vars, a
   call to another macro, !break, a macro function, !LET, !DO, and
   !onexpand/!offexpand.  Tokens that match none of these are copied to EXP.

   NOTE(review): many short lines (braces, `else', `continue', `return', and
   some guarding conditions) are not visible in this excerpt, so the comments
   below describe only what the visible lines establish. */
1880 macro_expand (const struct macro_tokens *mts,
1881 const struct macro_expander *me,
1882 struct macro_tokens *exp)
/* Nesting limit reached: report it and pass the tokens through unexpanded. */
1884 if (me->nesting_countdown <= 0)
1886 macro_error (me->stack, NULL, _("Maximum nesting level %d exceeded. "
1887 "(Use SET MNEST to change the limit.)"),
1888 settings_get_mnest ());
1889 for (size_t i = 0; i < mts->n; i++)
1890 macro_tokens_add (exp, &mts->mts[i]);
/* Main loop.  It stops early once the break flag, if ME has one, is set. */
1894 for (size_t i = 0; i < mts->n && (!me->break_ || !*me->break_); i++)
1896 const struct macro_token *mt = &mts->mts[i];
1897 const struct token *token = &mt->token;
/* Parameter reference: only possible while expanding a macro body. */
1898 if (token->type == T_MACRO_ID && me->macro)
1900 const struct macro_param *param = macro_find_parameter_by_name (
1901 me->macro, token->string);
/* The argument index is the parameter's offset in the macro's parameter
   array. */
1904 const struct macro_tokens *arg
1905 = me->args[param - me->macro->params];
/* If expansion is enabled and the parameter requests it, expand the argument
   recursively with its own variable map. */
1906 if (*me->expand && param->expand_arg)
1908 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1909 struct macro_expansion_stack stack = {
1910 .name = param->name,
1913 struct macro_expander subme = {
1914 .macros = me->macros,
1917 .segmenter_mode = me->segmenter_mode,
1918 .expand = me->expand,
1921 .nesting_countdown = me->nesting_countdown,
1924 macro_expand (arg, &subme, exp);
1925 stringi_map_destroy (&vars);
/* Copy the argument's tokens verbatim (presumably the `else' branch of the
   test above -- confirm). */
1928 for (size_t i = 0; i < arg->n; i++)
1929 macro_tokens_add (exp, &arg->mts[i]);
/* All-arguments marker: walk every parameter of the macro; the test on
   `positional' presumably skips the non-positional ones -- confirm. */
1933 if (is_bang_star (mts->mts, mts->n, i))
1935 for (size_t j = 0; j < me->macro->n_params; j++)
1937 const struct macro_param *param = &me->macro->params[j];
1938 if (!param->positional)
1941 const struct macro_tokens *arg = me->args[j];
1942 if (*me->expand && param->expand_arg)
1944 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1945 struct macro_expansion_stack stack = {
1949 struct macro_expander subme = {
1950 .macros = me->macros,
1953 .segmenter_mode = me->segmenter_mode,
1954 .expand = me->expand,
1957 .nesting_countdown = me->nesting_countdown,
1960 macro_expand (arg, &subme, exp);
1961 stringi_map_destroy (&vars);
1964 for (size_t k = 0; k < arg->n; k++)
1965 macro_tokens_add (exp, &arg->mts[k]);
/* Try a !IF construct starting at this token. */
1971 size_t n = macro_expand_if (&mts->mts[i], mts->n - i, me, exp);
/* Macro variable: look the identifier up in ME->vars and tokenize its
   value into EXP. */
1979 if (token->type == T_MACRO_ID)
1981 const char *value = stringi_map_find__ (me->vars,
1982 token->string.string,
1983 token->string.length);
1986 macro_tokens_from_string__ (exp, ss_cstr (value),
1987 me->segmenter_mode, me->stack);
/* Call to another macro: feed the following tokens to the call until
   macro_call_add() returns nonzero, substituting an end-of-command token
   once MTS is exhausted. */
1994 struct macro_call *submc;
1995 int retval = macro_call_create (me->macros, token, &submc);
1996 for (size_t j = 1; !retval; j++)
1998 const struct macro_token endcmd
1999 = { .token = { .type = T_ENDCMD } };
2000 retval = macro_call_add (
2001 submc, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
/* Expand the called macro's body with its own variable map, a stack entry
   describing the macro's definition, and one less level of nesting. */
2006 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
2007 struct macro_expansion_stack stack = {
2008 .name = submc->macro->name,
2009 .file_name = submc->macro->file_name,
2010 .first_line = submc->macro->first_line,
2011 .last_line = submc->macro->last_line,
2014 struct macro_expander subme = {
2015 .macros = submc->macros,
2016 .macro = submc->macro,
2017 .args = submc->args,
2018 .segmenter_mode = me->segmenter_mode,
2019 .expand = me->expand,
2022 .nesting_countdown = me->nesting_countdown - 1,
2025 macro_expand (&submc->macro->body, &subme, exp);
2026 macro_call_destroy (submc);
2027 stringi_map_destroy (&vars);
2031 macro_call_destroy (submc);
/* Tokens that are not macro identifiers are copied through unchanged. */
2034 if (token->type != T_MACRO_ID)
2036 macro_tokens_add (exp, mt);
/* !break: the visible line reports its use outside !DO; the guarding
   condition and the in-loop handling are not visible here. */
2040 if (ss_equals_case (token->string, ss_cstr ("!break")))
2043 macro_error (me->stack, mt, _("!BREAK outside !DO."));
/* Macro function: on success, skip the tokens it consumed and tokenize its
   textual output into EXP. */
2051 struct parse_macro_function_ctx ctx = {
2052 .input = &mts->mts[i],
2053 .n_input = mts->n - i,
2056 struct string function_output = DS_EMPTY_INITIALIZER;
2057 size_t function_consumed;
2058 if (expand_macro_function (&ctx, &function_output, &function_consumed))
2060 i += function_consumed - 1;
2062 macro_tokens_from_string__ (exp, function_output.ss,
2063 me->segmenter_mode, me->stack);
2064 ds_destroy (&function_output);
/* Try !LET, then !DO, starting at this token. */
2069 size_t n = macro_parse_let (&mts->mts[i], mts->n - i, me);
2076 n = macro_expand_do (&mts->mts[i], mts->n - i, me, exp);
/* !onexpand and !offexpand toggle the shared expansion flag. */
2083 if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
2085 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
2086 *me->expand = false;
/* Any other macro identifier is copied to the output. */
2088 macro_tokens_add (exp, mt);
/* Expands the completed macro call MC, tokenizing any generated text in
   SEGMENTER_MODE, and appends the expansion of the macro's body to EXP.

   MC must be in state MC_FINISHED; this is asserted.  The expansion gets a
   fresh macro-variable map, a stack entry that records the macro's name and
   definition location, and a nesting budget taken from settings_get_mnest().

   NOTE(review): some initializer fields of the expander are not visible in
   this excerpt. */
2093 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2094 struct macro_tokens *exp)
2096 assert (mc->state == MC_FINISHED);
/* Variables set with !LET or !DO during this expansion live in VARS. */
2099 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
2100 struct macro_expansion_stack stack = {
2101 .name = mc->macro->name,
2102 .file_name = mc->macro->file_name,
2103 .first_line = mc->macro->first_line,
2104 .last_line = mc->macro->last_line,
2106 struct macro_expander me = {
2107 .macros = mc->macros,
2110 .segmenter_mode = segmenter_mode,
2114 .nesting_countdown = settings_get_mnest (),
2118 macro_expand (&mc->macro->body, &me, exp);
2120 stringi_map_destroy (&vars);