1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list. Entries are not dynamically allocated
46 but on the program stack.
48 The outermost entry, where 'next' is NULL, represents the source location of
49 the call to the macro. */
50 struct macro_expansion_stack
52 const struct macro_expansion_stack *next; /* Next outer stack entry. */
53 const char *name; /* A macro name or !IF, !DO, etc. */
54 const struct msg_location *location; /* Source location if available. */
57 /* Reports an error during macro expansion. STACK is the stack for reporting
58 the location of the error, MT is the optional token at which the error was
59 detected, and FORMAT along with the varargs is the message to report. */
60 static void PRINTF_FORMAT (3, 0)
61 macro_error_valist (const struct macro_expansion_stack *stack,
62 const struct macro_token *mt, const char *format,
65 struct msg_stack **ms = NULL;
66 size_t allocated_ms = 0;
69 const struct macro_expansion_stack *p;
70 for (p = stack; p && p->next; p = p->next)
72 if (n_ms >= allocated_ms)
73 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
75 /* TRANSLATORS: These strings are used for explaining the context of an
76 error. The "At ..." or "In the expansion of" message appears first, followed
77 by zero or more "inside the expansion of" messages. `innermost',
78 `next_inner', etc., are names of macros, and `foobar' is a piece of
81 foo.sps:12: At `foobar' in the expansion of 'innermost',
82 foo.sps:23: inside the expansion of 'next_inner',
83 foo.sps:34: inside the expansion of 'next_inner2',
84 foo.sps:45: inside the expansion of 'outermost',
85 foo.sps:76: This is the actual error message. */
89 if (mt && mt->syntax.length)
92 str_ellipsize (mt->syntax, syntax, sizeof syntax);
93 description = xasprintf (_("At `%s' in the expansion of `%s',"),
97 description = xasprintf (_("In the expansion of `%s',"), p->name);
100 description = xasprintf (_("inside the expansion of `%s',"), p->name);
102 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
103 *ms[n_ms] = (struct msg_stack) {
104 .location = msg_location_dup (p->location),
105 .description = description,
110 struct msg *m = xmalloc (sizeof *m);
112 .category = MSG_C_SYNTAX,
113 .severity = MSG_S_ERROR,
116 .location = msg_location_dup (p ? p->location : NULL),
117 .text = xvasprintf (format, args),
122 /* Reports an error during macro expansion. STACK is the stack for reporting
123 the location of the error, MT is the optional token at which the error was
124 detected, and FORMAT along with the varargs is the message to report. */
125 static void PRINTF_FORMAT (3, 4)
126 macro_error (const struct macro_expansion_stack *stack,
127 const struct macro_token *mt, const char *format, ...)
130 va_start (args, format);
131 macro_error_valist (stack, mt, format, args);
136 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
138 token_copy (&dst->token, &src->token);
139 ss_alloc_substring (&dst->syntax, src->syntax);
143 macro_token_uninit (struct macro_token *mt)
145 token_uninit (&mt->token);
146 ss_dealloc (&mt->syntax);
150 macro_token_to_syntax (struct macro_token *mt, struct string *s)
152 ds_put_substring (s, mt->syntax);
155 is_macro_keyword (struct substring s)
157 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
158 if (stringi_set_is_empty (&keywords))
160 static const char *kws[] = {
181 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
182 stringi_set_insert (&keywords, kws[i]);
185 ss_ltrim (&s, ss_cstr ("!"));
186 return stringi_set_contains_len (&keywords, s.string, s.length);
190 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
192 *dst = (struct macro_tokens) {
193 .mts = xmalloc (src->n * sizeof *dst->mts),
197 for (size_t i = 0; i < src->n; i++)
198 macro_token_copy (&dst->mts[i], &src->mts[i]);
202 macro_tokens_uninit (struct macro_tokens *mts)
204 for (size_t i = 0; i < mts->n; i++)
205 macro_token_uninit (&mts->mts[i]);
210 macro_tokens_add_uninit (struct macro_tokens *mts)
212 if (mts->n >= mts->allocated)
213 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
214 return &mts->mts[mts->n++];
218 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
220 macro_token_copy (macro_tokens_add_uninit (mts), mt);
223 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
224 if nonnull, for error reporting. */
226 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
227 enum segmenter_mode mode,
228 const struct macro_expansion_stack *stack)
230 struct segmenter segmenter = segmenter_init (mode, true);
231 struct substring body = src;
233 while (body.length > 0)
235 struct macro_token mt = {
236 .token = { .type = T_STOP },
237 .syntax = { .string = body.string },
239 struct token *token = &mt.token;
241 enum segment_type type;
242 int seg_len = segmenter_push (&segmenter, body.string,
243 body.length, true, &type);
244 assert (seg_len >= 0);
246 struct substring segment = ss_head (body, seg_len);
247 enum tokenize_result result = token_from_segment (type, segment, token);
248 ss_advance (&body, seg_len);
256 mt.syntax.length = body.string - mt.syntax.string;
257 macro_tokens_add (mts, &mt);
261 mt.syntax.length = body.string - mt.syntax.string;
262 macro_error (stack, &mt, "%s", token->string.string);
266 token_uninit (token);
270 /* Tokenizes SRC according to MODE and appends the tokens to MTS. */
272 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
273 enum segmenter_mode mode)
275 macro_tokens_from_string__ (mts, src, mode, NULL);
279 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
281 for (size_t i = 0; i < mts->n; i++)
282 token_print (&mts->mts[i].token, stream);
287 TC_ENDCMD, /* No space before or after (new-line after). */
288 TC_BINOP, /* Space on both sides. */
289 TC_COMMA, /* Space afterward. */
290 TC_ID, /* Don't need spaces except sequentially. */
291 TC_PUNCT, /* Don't need spaces except sequentially. */
295 needs_space (enum token_class prev, enum token_class next)
297 /* Don't need a space before or after the end of a command.
298 (A new-line is needed afterward as a special case.) */
299 if (prev == TC_ENDCMD || next == TC_ENDCMD)
302 /* Binary operators always have a space on both sides. */
303 if (prev == TC_BINOP || next == TC_BINOP)
306 /* A comma always has a space afterward. */
307 if (prev == TC_COMMA)
310 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
311 two of them in a row. */
315 static enum token_class
316 classify_token (enum token_type type)
368 /* Appends syntax for the tokens in MTS to S. If OFS and LEN are nonnull, sets
369 OFS[i] to the offset within S of the start of token 'i' in MTS and LEN[i] to
370 its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1] because some
371 tokens are separated by white space. */
373 macro_tokens_to_syntax (struct macro_tokens *mts, struct string *s,
374 size_t *ofs, size_t *len)
376 assert ((ofs != NULL) == (len != NULL));
381 for (size_t i = 0; i < mts->n; i++)
385 enum token_type prev = mts->mts[i - 1].token.type;
386 enum token_type next = mts->mts[i].token.type;
388 if (prev == T_ENDCMD)
389 ds_put_byte (s, '\n');
392 enum token_class pc = classify_token (prev);
393 enum token_class nc = classify_token (next);
394 if (needs_space (pc, nc))
395 ds_put_byte (s, ' ');
400 ofs[i] = s->ss.length;
401 macro_token_to_syntax (&mts->mts[i], s);
403 len[i] = s->ss.length - ofs[i];
408 macro_destroy (struct macro *m)
414 msg_location_destroy (m->location);
415 for (size_t i = 0; i < m->n_params; i++)
417 struct macro_param *p = &m->params[i];
420 macro_tokens_uninit (&p->def);
421 token_uninit (&p->start);
422 token_uninit (&p->end);
425 macro_tokens_uninit (&m->body);
430 macro_set_create (void)
432 struct macro_set *set = xmalloc (sizeof *set);
433 *set = (struct macro_set) {
434 .macros = HMAP_INITIALIZER (set->macros),
/* Tears down SET: each macro is unlinked from SET's hash map and destroyed
   with macro_destroy(), and then the hash map itself is destroyed. */
440 macro_set_destroy (struct macro_set *set)
445 struct macro *macro, *next;
446 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
/* The node is removed from the map before the macro that embeds it is
   destroyed. */
448 hmap_delete (&set->macros, &macro->hmap_node);
449 macro_destroy (macro);
451 hmap_destroy (&set->macros);
456 hash_macro_name (const char *name)
458 return utf8_hash_case_string (name, 0);
461 static struct macro *
462 macro_set_find__ (struct macro_set *set, const char *name)
464 if (macro_set_is_empty (set))
468 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
469 hash_macro_name (name), &set->macros)
470 if (!utf8_strcasecmp (macro->name, name))
477 macro_set_find (const struct macro_set *set, const char *name)
479 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
482 /* Adds M to SET. M replaces any existing macro with the same name. Takes
485 macro_set_add (struct macro_set *set, struct macro *m)
487 struct macro *victim = macro_set_find__ (set, m->name);
490 hmap_delete (&set->macros, &victim->hmap_node);
491 macro_destroy (victim);
494 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
497 /* Macro call parsing. */
504 /* Accumulating tokens in mc->params toward the end of any type of
508 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
511 /* Expecting a keyword for a keyword argument. */
514 /* Expecting an equal sign for a keyword argument. */
517 /* Macro fully parsed and ready for expansion. */
521 /* Parsing macro calls. This is an FSM driven by macro_call_create() and
522 macro_call_add() to identify the macro being called and obtain its
523 arguments. 'state' identifies the FSM state. */
526 const struct macro_set *macros;
527 const struct macro *macro;
528 struct macro_tokens **args;
529 const struct macro_expansion_stack *stack;
530 const struct macro_expander *me;
534 const struct macro_param *param; /* Parameter currently being parsed. */
537 static bool macro_expand_arg (const struct token *,
538 const struct macro_expander *,
539 struct macro_tokens *exp);
541 /* Completes macro expansion by initializing arguments that weren't supplied to
544 mc_finished (struct macro_call *mc)
546 mc->state = MC_FINISHED;
547 for (size_t i = 0; i < mc->macro->n_params; i++)
549 mc->args[i] = &mc->macro->params[i].def;
554 mc_next_arg (struct macro_call *mc)
558 assert (!mc->macro->n_params);
559 return mc_finished (mc);
561 else if (mc->param->positional)
564 if (mc->param >= &mc->macro->params[mc->macro->n_params])
565 return mc_finished (mc);
568 mc->state = (!mc->param->positional ? MC_KEYWORD
569 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
576 for (size_t i = 0; i < mc->macro->n_params; i++)
579 mc->state = MC_KEYWORD;
582 return mc_finished (mc);
586 static void PRINTF_FORMAT (3, 4)
587 mc_error (const struct macro_call *mc, const struct msg_location *loc,
588 const char *format, ...)
591 va_start (args, format);
594 const struct macro_expansion_stack stack = { .location = loc };
595 macro_error_valist (&stack, NULL, format, args);
598 macro_error_valist (mc->stack, NULL, format, args);
603 mc_add_arg (struct macro_call *mc, const struct macro_token *mt,
604 const struct msg_location *loc)
606 const struct macro_param *p = mc->param;
608 const struct token *token = &mt->token;
609 if ((token->type == T_ENDCMD || token->type == T_STOP)
610 && p->arg_type != ARG_CMDEND)
613 _("Unexpected end of command reading argument %s "
614 "to macro %s."), mc->param->name, mc->macro->name);
616 mc->state = MC_ERROR;
622 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
624 *argp = xzalloc (sizeof **argp);
626 bool add_token; /* Should we add 'mt' to the current arg? */
627 bool next_arg; /* Should we advance to the next arg? */
628 if (p->arg_type == ARG_N_TOKENS)
630 next_arg = (*argp)->n + 1 >= p->n_tokens;
635 next_arg = (p->arg_type == ARG_CMDEND
636 ? token->type == T_ENDCMD || token->type == T_STOP
637 : token_equal (token, &p->end));
638 add_token = !next_arg;
643 if (!macro_expand_arg (&mt->token, mc->me, *argp))
644 macro_tokens_add (*argp, mt);
646 return next_arg ? mc_next_arg (mc) : 0;
650 mc_expected (struct macro_call *mc, const struct macro_token *actual,
651 const struct msg_location *loc, const struct token *expected)
653 const struct substring actual_s = (actual->syntax.length ? actual->syntax
654 : ss_cstr (_("<end of input>")));
655 char *expected_s = token_to_string (expected);
657 _("Found `%.*s' while expecting `%s' reading argument %s "
659 (int) actual_s.length, actual_s.string, expected_s,
660 mc->param->name, mc->macro->name);
663 mc->state = MC_ERROR;
668 mc_enclose (struct macro_call *mc, const struct macro_token *mt,
669 const struct msg_location *loc)
671 const struct token *token = &mt->token;
674 if (token_equal (&mc->param->start, token))
680 return mc_expected (mc, mt, loc, &mc->param->start);
683 static const struct macro_param *
684 macro_find_parameter_by_name (const struct macro *m, struct substring name)
689 ss_ltrim (&name, ss_cstr ("!"));
691 for (size_t i = 0; i < m->n_params; i++)
693 const struct macro_param *p = &m->params[i];
694 struct substring p_name = ss_cstr (p->name + 1);
695 if (!utf8_strncasecmp (p_name.string, p_name.length,
696 name.string, name.length))
703 mc_keyword (struct macro_call *mc, const struct macro_token *mt,
704 const struct msg_location *loc)
706 const struct token *token = &mt->token;
707 if (token->type != T_ID)
708 return mc_finished (mc);
710 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
714 size_t arg_index = p - mc->macro->params;
716 if (mc->args[arg_index])
719 _("Argument %s multiply specified in call to macro %s."),
720 p->name, mc->macro->name);
721 mc->state = MC_ERROR;
726 mc->state = MC_EQUALS;
730 return mc_finished (mc);
734 mc_equals (struct macro_call *mc, const struct macro_token *mt,
735 const struct msg_location *loc)
737 const struct token *token = &mt->token;
740 if (token->type == T_EQUALS)
742 mc->state = mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE : MC_ARG;
746 return mc_expected (mc, mt, loc, &(struct token) { .type = T_EQUALS });
750 macro_call_create__ (const struct macro_set *macros,
751 const struct macro_expansion_stack *stack,
752 const struct macro_expander *me,
753 const struct token *token,
754 struct macro_call **mcp)
756 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
757 ? macro_set_find (macros, token->string.string)
765 struct macro_call *mc = xmalloc (sizeof *mc);
766 *mc = (struct macro_call) {
770 .state = (!macro->n_params ? MC_FINISHED
771 : !macro->params[0].positional ? MC_KEYWORD
772 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
774 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
775 .param = macro->params,
781 return mc->state == MC_FINISHED ? 1 : 0;
784 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
785 macro call parser and initializes *MCP to it. Returns 0 if more tokens are
786 needed and should be added via macro_call_add(), or 1 if the caller should
787 next call macro_call_get_expansion().
789 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to
792 macro_call_create (const struct macro_set *macros,
793 const struct token *token,
794 struct macro_call **mcp)
796 return macro_call_create__ (macros, NULL, NULL, token, mcp);
800 macro_call_destroy (struct macro_call *mc)
805 for (size_t i = 0; i < mc->macro->n_params; i++)
807 struct macro_tokens *a = mc->args[i];
808 if (a && a != &mc->macro->params[i].def)
810 macro_tokens_uninit (a);
818 /* Adds TOKEN to the collection of tokens in MC that potentially need to be
821 Returns -1 if the tokens added do not actually invoke a macro. The caller
822 should consume the first token without expanding it. (Later tokens might
823 invoke a macro so it's best to feed the second token into a new expander.)
825 Returns 0 if the macro expander needs more tokens, for macro arguments or to
826 decide whether this is actually a macro invocation. The caller should call
827 macro_call_add() again with the next token.
829 Returns a positive number to indicate that the returned number of tokens
830 invoke a macro. The number returned might be less than the number of tokens
831 added because it can take a few tokens of lookahead to determine whether the
832 macro invocation is finished. The caller should call
833 macro_call_get_expansion() to obtain the expansion. */
835 macro_call_add (struct macro_call *mc, const struct macro_token *mt,
836 const struct msg_location *loc)
844 return mc_add_arg (mc, mt, loc);
847 return mc_enclose (mc, mt, loc);
850 return mc_keyword (mc, mt, loc);
853 return mc_equals (mc, mt, loc);
860 /* Macro expansion. */
862 struct macro_expander
864 /* Always available. */
865 const struct macro_set *macros; /* Macros to expand recursively. */
866 enum segmenter_mode segmenter_mode; /* Mode for tokenization. */
867 int nesting_countdown; /* Remaining nesting levels. */
868 const struct macro_expansion_stack *stack; /* Stack for error reporting. */
869 bool *expand; /* May macro calls be expanded? */
870 struct stringi_map *vars; /* Variables from !do and !let. */
872 /* Only nonnull if inside a !DO loop. */
873 bool *break_; /* Set to true to break out of loop. */
875 /* Only nonnull if expanding a macro (and not, say, a macro argument). */
876 const struct macro *macro;
877 struct macro_tokens **args;
881 macro_expand (const struct macro_token *mts, size_t n_mts,
882 const struct macro_expander *, struct macro_tokens *);
885 expand_macro_function (const struct macro_expander *me,
886 const struct macro_token *input, size_t n_input,
887 struct string *output);
889 /* Parses one function argument from the N_INPUT tokens in INPUT.
890 Each argument to a macro function is one of:
892 - A quoted string or other single literal token.
894 - An argument to the macro being expanded, e.g. !1 or a named argument.
898 - A function invocation.
900 Each function invocation yields a character sequence to be turned into a
901 sequence of tokens. The case where that character sequence is a single
902 quoted string is an important special case.
905 parse_function_arg (const struct macro_expander *me,
906 const struct macro_token *input, size_t n_input,
909 assert (n_input > 0);
911 const struct token *token = &input[0].token;
912 if (token->type == T_MACRO_ID && me->macro)
914 const struct macro_param *param = macro_find_parameter_by_name (
915 me->macro, token->string);
918 size_t param_idx = param - me->macro->params;
919 macro_tokens_to_syntax (me->args[param_idx], farg, NULL, NULL);
923 if (ss_equals (token->string, ss_cstr ("!*")))
925 for (size_t i = 0; i < me->macro->n_params; i++)
927 if (!me->macro->params[i].positional)
930 ds_put_byte (farg, ' ');
931 macro_tokens_to_syntax (me->args[i], farg, NULL, NULL);
936 const char *var = stringi_map_find__ (me->vars,
937 token->string.string,
938 token->string.length);
941 ds_put_cstr (farg, var);
945 size_t n_function = expand_macro_function (me, input, n_input, farg);
950 ds_put_substring (farg, input[0].syntax);
955 parse_function_args (const struct macro_expander *me,
956 const struct macro_token *mts, size_t n,
957 const char *function,
958 struct string_array *args)
960 if (n < 2 || mts[1].token.type != T_LPAREN)
962 macro_error (me->stack, n > 1 ? &mts[1] : NULL,
963 _("`(' expected following %s."), function);
967 for (size_t i = 2; i < n; )
969 if (mts[i].token.type == T_RPAREN)
972 struct string s = DS_EMPTY_INITIALIZER;
973 i += parse_function_arg (me, mts + i, n - i, &s);
974 string_array_append_nocopy (args, ds_steal_cstr (&s));
978 else if (mts[i].token.type == T_COMMA)
980 else if (mts[i].token.type != T_RPAREN)
982 macro_error (me->stack, &mts[i],
983 _("`,' or `)' expected in call to macro function %s."),
989 macro_error (me->stack, NULL, _("Missing `)' in call to macro function %s."),
995 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
996 struct string *content)
998 struct string_lexer slex;
999 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1001 struct token token1;
1002 if (string_lexer_next (&slex, &token1) != SLR_TOKEN
1003 || token1.type != T_STRING)
1005 token_uninit (&token1);
1009 struct token token2;
1010 if (string_lexer_next (&slex, &token2) != SLR_END)
1012 token_uninit (&token1);
1013 token_uninit (&token2);
1017 ds_put_substring (content, token1.string);
1018 token_uninit (&token1);
1023 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1026 ds_init_empty (tmp);
1027 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
1031 parse_integer (const char *s, int *np)
1036 long int n = strtol (s, &tail, 10);
1037 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
1038 tail += strspn (tail, CC_SPACES);
1039 return *tail == '\0' && errno != ERANGE && n == *np;
1043 expand_macro_function (const struct macro_expander *me,
1044 const struct macro_token *input, size_t n_input,
1045 struct string *output)
1047 if (!n_input || input[0].token.type != T_MACRO_ID)
1050 struct macro_function
1056 enum macro_function_id
1071 static const struct macro_function mfs[] = {
1072 [MF_BLANKS] = { "!BLANKS", 1, 1 },
1073 [MF_CONCAT] = { "!CONCAT", 1, INT_MAX },
1074 [MF_EVAL] = { "!EVAL", 1, 1 },
1075 [MF_HEAD] = { "!HEAD", 1, 1 },
1076 [MF_INDEX] = { "!INDEX", 2, 2 },
1077 [MF_LENGTH] = { "!LENGTH", 1, 1 },
1078 [MF_NULL] = { "!NULL", 0, 0 },
1079 [MF_QUOTE] = { "!QUOTE", 1, 1 },
1080 [MF_SUBSTR] = { "!SUBSTR", 2, 3 },
1081 [MF_TAIL] = { "!TAIL", 1, 1 },
1082 [MF_UNQUOTE] = { "!UNQUOTE", 1, 1 },
1083 [MF_UPCASE] = { "!UPCASE", 1, 1 },
1086 /* Is this a macro function? */
1087 const struct macro_function *mf;
1088 for (mf = mfs; ; mf++)
1090 if (mf >= mfs + sizeof mfs / sizeof *mfs)
1092 /* Not a macro function. */
1096 if (lex_id_match_n (ss_cstr (mf->name), input[0].token.string, 4))
1100 enum macro_function_id id = mf - mfs;
1104 struct string_array args = STRING_ARRAY_INITIALIZER;
1105 size_t n_consumed = parse_function_args (me, input, n_input, mf->name, &args);
1109 if (args.n < mf->min_args || args.n > mf->max_args)
1111 if (mf->min_args == 1 && mf->max_args == 1)
1112 macro_error (me->stack, NULL,
1113 _("Macro function %s takes one argument (not %zu)."),
1115 else if (mf->min_args == 2 && mf->max_args == 2)
1116 macro_error (me->stack, NULL,
1117 _("Macro function %s takes two arguments (not %zu)."),
1119 else if (mf->min_args == 2 && mf->max_args == 3)
1120 macro_error (me->stack, NULL,
1121 _("Macro function %s takes two or three arguments "
1124 else if (mf->min_args == 1 && mf->max_args == INT_MAX)
1125 macro_error (me->stack, NULL,
1126 _("Macro function %s needs at least one argument."),
1136 ds_put_format (output, "%zu", strlen (args.strings[0]));
1142 if (!parse_integer (args.strings[0], &n))
1144 macro_error (me->stack, NULL,
1145 _("Argument to !BLANKS must be non-negative integer "
1146 "(not \"%s\")."), args.strings[0]);
1147 string_array_destroy (&args);
1151 ds_put_byte_multiple (output, ' ', n);
1156 for (size_t i = 0; i < args.n; i++)
1157 if (!unquote_string (args.strings[i], me->segmenter_mode, output))
1158 ds_put_cstr (output, args.strings[i]);
1164 const char *s = unquote_string_in_place (args.strings[0],
1165 me->segmenter_mode, &tmp);
1167 struct macro_tokens mts = { .n = 0 };
1168 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1171 ds_put_substring (output, mts.mts[0].syntax);
1172 macro_tokens_uninit (&mts);
1179 const char *haystack = args.strings[0];
1180 const char *needle = strstr (haystack, args.strings[1]);
1181 ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0);
1186 if (unquote_string (args.strings[0], me->segmenter_mode, NULL))
1187 ds_put_cstr (output, args.strings[0]);
1190 ds_extend (output, strlen (args.strings[0]) + 2);
1191 ds_put_byte (output, '\'');
1192 for (const char *p = args.strings[0]; *p; p++)
1195 ds_put_byte (output, '\'');
1196 ds_put_byte (output, *p);
1198 ds_put_byte (output, '\'');
1205 if (!parse_integer (args.strings[1], &start) || start < 1)
1207 macro_error (me->stack, NULL,
1208 _("Second argument of !SUBSTR must be "
1209 "positive integer (not \"%s\")."),
1211 string_array_destroy (&args);
1215 int count = INT_MAX;
1216 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1218 macro_error (me->stack, NULL,
1219 _("Third argument of !SUBSTR must be "
1220 "non-negative integer (not \"%s\")."),
1222 string_array_destroy (&args);
1226 struct substring s = ss_cstr (args.strings[0]);
1227 ds_put_substring (output, ss_substr (s, start - 1, count));
1234 const char *s = unquote_string_in_place (args.strings[0],
1235 me->segmenter_mode, &tmp);
1237 struct macro_tokens mts = { .n = 0 };
1238 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1242 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1243 macro_tokens_to_syntax (&tail, output, NULL, NULL);
1245 macro_tokens_uninit (&mts);
1251 if (!unquote_string (args.strings[0], me->segmenter_mode, output))
1252 ds_put_cstr (output, args.strings[0]);
1258 const char *s = unquote_string_in_place (args.strings[0],
1259 me->segmenter_mode, &tmp);
1260 char *upper = utf8_to_upper (s);
1261 ds_put_cstr (output, upper);
1269 struct macro_tokens mts = { .n = 0 };
1270 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1271 me->segmenter_mode, me->stack);
1272 struct macro_tokens exp = { .n = 0 };
1273 struct macro_expansion_stack stack = {
1277 struct macro_expander subme = *me;
1278 subme.break_ = NULL;
1279 subme.stack = &stack;
1281 macro_expand (mts.mts, mts.n, &subme, &exp);
1282 macro_tokens_to_syntax (&exp, output, NULL, NULL);
1283 macro_tokens_uninit (&exp);
1284 macro_tokens_uninit (&mts);
1292 string_array_destroy (&args);
1296 static char *macro_evaluate_or (const struct macro_expander *me,
1297 const struct macro_token **tokens,
1298 const struct macro_token *end);
1301 macro_evaluate_literal (const struct macro_expander *me,
1302 const struct macro_token **tokens,
1303 const struct macro_token *end)
1305 const struct macro_token *p = *tokens;
1308 if (p->token.type == T_LPAREN)
1311 char *value = macro_evaluate_or (me, &p, end);
1314 if (p >= end || p->token.type != T_RPAREN)
1317 macro_error (me->stack, p < end ? p : NULL,
1318 _("Expecting ')' in macro expression."));
1325 else if (p->token.type == T_RPAREN)
1327 macro_error (me->stack, p, _("Expecting literal or function invocation "
1328 "in macro expression."));
1332 struct string function_output = DS_EMPTY_INITIALIZER;
1333 size_t function_consumed = parse_function_arg (me, p, end - p,
1335 struct string unquoted = DS_EMPTY_INITIALIZER;
1336 if (unquote_string (ds_cstr (&function_output), me->segmenter_mode,
1339 ds_swap (&function_output, &unquoted);
1340 ds_destroy (&unquoted);
1342 *tokens = p + function_consumed;
1343 return ds_steal_cstr (&function_output);
1346 /* Returns true if MT is valid as a macro operator. Only operators written as
1347 symbols (e.g. <>) are usable in macro expressions, not operators written as
1348 letters (e.g. EQ). */
1350 is_macro_operator (const struct macro_token *mt)
1352 return mt->syntax.length > 0 && !c_isalpha (mt->syntax.string[0]);
1355 static enum token_type
1356 parse_relational_op (const struct macro_token *mt)
1358 switch (mt->token.type)
1368 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1371 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1372 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1373 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1374 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1375 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1376 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
1385 macro_evaluate_relational (const struct macro_expander *me,
1386 const struct macro_token **tokens,
1387 const struct macro_token *end)
1389 const struct macro_token *p = *tokens;
1390 char *lhs = macro_evaluate_literal (me, &p, end);
1394 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1402 char *rhs = macro_evaluate_literal (me, &p, end);
1409 struct string lhs_tmp, rhs_tmp;
1410 int cmp = strcmp (unquote_string_in_place (lhs, me->segmenter_mode,
1412 unquote_string_in_place (rhs, me->segmenter_mode,
1414 ds_destroy (&lhs_tmp);
1415 ds_destroy (&rhs_tmp);
1420 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1422 : op == T_LT ? cmp < 0
1423 : op == T_GT ? cmp > 0
1424 : op == T_LE ? cmp <= 0
1425 : /* T_GE */ cmp >= 0);
1428 return xstrdup (b ? "1" : "0");
/* Evaluates an operand that may be preceded by any number of !NOT or ~
   operators. Leading negation tokens are counted in 'negations', and the
   operand itself is evaluated with macro_evaluate_relational(). When at
   least one negation was seen and the operand evaluated successfully, the
   result is a newly allocated "1" or "0": the operand counts as true if it
   differs from "0", and that truth value is flipped when the number of
   negations is odd. */
1432 macro_evaluate_not (const struct macro_expander *me,
1433 const struct macro_token **tokens,
1434 const struct macro_token *end)
1436 const struct macro_token *p = *tokens;
1438 unsigned int negations = 0;
1440 && (ss_equals_case (p->syntax, ss_cstr ("!NOT"))
1441 || ss_equals (p->syntax, ss_cstr ("~"))))
1447 char *operand = macro_evaluate_relational (me, &p, end);
1448 if (!operand || !negations)
/* strcmp() only guarantees the sign of its result, so it is reduced to 0 or
   1 before being XORed with the parity of the negation count. Without
   that, a result such as 2 or -1 would remain nonzero after the XOR and a
   negated true operand would still evaluate as true. */
1454 bool b = (strcmp (operand, "0") != 0) ^ (negations & 1);
1457 return xstrdup (b ? "1" : "0");
1461 macro_evaluate_and (const struct macro_expander *me,
1462 const struct macro_token **tokens,
1463 const struct macro_token *end)
1465 const struct macro_token *p = *tokens;
1466 char *lhs = macro_evaluate_not (me, &p, end);
1471 && (ss_equals_case (p->syntax, ss_cstr ("!AND"))
1472 || ss_equals (p->syntax, ss_cstr ("&"))))
1475 char *rhs = macro_evaluate_not (me, &p, end);
1482 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1485 lhs = xstrdup (b ? "1" : "0");
1492 macro_evaluate_or (const struct macro_expander *me,
1493 const struct macro_token **tokens,
1494 const struct macro_token *end)
1496 const struct macro_token *p = *tokens;
1497 char *lhs = macro_evaluate_and (me, &p, end);
1502 && (ss_equals_case (p->syntax, ss_cstr ("!OR"))
1503 || ss_equals (p->syntax, ss_cstr ("|"))))
1506 char *rhs = macro_evaluate_and (me, &p, end);
1513 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1516 lhs = xstrdup (b ? "1" : "0");
1523 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1524 const struct macro_expander *me)
1526 return macro_evaluate_or (me, tokens, *tokens + n_tokens);
/* Evaluates a macro expression that must yield a number.  The expression is
   evaluated to a string with macro_evaluate_expression(), and that string is
   then tokenized again with macro_tokens_from_string__().  Unless the result
   is exactly one token and that token is a number, an error naming the
   offending string is reported through macro_error().  On success the
   numeric value is stored in *NUMBER.

   Callers in this file treat a false return value as failure.

   NOTE(review): the remaining parameter declaration, the return statements,
   and the handling of a failed evaluation are not visible in this
   excerpt. */
1530 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1531 const struct macro_expander *me,
1534 char *s = macro_evaluate_expression (tokens, n_tokens, me);
/* Re-scan the result string so that its token type can be checked. */
1538 struct macro_tokens mts = { .n = 0 };
1539 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, me->stack);
1540 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
/* Point the error at the first token when there is one. */
1542 macro_error (me->stack, mts.n > 0 ? &mts.mts[0] : NULL,
1543 _("Macro expression must evaluate to "
1544 "a number (not \"%s\")."), s);
1546 macro_tokens_uninit (&mts);
1550 *number = token_number (&mts.mts[0].token);
1552 macro_tokens_uninit (&mts);
/* Scans the tokens from P up to (but not including) END, looking at
   T_MACRO_ID tokens only, and distinguishes "!IF", "!IFEND", and "!ELSE".
   "!IFEND" and "!ELSE" are recognized through lex_id_match_n() with 4 as the
   last argument (presumably the minimum abbreviation length -- confirm
   against lex_id_match_n()).

   macro_expand_if() uses the returned pointer as the token that ends a
   !THEN or !ELSE clause.

   NOTE(review): the bodies of the branches (presumably nesting bookkeeping
   for inner !IF constructs) and the return statements are not visible in
   this excerpt. */
1556 static const struct macro_token *
1557 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1560 for (; p < end; p++)
/* Tokens that are not macro identifiers are never clause delimiters. */
1562 if (p->token.type != T_MACRO_ID)
1565 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1567 else if (lex_id_match_n (p->token.string, ss_cstr ("!IFEND"), 4))
1573 else if (lex_id_match_n (p->token.string, ss_cstr ("!ELSE"), 4)
/* Expands a macro !IF construct that begins at TOKENS[0] (N_TOKENS tokens
   are available), appending the expansion of the selected clause to EXP.

   Steps visible in this excerpt:

   - The first token must be "!IF".

   - The condition is evaluated with macro_evaluate_expression(); a result
     string other than "0" means true.

   - "!THEN" must follow, otherwise an error is reported.

   - find_ifend_clause() locates the end of the !THEN clause.  If the token
     found there is "!ELSE", a second call locates the end of the !ELSE
     clause, which must be "!IFEND".

   - The chosen clause is expanded with macro_expand() using a copy of ME
     whose stack points at a new local stack entry.

   On success the return value is the number of tokens consumed, up to and
   including the token at END_IF.

   NOTE(review): the early-return paths and their return values are not
   visible in this excerpt. */
1581 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1582 const struct macro_expander *me,
1583 struct macro_tokens *exp)
1585 const struct macro_token *p = tokens;
1586 const struct macro_token *end = tokens + n_tokens;
1588 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
1592 char *result = macro_evaluate_expression (&p, end - p, me);
/* Any result other than the string "0" selects the !THEN clause. */
1595 bool b = strcmp (result, "0");
1599 || p->token.type != T_MACRO_ID
1600 || !lex_id_match_n (p->token.string, ss_cstr ("!THEN"), 4))
1602 macro_error (me->stack, p < end ? p : NULL,
1603 _("!THEN expected in macro !IF construct."));
1607 const struct macro_token *start_then = p + 1;
1608 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1611 macro_error (me->stack, NULL,
1612 _("!ELSE or !IFEND expected in macro !IF construct."));
1616 const struct macro_token *start_else, *end_if;
1617 if (lex_id_match_n (end_then->token.string, ss_cstr ("!ELSE"), 4))
1619 start_else = end_then + 1;
1620 end_if = find_ifend_clause (start_else, end);
1622 || !lex_id_match_n (end_if->token.string, ss_cstr ("!IFEND"), 4))
/* NOTE(review): "end_if ? end_if : NULL" is equivalent to plain "end_if". */
1624 macro_error (me->stack, end_if ? end_if : NULL,
1625 _("!IFEND expected in macro !IF construct."));
/* START and N describe the clause selected for expansion. */
1635 const struct macro_token *start;
1640 n = end_then - start_then;
1642 else if (start_else)
1645 n = end_if - start_else;
/* Expand the clause under its own stack entry. */
1655 struct macro_expansion_stack stack = {
1659 struct macro_expander subme = *me;
1660 subme.stack = &stack;
1661 macro_expand (start, n, &subme, exp);
/* Consumed everything through the token at END_IF. */
1663 return (end_if + 1) - tokens;
/* Parses and executes a macro !LET construct that begins at TOKENS[0]
   (N_TOKENS tokens are available).

   Steps visible in this excerpt:

   - The first token must be "!LET".

   - A T_MACRO_ID token must follow; it names the variable.  The name is
     rejected if is_macro_keyword() accepts it or if it names a parameter of
     ME->macro.

   - A T_EQUALS token must follow the name.

   - The value is computed with macro_evaluate_expression() and stored in
     ME->vars under a copy of the variable name.

   Each failure is reported through macro_error().

   NOTE(review): the return statements and the statements that advance P
   between checks are not visible in this excerpt. */
1667 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1668 const struct macro_expander *me)
1670 const struct macro_token *p = tokens;
1671 const struct macro_token *end = tokens + n_tokens;
1673 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
1677 if (p >= end || p->token.type != T_MACRO_ID)
1679 macro_error (me->stack, p < end ? p : NULL,
1680 _("Expected macro variable name following !LET."));
1683 const struct substring var_name = p->token.string;
/* A !LET variable may not shadow a macro keyword or a macro parameter. */
1684 if (is_macro_keyword (var_name)
1685 || macro_find_parameter_by_name (me->macro, var_name))
1687 macro_error (me->stack, p < end ? p : NULL,
1688 _("Cannot use argument name or macro keyword "
1689 "\"%.*s\" as !LET variable."),
1690 (int) var_name.length, var_name.string);
1695 if (p >= end || p->token.type != T_EQUALS)
1697 macro_error (me->stack, p < end ? p : NULL,
1698 _("Expected `=' following !LET."));
1703 char *value = macro_evaluate_expression (&p, end - p, me);
/* VALUE is handed to the map as is; presumably the map takes ownership of
   both strings ("nocopy") -- confirm against stringi-map.h. */
1707 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens from P up to (but not including) END, looking at
   T_MACRO_ID tokens only, and distinguishes "!DO" from "!DOEND".  "!DOEND"
   is recognized through lex_id_match_n() with 4 as the last argument.  The
   function also contains a "Missing !DOEND." error, reported against STACK,
   presumably for the case where no matching "!DOEND" is found.

   macro_expand_do() uses the returned pointer as the end of the loop body.

   NOTE(review): the bodies of the branches (presumably nesting bookkeeping
   for inner !DO loops) and the return statements are not visible in this
   excerpt. */
1711 static const struct macro_token *
1712 find_doend (const struct macro_expansion_stack *stack,
1713 const struct macro_token *p, const struct macro_token *end)
1716 for (; p < end; p++)
/* Tokens that are not macro identifiers are never loop delimiters. */
1718 if (p->token.type != T_MACRO_ID)
1721 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1723 else if (lex_id_match_n (p->token.string, ss_cstr ("!DOEND"), 4))
1730 macro_error (stack, NULL, _("Missing !DOEND."));
/* Expands a macro !DO loop that begins at TOKENS[0] (N_TOKENS tokens are
   available), appending the expansion of each iteration to EXP.

   The first token must be "!DO" and the next a T_MACRO_ID naming the loop
   variable.  The name is rejected if is_macro_keyword() accepts it or if it
   names a parameter of ME->macro.  Two loop forms are then recognized:

   - List form, introduced by "!IN": an expression is evaluated to a string
     and the string is tokenized into items.  For each item, the loop
     variable in ME->vars is set to a copy of the item's syntax and the body
     is expanded.

   - Numeric form, introduced by T_EQUALS: FIRST, then "!TO" and LAST, then
     optionally "!BY" and BY, each evaluated by macro_evaluate_number().
     The loop variable is set to the index formatted by c_dtoastr().

   In both forms find_doend() locates the end of the body, the body is
   expanded with a copy of ME that has its own break flag and stack entry,
   and an error is reported if the SET MITERATE limit is exceeded.  On
   success the return value is the number of tokens consumed through the
   token at DO_END.  Any other token after the variable name yields the
   "Expected `=' or !IN in !DO loop." error.

   NOTE(review): the early-return paths, the iteration counters, and the
   statements that advance P are not visible in this excerpt. */
1735 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1736 const struct macro_expander *me,
1737 struct macro_tokens *exp)
1739 const struct macro_token *p = tokens;
1740 const struct macro_token *end = tokens + n_tokens;
1742 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
1746 if (p >= end || p->token.type != T_MACRO_ID)
1748 macro_error (me->stack, p < end ? p : NULL,
1749 _("Expected macro variable name following !DO."));
1752 const struct substring var_name = p->token.string;
/* A !DO variable may not shadow a macro keyword or a macro parameter. */
1753 if (is_macro_keyword (var_name)
1754 || macro_find_parameter_by_name (me->macro, var_name))
1756 macro_error (me->stack, p, _("Cannot use argument name or macro "
1757 "keyword as !DO variable."));
1762 struct macro_expansion_stack substack = {
/* The loop body is expanded with a private break flag and stack entry. */
1766 bool break_ = false;
1767 struct macro_expander subme = *me;
1768 subme.break_ = &break_;
1769 subme.stack = &substack;
1771 int miterate = settings_get_miterate ();
/* List form: !DO var !IN (expression). */
1772 if (p < end && p->token.type == T_MACRO_ID
1773 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1776 char *list = macro_evaluate_expression (&p, end - p, &subme);
1780 struct macro_tokens items = { .n = 0 };
1781 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
1785 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1788 macro_tokens_uninit (&items);
/* Stop early once the body has set the break flag. */
1792 for (size_t i = 0; i < items.n && !break_; i++)
1796 macro_error (&substack, NULL,
1797 _("!DO loop over list exceeded "
1798 "maximum number of iterations %d. "
1799 "(Use SET MITERATE to change the limit.)"),
1803 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1804 ss_xstrdup (items.mts[i].syntax));
1806 macro_expand (p, do_end - p, &subme, exp);
1808 return do_end - tokens + 1;
/* Numeric form: !DO var = first !TO last [!BY by]. */
1810 else if (p < end && p->token.type == T_EQUALS)
1814 if (!macro_evaluate_number (&p, end - p, &subme, &first))
1817 if (p >= end || p->token.type != T_MACRO_ID
1818 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1820 macro_error (subme.stack, p < end ? p : NULL,
1821 _("Expected !TO in numerical !DO loop."));
1827 if (!macro_evaluate_number (&p, end - p, &subme, &last))
1831 if (p < end && p->token.type == T_MACRO_ID
1832 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1835 if (!macro_evaluate_number (&p, end - p, &subme, &by))
1840 macro_error (subme.stack, NULL, _("!BY value cannot be zero."));
1845 const struct macro_token *do_end = find_doend (subme.stack, p, end);
/* Iterate only if BY moves FIRST toward LAST. */
1848 if ((by > 0 && first <= last) || (by < 0 && first >= last))
/* NOTE(review): in the loop condition below, "&&" binds tighter than "?:",
   so it parses as
     by > 0 ? (index <= last) : ((index >= last) && !break_)
   and the break flag is only tested for descending loops.  For an ascending
   loop the condition does not stop after a break.  The likely intent is
     (by > 0 ? index <= last : index >= last) && !break_
   -- confirm and fix. */
1851 for (double index = first;
1852 by > 0 ? (index <= last) : (index >= last) && !break_;
1857 macro_error (subme.stack, NULL,
1858 _("Numerical !DO loop exceeded "
1859 "maximum number of iterations %d. "
1860 "(Use SET MITERATE to change the limit.)"),
/* Format the index as text so it can be stored as the variable's value. */
1865 char index_s[DBL_BUFSIZE_BOUND];
1866 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1867 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1870 macro_expand (p, do_end - p, &subme, exp);
1874 return do_end - tokens + 1;
1878 macro_error (me->stack, p < end ? p : NULL,
1879 _("Expected `=' or !IN in !DO loop."));
/* Appends to EXP the argument with index IDX of the macro that ME is
   expanding.

   If expansion is currently enabled (*ME->expand) and the parameter has
   expand_arg set, the argument's tokens are themselves macro-expanded.  This
   uses a fresh expander with its own, initially empty, variable map and a
   stack entry named after the parameter; the map is destroyed afterward.
   The other visible path copies the argument's tokens to EXP one by one,
   without expansion.

   NOTE(review): several initializer fields of the nested expander and the
   branch structure joining the two paths are not visible in this
   excerpt. */
1885 macro_expand_arg__ (const struct macro_expander *me, size_t idx,
1886 struct macro_tokens *exp)
1888 const struct macro_param *param = &me->macro->params[idx];
1889 const struct macro_tokens *arg = me->args[idx];
1891 if (*me->expand && param->expand_arg)
1893 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1894 struct macro_expansion_stack stack = {
1895 .name = param->name,
1898 struct macro_expander subme = {
1899 .macros = me->macros,
1902 .segmenter_mode = me->segmenter_mode,
1903 .expand = me->expand,
1906 .nesting_countdown = me->nesting_countdown,
1909 macro_expand (arg->mts, arg->n, &subme, exp);
1910 stringi_map_destroy (&vars);
/* Verbatim copy of the argument's tokens. */
1913 for (size_t i = 0; i < arg->n; i++)
1914 macro_tokens_add (exp, &arg->mts[i]);
/* Tries to expand TOKEN as a reference to a macro argument or macro
   variable, appending the result to EXP.  Three kinds of reference are
   visible in this excerpt:

   - The name of a parameter of ME->macro: that argument is expanded with
     macro_expand_arg__().

   - "!*": every argument is expanded in parameter order.

   - A variable found in ME->vars (set by !DO or !LET): its value is
     tokenized and appended.

   A null ME or a TOKEN that is not a T_MACRO_ID is checked for first.  The
   caller in this file uses the return value as a "handled" flag.

   NOTE(review): the return statements are not visible in this excerpt. */
1918 macro_expand_arg (const struct token *token, const struct macro_expander *me,
1919 struct macro_tokens *exp)
1921 if (!me || token->type != T_MACRO_ID)
1924 /* Macro arguments. */
1927 const struct macro_param *param = macro_find_parameter_by_name (
1928 me->macro, token->string);
/* The pointer difference gives the parameter's index. */
1931 macro_expand_arg__ (me, param - me->macro->params, exp);
1934 else if (ss_equals (token->string, ss_cstr ("!*")))
1936 for (size_t j = 0; j < me->macro->n_params; j++)
1937 macro_expand_arg__ (me, j, exp);
1942 /* Variables set by !DO or !LET. */
1943 const char *var = stringi_map_find__ (me->vars, token->string.string,
1944 token->string.length);
1947 macro_tokens_from_string__ (exp, ss_cstr (var),
1948 me->segmenter_mode, me->stack);
/* Expands the single construct that starts at MTS[0] (N tokens are
   available), appending its expansion to EXP.  macro_expand() treats the
   return value as the number of tokens consumed and asserts that it is
   positive and no larger than N.

   The constructs are tried in this order, as far as the excerpt shows:

   - A recursive macro call.  Tokens are fed to the call with
     macro_call_add() until it reports a result, with a synthetic T_ENDCMD
     token supplied once the input runs out.  The called macro's body is
     then expanded with a fresh variable map and a nesting countdown reduced
     by one.

   - A token that is not a T_MACRO_ID, which is copied to EXP.

   - A macro parameter or variable, via macro_expand_arg().

   - A macro function, via expand_macro_function(), whose string output is
     tokenized into EXP.

   - !IF, !LET, and !DO, via macro_expand_if(), macro_parse_let(), and
     macro_expand_do().

   - !BREAK, !ONEXPAND, and !OFFEXPAND, matched through lex_id_match_n()
     with 4 as the last argument.

   - Anything else, which is copied to EXP.

   NOTE(review): the conditions guarding most branches and all return
   statements are not visible in this excerpt. */
1956 macro_expand__ (const struct macro_token *mts, size_t n,
1957 const struct macro_expander *me,
1958 struct macro_tokens *exp)
1960 const struct token *token = &mts[0].token;
1962 /* Recursive macro calls. */
1965 struct macro_call *submc;
1966 int n_call = macro_call_create__ (me->macros, me->stack, me,
/* Keep feeding tokens while the call reports zero. */
1968 for (size_t j = 1; !n_call; j++)
1970 const struct macro_token endcmd
1971 = { .token = { .type = T_ENDCMD } };
1972 n_call = macro_call_add (submc, j < n ? &mts[j] : &endcmd, NULL);
1976 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1977 struct macro_expansion_stack stack = {
1978 .name = submc->macro->name,
1979 .location = submc->macro->location,
1982 struct macro_expander subme = {
1983 .macros = submc->macros,
1984 .macro = submc->macro,
1985 .args = submc->args,
1986 .segmenter_mode = me->segmenter_mode,
1987 .expand = me->expand,
/* One nesting level is used up by this call. */
1990 .nesting_countdown = me->nesting_countdown - 1,
1993 const struct macro_tokens *body = &submc->macro->body;
1994 macro_expand (body->mts, body->n, &subme, exp);
1995 macro_call_destroy (submc);
1996 stringi_map_destroy (&vars);
2000 macro_call_destroy (submc);
2003 if (token->type != T_MACRO_ID)
2005 macro_tokens_add (exp, &mts[0]);
2009 /* Parameters and macro variables. */
2010 if (macro_expand_arg (token, me, exp))
2013 /* Macro functions. */
2014 struct string function_output = DS_EMPTY_INITIALIZER;
2015 size_t n_function = expand_macro_function (me, mts, n, &function_output);
2018 macro_tokens_from_string__ (exp, function_output.ss,
2019 me->segmenter_mode, me->stack);
2020 ds_destroy (&function_output);
/* Control constructs. */
2025 size_t n_if = macro_expand_if (mts, n, me, exp);
2029 size_t n_let = macro_parse_let (mts, n, me);
2033 size_t n_do = macro_expand_do (mts, n, me, exp);
2037 if (lex_id_match_n (token->string, ss_cstr ("!break"), 4))
2042 macro_error (me->stack, &mts[0], _("!BREAK outside !DO."));
2044 else if (lex_id_match_n (token->string, ss_cstr ("!onexpand"), 4))
2046 else if (lex_id_match_n (token->string, ss_cstr ("!offexpand"), 4))
2047 *me->expand = false;
/* Anything unrecognized passes through unchanged. */
2049 macro_tokens_add (exp, &mts[0]);
/* Expands the N tokens in MTS under expander ME, appending the result to
   EXP.

   If ME's nesting countdown has reached zero or below, an error that names
   the SET MNEST limit is reported and the tokens are copied to EXP without
   expansion.  Otherwise the tokens are processed construct by construct
   with macro_expand__().  Before each construct the break flag (if ME has
   one) is checked, and each call must consume at least one token and no
   more than remain.

   NOTE(review): the statements following the break check and the advance
   of the index are not visible in this excerpt. */
2054 macro_expand (const struct macro_token *mts, size_t n,
2055 const struct macro_expander *me,
2056 struct macro_tokens *exp)
2058 if (me->nesting_countdown <= 0)
2060 macro_error (me->stack, NULL, _("Maximum nesting level %d exceeded. "
2061 "(Use SET MNEST to change the limit.)"),
2062 settings_get_mnest ());
/* Pass the tokens through unexpanded. */
2063 for (size_t i = 0; i < n; i++)
2064 macro_tokens_add (exp, &mts[i]);
2068 for (size_t i = 0; i < n; )
2070 if (me->break_ && *me->break_)
2073 size_t consumed = macro_expand__ (&mts[i], n - i, me, exp);
2074 assert (consumed > 0 && i + consumed <= n);
/* Expands the body of the macro called by MC, appending the expansion to
   EXP.  MC must be in state MC_FINISHED (asserted).

   Two expansion stack entries are set up: one carrying CALL_LOC, the
   location of the call, and one carrying the macro's name and the location
   recorded in MC->macro.  The expander tokenizes with SEGMENTER_MODE and
   starts with a nesting countdown taken from settings_get_mnest().  It uses
   a local variable map that starts out empty and is destroyed once the body
   has been expanded.

   NOTE(review): some initializer fields and the end of the function are not
   visible in this excerpt. */
2080 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2081 const struct msg_location *call_loc,
2082 struct macro_tokens *exp)
2084 assert (mc->state == MC_FINISHED);
2087 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
/* Entry for the location of the macro call. */
2088 struct macro_expansion_stack stack0 = {
2089 .location = call_loc,
/* Entry for the macro being expanded. */
2091 struct macro_expansion_stack stack1 = {
2093 .name = mc->macro->name,
2094 .location = mc->macro->location,
2096 struct macro_expander me = {
2097 .macros = mc->macros,
2100 .segmenter_mode = segmenter_mode,
2104 .nesting_countdown = settings_get_mnest (),
2108 const struct macro_tokens *body = &mc->macro->body;
2109 macro_expand (body->mts, body->n, &me, exp);
2111 stringi_map_destroy (&vars);