1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list. Entries are not dynamically allocated
46 but live on the program stack.
48 The outermost entry, where 'next' is NULL, represents the source location of
49 the call to the macro. */
50 struct macro_expansion_stack
52 const struct macro_expansion_stack *next; /* Next outer stack entry. */
53 const char *name; /* A macro name or !IF, !DO, etc. */
54 const struct msg_location *location; /* Source location if available. */
57 /* Reports an error during macro expansion. STACK is the stack for reporting
58 the location of the error, MT is the optional token at which the error was
59 detected, and FORMAT along with the varargs is the message to report. */
60 static void PRINTF_FORMAT (3, 0)
61 macro_error_valist (const struct macro_expansion_stack *stack,
62 const struct macro_token *mt, const char *format,
65 struct msg_stack **ms = NULL;
66 size_t allocated_ms = 0;
69 const struct macro_expansion_stack *p;
70 for (p = stack; p && p->next; p = p->next)
72 if (n_ms >= allocated_ms)
73 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
75 /* TRANSLATORS: These strings are used for explaining the context of an
76 error. The "While expanding" message appears first, followed by zero
77 or more of the "inside expansion" messages. `innermost',
78 `next_inner', etc., are names of macros, and `foobar' is a piece of
81 foo.sps:12: At `foobar' in the expansion of 'innermost',
82 foo.sps:23: inside the expansion of 'next_inner',
83 foo.sps:34: inside the expansion of 'next_inner2',
84 foo.sps:45: inside the expansion of 'outermost',
85 foo.sps:76: This is the actual error message. */
89 if (mt && mt->syntax.length)
92 str_ellipsize (mt->syntax, syntax, sizeof syntax);
93 description = xasprintf (_("At `%s' in the expansion of `%s',"),
97 description = xasprintf (_("In the expansion of `%s',"), p->name);
100 description = xasprintf (_("inside the expansion of `%s',"), p->name);
102 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
103 *ms[n_ms] = (struct msg_stack) {
104 .location = msg_location_dup (p->location),
105 .description = description,
110 struct msg *m = xmalloc (sizeof *m);
112 .category = MSG_C_SYNTAX,
113 .severity = MSG_S_ERROR,
116 .location = msg_location_dup (p ? p->location : NULL),
117 .text = xvasprintf (format, args),
122 /* Reports an error during macro expansion. STACK is the stack for reporting
123 the location of the error, MT is the optional token at which the error was
124 detected, and FORMAT along with the varargs is the message to report. */
125 static void PRINTF_FORMAT (3, 4)
126 macro_error (const struct macro_expansion_stack *stack,
127 const struct macro_token *mt, const char *format, ...)
130 va_start (args, format);
131 macro_error_valist (stack, mt, format, args);
136 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
138 token_copy (&dst->token, &src->token);
139 ss_alloc_substring (&dst->syntax, src->syntax);
143 macro_token_uninit (struct macro_token *mt)
145 token_uninit (&mt->token);
146 ss_dealloc (&mt->syntax);
150 macro_token_to_syntax (struct macro_token *mt, struct string *s)
152 ds_put_substring (s, mt->syntax);
155 is_macro_keyword (struct substring s)
157 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
158 if (stringi_set_is_empty (&keywords))
160 static const char *kws[] = {
181 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
182 stringi_set_insert (&keywords, kws[i]);
185 ss_ltrim (&s, ss_cstr ("!"));
186 return stringi_set_contains_len (&keywords, s.string, s.length);
190 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
192 *dst = (struct macro_tokens) {
193 .mts = xmalloc (src->n * sizeof *dst->mts),
197 for (size_t i = 0; i < src->n; i++)
198 macro_token_copy (&dst->mts[i], &src->mts[i]);
202 macro_tokens_uninit (struct macro_tokens *mts)
204 for (size_t i = 0; i < mts->n; i++)
205 macro_token_uninit (&mts->mts[i]);
210 macro_tokens_add_uninit (struct macro_tokens *mts)
212 if (mts->n >= mts->allocated)
213 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
214 return &mts->mts[mts->n++];
218 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
220 macro_token_copy (macro_tokens_add_uninit (mts), mt);
223 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
224 if nonnull, for error reporting. */
226 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
227 enum segmenter_mode mode,
228 const struct macro_expansion_stack *stack)
230 struct segmenter segmenter = segmenter_init (mode, true);
231 struct substring body = src;
233 while (body.length > 0)
235 struct macro_token mt = {
236 .token = { .type = T_STOP },
237 .syntax = { .string = body.string },
239 struct token *token = &mt.token;
241 enum segment_type type;
242 int seg_len = segmenter_push (&segmenter, body.string,
243 body.length, true, &type);
244 assert (seg_len >= 0);
246 struct substring segment = ss_head (body, seg_len);
247 enum tokenize_result result = token_from_segment (type, segment, token);
248 ss_advance (&body, seg_len);
256 mt.syntax.length = body.string - mt.syntax.string;
257 macro_tokens_add (mts, &mt);
261 mt.syntax.length = body.string - mt.syntax.string;
262 macro_error (stack, &mt, "%s", token->string.string);
266 token_uninit (token);
270 /* Tokenizes SRC according to MODE and appends the tokens to MTS. */
272 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
273 enum segmenter_mode mode)
275 macro_tokens_from_string__ (mts, src, mode, NULL);
279 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
281 for (size_t i = 0; i < mts->n; i++)
282 token_print (&mts->mts[i].token, stream);
287 TC_ENDCMD, /* No space before or after (new-line after). */
288 TC_BINOP, /* Space on both sides. */
289 TC_COMMA, /* Space afterward. */
290 TC_ID, /* Don't need spaces except sequentially. */
291 TC_PUNCT, /* Don't need spaces except sequentially. */
295 needs_space (enum token_class prev, enum token_class next)
297 /* Don't need a space before or after the end of a command.
298 (A new-line is needed afterward as a special case.) */
299 if (prev == TC_ENDCMD || next == TC_ENDCMD)
302 /* Binary operators always have a space on both sides. */
303 if (prev == TC_BINOP || next == TC_BINOP)
306 /* A comma always has a space afterward. */
307 if (prev == TC_COMMA)
310 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
311 two of them in a row. */
315 static enum token_class
316 classify_token (enum token_type type)
368 /* Appends syntax for the tokens in MTS to S. If OFS and LEN are nonnull, sets
369 OFS[i] to the offset within S of the start of token 'i' in MTS and LEN[i] to
370 its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1] because some
371 tokens are separated by white space. */
373 macro_tokens_to_syntax (struct macro_tokens *mts, struct string *s,
374 size_t *ofs, size_t *len)
376 assert ((ofs != NULL) == (len != NULL));
381 for (size_t i = 0; i < mts->n; i++)
385 enum token_type prev = mts->mts[i - 1].token.type;
386 enum token_type next = mts->mts[i].token.type;
388 if (prev == T_ENDCMD)
389 ds_put_byte (s, '\n');
392 enum token_class pc = classify_token (prev);
393 enum token_class nc = classify_token (next);
394 if (needs_space (pc, nc))
395 ds_put_byte (s, ' ');
400 ofs[i] = s->ss.length;
401 macro_token_to_syntax (&mts->mts[i], s);
403 len[i] = s->ss.length - ofs[i];
408 macro_destroy (struct macro *m)
414 msg_location_destroy (m->location);
415 for (size_t i = 0; i < m->n_params; i++)
417 struct macro_param *p = &m->params[i];
420 macro_tokens_uninit (&p->def);
428 token_uninit (&p->charend);
432 token_uninit (&p->enclose[0]);
433 token_uninit (&p->enclose[1]);
441 macro_tokens_uninit (&m->body);
446 macro_set_create (void)
448 struct macro_set *set = xmalloc (sizeof *set);
449 *set = (struct macro_set) {
450 .macros = HMAP_INITIALIZER (set->macros),
456 macro_set_destroy (struct macro_set *set)
461 struct macro *macro, *next;
462 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
464 hmap_delete (&set->macros, ¯o->hmap_node);
465 macro_destroy (macro);
467 hmap_destroy (&set->macros);
472 hash_macro_name (const char *name)
474 return utf8_hash_case_string (name, 0);
477 static struct macro *
478 macro_set_find__ (struct macro_set *set, const char *name)
480 if (macro_set_is_empty (set))
484 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
485 hash_macro_name (name), &set->macros)
486 if (!utf8_strcasecmp (macro->name, name))
493 macro_set_find (const struct macro_set *set, const char *name)
495 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
498 /* Adds M to SET. M replaces any existing macro with the same name. Takes
501 macro_set_add (struct macro_set *set, struct macro *m)
503 struct macro *victim = macro_set_find__ (set, m->name);
506 hmap_delete (&set->macros, &victim->hmap_node);
507 macro_destroy (victim);
510 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
513 /* Macro call parsing. */
520 /* Accumulating tokens in mc->params toward the end of any type of
524 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
527 /* Expecting a keyword for a keyword argument. */
530 /* Expecting an equal sign for a keyword argument. */
533 /* Macro fully parsed and ready for expansion. */
537 /* Parsing macro calls. This is a FSM driven by macro_call_create() and
538 macro_call_add() to identify the macro being called and obtain its
539 arguments. 'state' identifies the FSM state. */
542 const struct macro_set *macros;
543 const struct macro *macro;
544 struct macro_tokens **args;
545 const struct macro_expansion_stack *stack;
549 const struct macro_param *param; /* Parameter currently being parsed. */
552 /* Completes macro expansion by initializing arguments that weren't supplied to
555 mc_finished (struct macro_call *mc)
557 mc->state = MC_FINISHED;
558 for (size_t i = 0; i < mc->macro->n_params; i++)
560 mc->args[i] = &mc->macro->params[i].def;
565 mc_next_arg (struct macro_call *mc)
569 assert (!mc->macro->n_params);
570 return mc_finished (mc);
572 else if (mc->param->positional)
575 if (mc->param >= &mc->macro->params[mc->macro->n_params])
576 return mc_finished (mc);
579 mc->state = (!mc->param->positional ? MC_KEYWORD
580 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
587 for (size_t i = 0; i < mc->macro->n_params; i++)
590 mc->state = MC_KEYWORD;
593 return mc_finished (mc);
597 static void PRINTF_FORMAT (3, 4)
598 mc_error (const struct macro_call *mc, const struct msg_location *loc,
599 const char *format, ...)
602 va_start (args, format);
605 const struct macro_expansion_stack stack = { .location = loc };
606 macro_error_valist (&stack, NULL, format, args);
609 macro_error_valist (mc->stack, NULL, format, args);
614 mc_add_arg (struct macro_call *mc, const struct macro_token *mt,
615 const struct msg_location *loc)
617 const struct macro_param *p = mc->param;
619 const struct token *token = &mt->token;
620 if ((token->type == T_ENDCMD || token->type == T_STOP)
621 && p->arg_type != ARG_CMDEND)
624 _("Unexpected end of command reading argument %s "
625 "to macro %s."), mc->param->name, mc->macro->name);
627 mc->state = MC_ERROR;
633 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
635 *argp = xzalloc (sizeof **argp);
636 struct macro_tokens *arg = *argp;
637 if (p->arg_type == ARG_N_TOKENS)
639 macro_tokens_add (arg, mt);
640 if (arg->n >= p->n_tokens)
641 return mc_next_arg (mc);
644 else if (p->arg_type == ARG_CMDEND)
646 if (token->type == T_ENDCMD || token->type == T_STOP)
647 return mc_next_arg (mc);
648 macro_tokens_add (arg, mt);
653 const struct token *end
654 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
655 if (token_equal (token, end))
656 return mc_next_arg (mc);
657 macro_tokens_add (arg, mt);
663 mc_expected (struct macro_call *mc, const struct macro_token *actual,
664 const struct msg_location *loc, const struct token *expected)
666 const struct substring actual_s = (actual->syntax.length ? actual->syntax
667 : ss_cstr (_("<end of input>")));
668 char *expected_s = token_to_string (expected);
670 _("Found `%.*s' while expecting `%s' reading argument %s "
672 (int) actual_s.length, actual_s.string, expected_s,
673 mc->param->name, mc->macro->name);
676 mc->state = MC_ERROR;
681 mc_enclose (struct macro_call *mc, const struct macro_token *mt,
682 const struct msg_location *loc)
684 const struct token *token = &mt->token;
687 if (token_equal (&mc->param->enclose[0], token))
693 return mc_expected (mc, mt, loc, &mc->param->enclose[0]);
696 static const struct macro_param *
697 macro_find_parameter_by_name (const struct macro *m, struct substring name)
702 ss_ltrim (&name, ss_cstr ("!"));
704 for (size_t i = 0; i < m->n_params; i++)
706 const struct macro_param *p = &m->params[i];
707 struct substring p_name = ss_cstr (p->name + 1);
708 if (!utf8_strncasecmp (p_name.string, p_name.length,
709 name.string, name.length))
716 mc_keyword (struct macro_call *mc, const struct macro_token *mt,
717 const struct msg_location *loc)
719 const struct token *token = &mt->token;
720 if (token->type != T_ID)
721 return mc_finished (mc);
723 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
727 size_t arg_index = p - mc->macro->params;
729 if (mc->args[arg_index])
732 _("Argument %s multiply specified in call to macro %s."),
733 p->name, mc->macro->name);
734 mc->state = MC_ERROR;
739 mc->state = MC_EQUALS;
743 return mc_finished (mc);
747 mc_equals (struct macro_call *mc, const struct macro_token *mt,
748 const struct msg_location *loc)
750 const struct token *token = &mt->token;
753 if (token->type == T_EQUALS)
755 mc->state = mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE : MC_ARG;
759 return mc_expected (mc, mt, loc, &(struct token) { .type = T_EQUALS });
763 macro_call_create__ (const struct macro_set *macros,
764 const struct macro_expansion_stack *stack,
765 const struct token *token,
766 struct macro_call **mcp)
768 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
769 ? macro_set_find (macros, token->string.string)
777 struct macro_call *mc = xmalloc (sizeof *mc);
778 *mc = (struct macro_call) {
782 .state = (!macro->n_params ? MC_FINISHED
783 : !macro->params[0].positional ? MC_KEYWORD
784 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
786 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
787 .param = macro->params,
792 return mc->state == MC_FINISHED ? 1 : 0;
795 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
796 macro expander and initializes *MCP to it. Returns 0 if more tokens are needed
797 and should be added via macro_call_add() or 1 if the caller should next call
798 macro_call_get_expansion().
800 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to
803 macro_call_create (const struct macro_set *macros,
804 const struct token *token,
805 struct macro_call **mcp)
807 return macro_call_create__ (macros, NULL, token, mcp);
811 macro_call_destroy (struct macro_call *mc)
816 for (size_t i = 0; i < mc->macro->n_params; i++)
818 struct macro_tokens *a = mc->args[i];
819 if (a && a != &mc->macro->params[i].def)
821 macro_tokens_uninit (a);
829 /* Adds TOKEN to the collection of tokens in MC that potentially need to be
832 Returns -1 if the tokens added do not actually invoke a macro. The caller
833 should consume the first token without expanding it. (Later tokens might
834 invoke a macro so it's best to feed the second token into a new expander.)
836 Returns 0 if the macro expander needs more tokens, for macro arguments or to
837 decide whether this is actually a macro invocation. The caller should call
838 macro_call_add() again with the next token.
840 Returns a positive number to indicate that the returned number of tokens
841 invoke a macro. The number returned might be less than the number of tokens
842 added because it can take a few tokens of lookahead to determine whether the
843 macro invocation is finished. The caller should call
844 macro_call_get_expansion() to obtain the expansion. */
846 macro_call_add (struct macro_call *mc, const struct macro_token *mt,
847 const struct msg_location *loc)
855 return mc_add_arg (mc, mt, loc);
858 return mc_enclose (mc, mt, loc);
861 return mc_keyword (mc, mt, loc);
864 return mc_equals (mc, mt, loc);
871 /* Macro expansion. */
873 struct macro_expander
875 /* Always available. */
876 const struct macro_set *macros; /* Macros to expand recursively. */
877 enum segmenter_mode segmenter_mode; /* Mode for tokenization. */
878 int nesting_countdown; /* Remaining nesting levels. */
879 const struct macro_expansion_stack *stack; /* Stack for error reporting. */
880 bool *expand; /* May macro calls be expanded? */
881 struct stringi_map *vars; /* Variables from !do and !let. */
883 /* Only nonnull if inside a !DO loop. */
884 bool *break_; /* Set to true to break out of loop. */
886 /* Only nonnull if expanding a macro (and not, say, a macro argument). */
887 const struct macro *macro;
888 struct macro_tokens **args;
892 macro_expand (const struct macro_token *mts, size_t n_mts,
893 const struct macro_expander *, struct macro_tokens *);
896 expand_macro_function (const struct macro_expander *me,
897 const struct macro_token *input, size_t n_input,
898 struct string *output);
900 /* Parses one function argument from the N_INPUT tokens in INPUT.
901 Each argument to a macro function is one of:
903 - A quoted string or other single literal token.
905 - An argument to the macro being expanded, e.g. !1 or a named argument.
909 - A function invocation.
911 Each function invocation yields a character sequence to be turned into a
912 sequence of tokens. The case where that character sequence is a single
913 quoted string is an important special case.
916 parse_function_arg (const struct macro_expander *me,
917 const struct macro_token *input, size_t n_input,
920 assert (n_input > 0);
922 const struct token *token = &input[0].token;
923 if (token->type == T_MACRO_ID && me->macro)
925 const struct macro_param *param = macro_find_parameter_by_name (
926 me->macro, token->string);
929 size_t param_idx = param - me->macro->params;
930 macro_tokens_to_syntax (me->args[param_idx], farg, NULL, NULL);
934 if (ss_equals (token->string, ss_cstr ("!*")))
936 for (size_t i = 0; i < me->macro->n_params; i++)
938 if (!me->macro->params[i].positional)
941 ds_put_byte (farg, ' ');
942 macro_tokens_to_syntax (me->args[i], farg, NULL, NULL);
947 const char *var = stringi_map_find__ (me->vars,
948 token->string.string,
949 token->string.length);
952 ds_put_cstr (farg, var);
956 size_t n_function = expand_macro_function (me, input, n_input, farg);
961 ds_put_substring (farg, input[0].syntax);
966 parse_function_args (const struct macro_expander *me,
967 const struct macro_token *mts, size_t n,
968 const char *function,
969 struct string_array *args)
971 if (n < 2 || mts[1].token.type != T_LPAREN)
973 macro_error (me->stack, n > 1 ? &mts[1] : NULL,
974 _("`(' expected following %s."), function);
978 for (size_t i = 2; i < n; )
980 if (mts[i].token.type == T_RPAREN)
983 struct string s = DS_EMPTY_INITIALIZER;
984 i += parse_function_arg (me, mts + i, n - i, &s);
985 string_array_append_nocopy (args, ds_steal_cstr (&s));
989 else if (mts[i].token.type == T_COMMA)
991 else if (mts[i].token.type != T_RPAREN)
993 macro_error (me->stack, &mts[i],
994 _("`,' or `)' expected in call to macro function %s."),
1000 macro_error (me->stack, NULL, _("Missing `)' in call to macro function %s."),
1006 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1007 struct string *content)
1009 struct string_lexer slex;
1010 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1012 struct token token1;
1013 if (string_lexer_next (&slex, &token1) != SLR_TOKEN
1014 || token1.type != T_STRING)
1016 token_uninit (&token1);
1020 struct token token2;
1021 if (string_lexer_next (&slex, &token2) != SLR_END)
1023 token_uninit (&token1);
1024 token_uninit (&token2);
1028 ds_put_substring (content, token1.string);
1029 token_uninit (&token1);
1034 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1037 ds_init_empty (tmp);
1038 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
/* Converts S to an integer with strtol() in base 10 and stores the result in
   *NP, clamped to the range [INT_MIN, INT_MAX].

   The value returned is true only when all of the following hold: nothing
   but characters from CC_SPACES follows the converted number, strtol() did
   not report ERANGE, and the converted value fit in an int without being
   clamped. */
1042 parse_integer (const char *s, int *np)
1047 long int n = strtol (s, &tail, 10);
/* Clamp to int so that *NP is always assigned, even for out-of-range
   input; the comparison 'n == *np' below then detects that clamping
   happened. */
1048 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
/* Allow trailing characters from CC_SPACES after the number. */
1049 tail += strspn (tail, CC_SPACES);
/* NOTE(review): the 'errno != ERANGE' test is only meaningful if errno was
   cleared before the strtol() call; that reset is not among the lines
   visible here -- confirm.

   NOTE(review): there is no check that strtol() consumed any digits
   (tail != s), so an empty S reaches this point with n == 0 and
   *tail == '\0' and is accepted as a valid 0 -- confirm that this is
   intended. */
1050 return *tail == '\0' && errno != ERANGE && n == *np;
1054 expand_macro_function (const struct macro_expander *me,
1055 const struct macro_token *input, size_t n_input,
1056 struct string *output)
1058 if (!n_input || input[0].token.type != T_MACRO_ID)
1061 struct macro_function
1067 enum macro_function_id
1082 static const struct macro_function mfs[] = {
1083 [MF_BLANKS] = { "!BLANKS", 1, 1 },
1084 [MF_CONCAT] = { "!CONCAT", 1, INT_MAX },
1085 [MF_EVAL] = { "!EVAL", 1, 1 },
1086 [MF_HEAD] = { "!HEAD", 1, 1 },
1087 [MF_INDEX] = { "!INDEX", 2, 2 },
1088 [MF_LENGTH] = { "!LENGTH", 1, 1 },
1089 [MF_NULL] = { "!NULL", 0, 0 },
1090 [MF_QUOTE] = { "!QUOTE", 1, 1 },
1091 [MF_SUBSTR] = { "!SUBSTR", 2, 3 },
1092 [MF_TAIL] = { "!TAIL", 1, 1 },
1093 [MF_UNQUOTE] = { "!UNQUOTE", 1, 1 },
1094 [MF_UPCASE] = { "!UPCASE", 1, 1 },
1097 /* Is this a macro function? */
1098 const struct macro_function *mf;
1099 for (mf = mfs; ; mf++)
1101 if (mf >= mfs + sizeof mfs / sizeof *mfs)
1103 /* Not a macro function. */
1107 if (lex_id_match_n (ss_cstr (mf->name), input[0].token.string, 4))
1111 enum macro_function_id id = mf - mfs;
1115 struct string_array args = STRING_ARRAY_INITIALIZER;
1116 size_t n_consumed = parse_function_args (me, input, n_input, mf->name, &args);
1120 if (args.n < mf->min_args || args.n > mf->max_args)
1122 if (mf->min_args == 1 && mf->max_args == 1)
1123 macro_error (me->stack, NULL,
1124 _("Macro function %s takes one argument (not %zu)."),
1126 else if (mf->min_args == 2 && mf->max_args == 2)
1127 macro_error (me->stack, NULL,
1128 _("Macro function %s takes two arguments (not %zu)."),
1130 else if (mf->min_args == 2 && mf->max_args == 3)
1131 macro_error (me->stack, NULL,
1132 _("Macro function %s takes two or three arguments "
1135 else if (mf->min_args == 1 && mf->max_args == INT_MAX)
1136 macro_error (me->stack, NULL,
1137 _("Macro function %s needs at least one argument."),
1147 ds_put_format (output, "%zu", strlen (args.strings[0]));
1153 if (!parse_integer (args.strings[0], &n))
1155 macro_error (me->stack, NULL,
1156 _("Argument to !BLANKS must be non-negative integer "
1157 "(not \"%s\")."), args.strings[0]);
1158 string_array_destroy (&args);
1162 ds_put_byte_multiple (output, ' ', n);
1167 for (size_t i = 0; i < args.n; i++)
1168 if (!unquote_string (args.strings[i], me->segmenter_mode, output))
1169 ds_put_cstr (output, args.strings[i]);
1175 const char *s = unquote_string_in_place (args.strings[0],
1176 me->segmenter_mode, &tmp);
1178 struct macro_tokens mts = { .n = 0 };
1179 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1182 ds_put_substring (output, mts.mts[0].syntax);
1183 macro_tokens_uninit (&mts);
1190 const char *haystack = args.strings[0];
1191 const char *needle = strstr (haystack, args.strings[1]);
1192 ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0);
1197 if (unquote_string (args.strings[0], me->segmenter_mode, NULL))
1198 ds_put_cstr (output, args.strings[0]);
1201 ds_extend (output, strlen (args.strings[0]) + 2);
1202 ds_put_byte (output, '\'');
1203 for (const char *p = args.strings[0]; *p; p++)
1206 ds_put_byte (output, '\'');
1207 ds_put_byte (output, *p);
1209 ds_put_byte (output, '\'');
1216 if (!parse_integer (args.strings[1], &start) || start < 1)
1218 macro_error (me->stack, NULL,
1219 _("Second argument of !SUBSTR must be "
1220 "positive integer (not \"%s\")."),
1222 string_array_destroy (&args);
1226 int count = INT_MAX;
1227 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1229 macro_error (me->stack, NULL,
1230 _("Third argument of !SUBSTR must be "
1231 "non-negative integer (not \"%s\")."),
1233 string_array_destroy (&args);
1237 struct substring s = ss_cstr (args.strings[0]);
1238 ds_put_substring (output, ss_substr (s, start - 1, count));
1245 const char *s = unquote_string_in_place (args.strings[0],
1246 me->segmenter_mode, &tmp);
1248 struct macro_tokens mts = { .n = 0 };
1249 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1253 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1254 macro_tokens_to_syntax (&tail, output, NULL, NULL);
1256 macro_tokens_uninit (&mts);
1262 if (!unquote_string (args.strings[0], me->segmenter_mode, output))
1263 ds_put_cstr (output, args.strings[0]);
1269 const char *s = unquote_string_in_place (args.strings[0],
1270 me->segmenter_mode, &tmp);
1271 char *upper = utf8_to_upper (s);
1272 ds_put_cstr (output, upper);
1280 struct macro_tokens mts = { .n = 0 };
1281 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1282 me->segmenter_mode, me->stack);
1283 struct macro_tokens exp = { .n = 0 };
1284 struct macro_expansion_stack stack = {
1288 struct macro_expander subme = *me;
1289 subme.break_ = NULL;
1290 subme.stack = &stack;
1292 macro_expand (mts.mts, mts.n, &subme, &exp);
1293 macro_tokens_to_syntax (&exp, output, NULL, NULL);
1294 macro_tokens_uninit (&exp);
1295 macro_tokens_uninit (&mts);
1303 string_array_destroy (&args);
1307 static char *macro_evaluate_or (const struct macro_expander *me,
1308 const struct macro_token **tokens,
1309 const struct macro_token *end);
1312 macro_evaluate_literal (const struct macro_expander *me,
1313 const struct macro_token **tokens,
1314 const struct macro_token *end)
1316 const struct macro_token *p = *tokens;
1319 if (p->token.type == T_LPAREN)
1322 char *value = macro_evaluate_or (me, &p, end);
1325 if (p >= end || p->token.type != T_RPAREN)
1328 macro_error (me->stack, p < end ? p : NULL,
1329 _("Expecting ')' in macro expression."));
1336 else if (p->token.type == T_RPAREN)
1338 macro_error (me->stack, p, _("Expecting literal or function invocation "
1339 "in macro expression."));
1343 struct string function_output = DS_EMPTY_INITIALIZER;
1344 size_t function_consumed = parse_function_arg (me, p, end - p,
1346 struct string unquoted = DS_EMPTY_INITIALIZER;
1347 if (unquote_string (ds_cstr (&function_output), me->segmenter_mode,
1350 ds_swap (&function_output, &unquoted);
1351 ds_destroy (&unquoted);
1353 *tokens = p + function_consumed;
1354 return ds_steal_cstr (&function_output);
1357 /* Returns true if MT is valid as a macro operator. Only operators written as
1358 symbols (e.g. <>) are usable in macro expressions, not operators written as
1359 letters (e.g. EQ). */
1361 is_macro_operator (const struct macro_token *mt)
1363 return mt->syntax.length > 0 && !c_isalpha (mt->syntax.string[0]);
1366 static enum token_type
1367 parse_relational_op (const struct macro_token *mt)
1369 switch (mt->token.type)
1379 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1382 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1383 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1384 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1385 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1386 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1387 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
1396 macro_evaluate_relational (const struct macro_expander *me,
1397 const struct macro_token **tokens,
1398 const struct macro_token *end)
1400 const struct macro_token *p = *tokens;
1401 char *lhs = macro_evaluate_literal (me, &p, end);
1405 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1413 char *rhs = macro_evaluate_literal (me, &p, end);
1420 struct string lhs_tmp, rhs_tmp;
1421 int cmp = strcmp (unquote_string_in_place (lhs, me->segmenter_mode,
1423 unquote_string_in_place (rhs, me->segmenter_mode,
1425 ds_destroy (&lhs_tmp);
1426 ds_destroy (&rhs_tmp);
1431 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1433 : op == T_LT ? cmp < 0
1434 : op == T_GT ? cmp > 0
1435 : op == T_LE ? cmp <= 0
1436 : /* T_GE */ cmp >= 0);
1439 return xstrdup (b ? "1" : "0");
/* Evaluates an operand that may be preceded by negation operators.

   Tokens whose syntax is "!NOT" (compared with ss_equals_case()) or "~" are
   matched at *TOKENS, 'negations' keeps the count of them, and the operand
   itself is evaluated by macro_evaluate_relational().  When negation is
   applied, the result is a freshly allocated "1" or "0" string. */
1443 macro_evaluate_not (const struct macro_expander *me,
1444 const struct macro_token **tokens,
1445 const struct macro_token *end)
1447 const struct macro_token *p = *tokens;
/* Number of negation operators seen in front of the operand. */
1449 unsigned int negations = 0;
1451 && (ss_equals_case (p->syntax, ss_cstr ("!NOT"))
1452 || ss_equals (p->syntax, ss_cstr ("~"))))
1458 char *operand = macro_evaluate_relational (me, &p, end);
/* Special-cases a failed operand (null) and the no-negation case; the body
   of this branch is outside the lines visible here. */
1459 if (!operand || !negations)
/* An operand counts as false only if it is exactly "0"; only the parity of
   'negations' matters.

   NOTE(review): strcmp() is only specified to return a negative, zero, or
   positive int, not 0 or 1.  XORing the raw result with the parity bit and
   then converting to bool yields true whenever the XOR is nonzero -- e.g.
   5 ^ 1 == 4 -- so an odd number of negations applied to an operand other
   than "0" can still produce "1".  Normalizing first, as in
   (strcmp (operand, "0") != 0) ^ (negations & 1), would avoid this. */
1465 bool b = strcmp (operand, "0") ^ (negations & 1);
1468 return xstrdup (b ? "1" : "0");
1472 macro_evaluate_and (const struct macro_expander *me,
1473 const struct macro_token **tokens,
1474 const struct macro_token *end)
1476 const struct macro_token *p = *tokens;
1477 char *lhs = macro_evaluate_not (me, &p, end);
1482 && (ss_equals_case (p->syntax, ss_cstr ("!AND"))
1483 || ss_equals (p->syntax, ss_cstr ("&"))))
1486 char *rhs = macro_evaluate_not (me, &p, end);
1493 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1496 lhs = xstrdup (b ? "1" : "0");
1503 macro_evaluate_or (const struct macro_expander *me,
1504 const struct macro_token **tokens,
1505 const struct macro_token *end)
1507 const struct macro_token *p = *tokens;
1508 char *lhs = macro_evaluate_and (me, &p, end);
1513 && (ss_equals_case (p->syntax, ss_cstr ("!OR"))
1514 || ss_equals (p->syntax, ss_cstr ("|"))))
1517 char *rhs = macro_evaluate_and (me, &p, end);
1524 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1527 lhs = xstrdup (b ? "1" : "0");
1534 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1535 const struct macro_expander *me)
1537 return macro_evaluate_or (me, tokens, *tokens + n_tokens);
/* Evaluates a macro expression that must yield a number.  The expression is
   first evaluated to a string with macro_evaluate_expression(); that string is
   then tokenized again with macro_tokens_from_string__().  Unless the result
   is exactly one token for which token_is_number() holds, an error quoting
   the string is reported.  Otherwise the token's numeric value is stored in
   *NUMBER.

   NOTE(review): callers test the result with `!', so the return value
   presumably signals success; the return statements are not part of this
   listing. */
1541 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1542 const struct macro_expander *me,
1545 char *s = macro_evaluate_expression (tokens, n_tokens, me);
/* Re-tokenize the string result so that it can be checked as a number. */
1549 struct macro_tokens mts = { .n = 0 };
1550 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, me->stack);
1551 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
/* The error is attached to the first token when there is one. */
1553 macro_error (me->stack, mts.n > 0 ? &mts.mts[0] : NULL,
1554 _("Macro expression must evaluate to "
1555 "a number (not \"%s\")."), s);
1557 macro_tokens_uninit (&mts);
/* Success: hand back the numeric value and release the temporary tokens. */
1561 *number = token_number (&mts.mts[0].token);
1563 macro_tokens_uninit (&mts);
/* Scans the tokens in [P, END) looking for the keywords that delimit an !IF
   construct.  Only tokens of type T_MACRO_ID are compared against keywords:
   !IF must match in full (ss_equals_case()), while !IFEND and !ELSE are
   matched with lex_id_match_n (..., 4), which presumably also accepts
   abbreviations down to four characters.

   NOTE(review): the action taken for each keyword and the return statements
   are not part of this listing; macro_expand_if() treats the result as a
   pointer to the delimiting token. */
1567 static const struct macro_token *
1568 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1571 for (; p < end; p++)
1573 if (p->token.type != T_MACRO_ID)
1576 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1578 else if (lex_id_match_n (p->token.string, ss_cstr ("!IFEND"), 4))
1584 else if (lex_id_match_n (p->token.string, ss_cstr ("!ELSE"), 4)
/* Expands an !IF construct beginning at TOKENS[0] (of N_TOKENS tokens),
   appending the expansion of the selected branch to EXP.

   Visible steps: check for the !IF keyword; evaluate the condition with
   macro_evaluate_expression() (any result other than "0" is true); require
   !THEN; find the end of the "then" branch and, after !ELSE, of the "else"
   branch with find_ifend_clause(); then expand the chosen token range with a
   copy of ME that carries its own expansion-stack entry.

   On the path shown last, returns the number of tokens consumed, counted
   through and including the token at END_IF. */
1592 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1593 const struct macro_expander *me,
1594 struct macro_tokens *exp)
1596 const struct macro_token *p = tokens;
1597 const struct macro_token *end = tokens + n_tokens;
1599 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
/* The condition is evaluated to a string; &P lets the evaluator move the
   cursor. */
1603 char *result = macro_evaluate_expression (&p, end - p, me);
1606 bool b = strcmp (result, "0");
1610 || p->token.type != T_MACRO_ID
1611 || !lex_id_match_n (p->token.string, ss_cstr ("!THEN"), 4))
1613 macro_error (me->stack, p < end ? p : NULL,
1614 _("!THEN expected in macro !IF construct."));
/* The "then" branch runs from the token after !THEN up to the delimiter
   located by find_ifend_clause(). */
1618 const struct macro_token *start_then = p + 1;
1619 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1622 macro_error (me->stack, NULL,
1623 _("!ELSE or !IFEND expected in macro !IF construct."));
/* If the delimiter is !ELSE, a second search finds the closing !IFEND. */
1627 const struct macro_token *start_else, *end_if;
1628 if (lex_id_match_n (end_then->token.string, ss_cstr ("!ELSE"), 4))
1630 start_else = end_then + 1;
1631 end_if = find_ifend_clause (start_else, end);
1633 || !lex_id_match_n (end_if->token.string, ss_cstr ("!IFEND"), 4))
1635 macro_error (me->stack, end_if ? end_if : NULL,
1636 _("!IFEND expected in macro !IF construct."));
/* START and N describe the branch to expand: the "then" range, or the "else"
   range when START_ELSE is set. */
1646 const struct macro_token *start;
1651 n = end_then - start_then;
1653 else if (start_else)
1656 n = end_if - start_else;
/* Expand the branch with a copy of ME whose stack points at a new entry. */
1666 struct macro_expansion_stack stack = {
1670 struct macro_expander subme = *me;
1671 subme.stack = &stack;
1672 macro_expand (start, n, &subme, exp);
1674 return (end_if + 1) - tokens;
/* Parses a !LET construct beginning at TOKENS[0] (of N_TOKENS tokens) and
   stores the resulting variable in ME->vars.

   Visible steps: check for the !LET keyword; require a T_MACRO_ID token as the
   variable name; reject names that are macro keywords or names of parameters
   of ME->macro; require a T_EQUALS token; evaluate the following expression
   with macro_evaluate_expression(); and store the value under a copy of the
   variable name with stringi_map_replace_nocopy().

   NOTE(review): the return statements are not part of this listing. */
1678 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1679 const struct macro_expander *me)
1681 const struct macro_token *p = tokens;
1682 const struct macro_token *end = tokens + n_tokens;
1684 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
/* The variable name must be a macro identifier token. */
1688 if (p >= end || p->token.type != T_MACRO_ID)
1690 macro_error (me->stack, p < end ? p : NULL,
1691 _("Expected macro variable name following !LET."));
/* The name may not collide with a keyword or with a macro parameter. */
1694 const struct substring var_name = p->token.string;
1695 if (is_macro_keyword (var_name)
1696 || macro_find_parameter_by_name (me->macro, var_name))
1698 macro_error (me->stack, p < end ? p : NULL,
1699 _("Cannot use argument name or macro keyword "
1700 "\"%.*s\" as !LET variable."),
1701 (int) var_name.length, var_name.string);
1706 if (p >= end || p->token.type != T_EQUALS)
1708 macro_error (me->stack, p < end ? p : NULL,
1709 _("Expected `=' following !LET."));
/* The evaluated string itself (not a copy) is stored in the variable map. */
1714 char *value = macro_evaluate_expression (&p, end - p, me);
1718 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens in [P, END) for the keywords that delimit a !DO loop.
   Only T_MACRO_ID tokens are compared against keywords: !DO must match in
   full (ss_equals_case()), while !DOEND is matched with
   lex_id_match_n (..., 4), which presumably also accepts abbreviations down
   to four characters.  The "Missing !DOEND." error is reported against STACK
   without a token.

   NOTE(review): the action taken for each keyword and the return statements
   are not part of this listing; macro_expand_do() treats the result as a
   pointer to the closing token. */
1722 static const struct macro_token *
1723 find_doend (const struct macro_expansion_stack *stack,
1724 const struct macro_token *p, const struct macro_token *end)
1727 for (; p < end; p++)
1729 if (p->token.type != T_MACRO_ID)
1732 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1734 else if (lex_id_match_n (p->token.string, ss_cstr ("!DOEND"), 4))
1741 macro_error (stack, NULL, _("Missing !DOEND."));
/* Expands a !DO loop beginning at TOKENS[0] (of N_TOKENS tokens), appending
   the expansion of every iteration's body to EXP.  Two forms are handled:

     - "!DO var !IN expression": the expression is evaluated to a string, the
       string is tokenized, and the body is expanded once per resulting token
       with the loop variable set to that token's syntax.

     - "!DO var = first !TO last [!BY step]": the body is expanded for each
       value from FIRST to LAST in steps of BY, with the loop variable set to
       the value formatted by c_dtoastr().

   The loop variable is stored in ME->vars and may be neither a macro keyword
   nor a parameter of ME->macro.  The body is expanded with a copy of ME that
   has its own break flag and expansion-stack entry.  The limit obtained from
   settings_get_miterate() is reported when exceeded.  On success the return
   value is the number of tokens consumed, including the closing token found
   by find_doend(). */
1746 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1747 const struct macro_expander *me,
1748 struct macro_tokens *exp)
1750 const struct macro_token *p = tokens;
1751 const struct macro_token *end = tokens + n_tokens;
1753 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
/* The loop variable must be a macro identifier token. */
1757 if (p >= end || p->token.type != T_MACRO_ID)
1759 macro_error (me->stack, p < end ? p : NULL,
1760 _("Expected macro variable name following !DO."));
1763 const struct substring var_name = p->token.string;
1764 if (is_macro_keyword (var_name)
1765 || macro_find_parameter_by_name (me->macro, var_name))
1767 macro_error (me->stack, p, _("Cannot use argument name or macro "
1768 "keyword as !DO variable."));
/* The loop body gets its own stack entry and its own break flag, so that a
   break inside the body is visible to this loop through SUBME. */
1773 struct macro_expansion_stack substack = {
1777 bool break_ = false;
1778 struct macro_expander subme = *me;
1779 subme.break_ = &break_;
1780 subme.stack = &substack;
1782 int miterate = settings_get_miterate ();
/* Form 1: iterate over the tokens of an evaluated list. */
1783 if (p < end && p->token.type == T_MACRO_ID
1784 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1787 char *list = macro_evaluate_expression (&p, end - p, &subme);
1791 struct macro_tokens items = { .n = 0 };
1792 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
1796 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1799 macro_tokens_uninit (&items);
1803 for (size_t i = 0; i < items.n && !break_; i++)
1807 macro_error (&substack, NULL,
1808 _("!DO loop over list exceeded "
1809 "maximum number of iterations %d. "
1810 "(Use SET MITERATE to change the limit.)"),
1814 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1815 ss_xstrdup (items.mts[i].syntax));
1817 macro_expand (p, do_end - p, &subme, exp);
/* After the loop: release the tokenized list.  Previously ITEMS was freed
   only on the error path above and leaked on this successful return. */
macro_tokens_uninit (&items);
1819 return do_end - tokens + 1;
/* Form 2: numerical loop from FIRST to LAST in steps of BY. */
1821 else if (p < end && p->token.type == T_EQUALS)
1825 if (!macro_evaluate_number (&p, end - p, &subme, &first))
1828 if (p >= end || p->token.type != T_MACRO_ID
1829 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1831 macro_error (subme.stack, p < end ? p : NULL,
1832 _("Expected !TO in numerical !DO loop."));
1838 if (!macro_evaluate_number (&p, end - p, &subme, &last))
/* !BY is optional. */
1842 if (p < end && p->token.type == T_MACRO_ID
1843 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1846 if (!macro_evaluate_number (&p, end - p, &subme, &by))
1851 macro_error (subme.stack, NULL, _("!BY value cannot be zero."));
1856 const struct macro_token *do_end = find_doend (subme.stack, p, end);
/* Iterate only if the step moves FIRST toward LAST. */
1859 if ((by > 0 && first <= last) || (by < 0 && first >= last))
/* The conditional is parenthesized so that the break flag stops the loop for
   both ascending and descending steps.  Without the parentheses `&&' bound
   tighter than `?:' and the flag was only tested when BY was negative. */
1862 for (double index = first;
1863 (by > 0 ? (index <= last) : (index >= last)) && !break_;
1868 macro_error (subme.stack, NULL,
1869 _("Numerical !DO loop exceeded "
1870 "maximum number of iterations %d. "
1871 "(Use SET MITERATE to change the limit.)"),
/* The loop variable receives the textual form of the current index. */
1876 char index_s[DBL_BUFSIZE_BOUND];
1877 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1878 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1881 macro_expand (p, do_end - p, &subme, exp);
1885 return do_end - tokens + 1;
/* Neither `=' nor !IN followed the variable name. */
1889 macro_error (me->stack, p < end ? p : NULL,
1890 _("Expected `=' or !IN in !DO loop."));
/* Appends the argument for parameter number IDX of ME->macro to EXP.

   When expansion is currently enabled (*ME->expand) and the parameter has
   expand_arg set, the argument's tokens are run through macro_expand() with a
   fresh expander that has its own, initially empty, variable map and a stack
   entry named after the parameter.  The loop at the end instead appends the
   argument's tokens to EXP unchanged (presumably the alternative branch; the
   branching line is not part of this listing). */
1896 macro_expand_arg (const struct macro_expander *me, size_t idx,
1897 struct macro_tokens *exp)
1899 const struct macro_param *param = &me->macro->params[idx];
1900 const struct macro_tokens *arg = me->args[idx];
1902 if (*me->expand && param->expand_arg)
/* Variables set while expanding the argument live in this temporary map,
   which is destroyed once the expansion is done. */
1904 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1905 struct macro_expansion_stack stack = {
1906 .name = param->name,
1909 struct macro_expander subme = {
1910 .macros = me->macros,
1913 .segmenter_mode = me->segmenter_mode,
1914 .expand = me->expand,
1917 .nesting_countdown = me->nesting_countdown,
1920 macro_expand (arg->mts, arg->n, &subme, exp);
1921 stringi_map_destroy (&vars);
/* Verbatim copy of the argument's tokens. */
1924 for (size_t i = 0; i < arg->n; i++)
1925 macro_tokens_add (exp, &arg->mts[i]);
/* Expands the construct that begins at MTS[0] (of N tokens), appending the
   result to EXP.  The visible code tries the following in order:

     - a call to another macro, collected with macro_call_create__() and
       macro_call_add() and expanded with a nested expander;
     - a token that is not a T_MACRO_ID, which is copied to EXP;
     - a parameter of ME->macro, or "!*" for all parameters in order;
     - a variable in ME->vars (set by !DO or !LET);
     - a macro function, via expand_macro_function();
     - the !IF, !LET and !DO constructs;
     - the !break, !onexpand and !offexpand keywords.

   A token matching none of these is copied to EXP.  macro_expand() asserts
   that the return value is positive and no larger than N, i.e. it is used as
   the number of tokens consumed. */
1929 macro_expand__ (const struct macro_token *mts, size_t n,
1930 const struct macro_expander *me,
1931 struct macro_tokens *exp)
1933 const struct token *token = &mts[0].token;
1935 /* Recursive macro calls. */
1938 struct macro_call *submc;
1939 int n_call = macro_call_create__ (me->macros, me->stack, token, &submc);
/* Feed further tokens to the call until macro_call_add() returns nonzero;
   once the input is exhausted a synthetic T_ENDCMD token is supplied. */
1940 for (size_t j = 1; !n_call; j++)
1942 const struct macro_token endcmd
1943 = { .token = { .type = T_ENDCMD } };
1944 n_call = macro_call_add (submc, j < n ? &mts[j] : &endcmd, NULL);
/* The called macro's body is expanded with a fresh variable map, its own
   stack entry, and a nesting countdown reduced by one. */
1948 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1949 struct macro_expansion_stack stack = {
1950 .name = submc->macro->name,
1951 .location = submc->macro->location,
1954 struct macro_expander subme = {
1955 .macros = submc->macros,
1956 .macro = submc->macro,
1957 .args = submc->args,
1958 .segmenter_mode = me->segmenter_mode,
1959 .expand = me->expand,
1962 .nesting_countdown = me->nesting_countdown - 1,
1965 const struct macro_tokens *body = &submc->macro->body;
1966 macro_expand (body->mts, body->n, &subme, exp);
1967 macro_call_destroy (submc);
1968 stringi_map_destroy (&vars);
1972 macro_call_destroy (submc);
/* Tokens that are not macro identifiers pass through unchanged. */
1975 if (token->type != T_MACRO_ID)
1977 macro_tokens_add (exp, &mts[0]);
/* Parameters: the index is recovered from the parameter's position in the
   macro's parameter array. */
1984 const struct macro_param *param = macro_find_parameter_by_name (
1985 me->macro, token->string);
1988 macro_expand_arg (me, param - me->macro->params, exp);
1991 else if (ss_equals (token->string, ss_cstr ("!*")))
1993 for (size_t j = 0; j < me->macro->n_params; j++)
1994 macro_expand_arg (me, j, exp);
1999 /* Variables set by !DO or !LET. */
2000 const char *var = stringi_map_find__ (me->vars, token->string.string,
2001 token->string.length);
/* A variable's value is a string, so it is tokenized before being added. */
2004 macro_tokens_from_string__ (exp, ss_cstr (var),
2005 me->segmenter_mode, me->stack);
2009 /* Macro functions. */
2010 struct string function_output = DS_EMPTY_INITIALIZER;
2011 size_t n_function = expand_macro_function (me, mts, n, &function_output);
2014 macro_tokens_from_string__ (exp, function_output.ss,
2015 me->segmenter_mode, me->stack);
2016 ds_destroy (&function_output);
/* Control constructs. */
2021 size_t n_if = macro_expand_if (mts, n, me, exp);
2025 size_t n_let = macro_parse_let (mts, n, me);
2029 size_t n_do = macro_expand_do (mts, n, me, exp);
/* Remaining keywords, matched with lex_id_match_n (..., 4). */
2033 if (lex_id_match_n (token->string, ss_cstr ("!break"), 4))
2038 macro_error (me->stack, &mts[0], _("!BREAK outside !DO."));
2040 else if (lex_id_match_n (token->string, ss_cstr ("!onexpand"), 4))
2042 else if (lex_id_match_n (token->string, ss_cstr ("!offexpand"), 4))
2043 *me->expand = false;
2045 macro_tokens_add (exp, &mts[0]);
/* Expands the N tokens in MTS, appending the result to EXP.

   If ME's nesting countdown has run out (<= 0), an error naming the limit
   from settings_get_mnest() is reported and the tokens are copied to EXP
   without expansion.  The main loop hands the remaining tokens to
   macro_expand__() one construct at a time and tests the break flag, when ME
   has one, at the top of each pass. */
2050 macro_expand (const struct macro_token *mts, size_t n,
2051 const struct macro_expander *me,
2052 struct macro_tokens *exp)
2054 if (me->nesting_countdown <= 0)
2056 macro_error (me->stack, NULL, _("Maximum nesting level %d exceeded. "
2057 "(Use SET MNEST to change the limit.)"),
2058 settings_get_mnest ());
/* Pass the input through unexpanded. */
2059 for (size_t i = 0; i < n; i++)
2060 macro_tokens_add (exp, &mts[i]);
/* The loop header has no increment; the assertion below checks that each
   call consumes at least one token and stays within the input. */
2064 for (size_t i = 0; i < n; )
2066 if (me->break_ && *me->break_)
2069 size_t consumed = macro_expand__ (&mts[i], n - i, me, exp);
2070 assert (consumed > 0 && i + consumed <= n);
/* Expands the body of the macro called by MC, appending the result to EXP.
   MC must be in state MC_FINISHED (asserted).  SEGMENTER_MODE is passed on to
   the expander, and CALL_LOC is recorded in the first of two expansion-stack
   entries; the second carries the macro's name and location.  The nesting
   countdown starts at settings_get_mnest(), and a variable map local to this
   call is created and destroyed here. */
2076 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2077 const struct msg_location *call_loc,
2078 struct macro_tokens *exp)
2080 assert (mc->state == MC_FINISHED);
2083 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
/* Stack entry for the location of the call. */
2084 struct macro_expansion_stack stack0 = {
2085 .location = call_loc,
/* Stack entry for the macro being expanded. */
2087 struct macro_expansion_stack stack1 = {
2089 .name = mc->macro->name,
2090 .location = mc->macro->location,
2092 struct macro_expander me = {
2093 .macros = mc->macros,
2096 .segmenter_mode = segmenter_mode,
2100 .nesting_countdown = settings_get_mnest (),
2104 const struct macro_tokens *body = &mc->macro->body;
2105 macro_expand (body->mts, body->n, &me, exp);
2107 stringi_map_destroy (&vars);