1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list. Entries are not dynamically allocated
46 but instead live on the program stack.
48 The outermost entry, where 'next' is NULL, represents the source location of
49 the call to the macro. */
50 struct macro_expansion_stack
52 const struct macro_expansion_stack *next; /* Next outer stack entry. */
53 const char *name; /* A macro name or !IF, !DO, etc. */
54 const struct msg_location *location; /* Source location if available. */
57 /* Reports an error during macro expansion. STACK is the stack for reporting
58 the location of the error, MT is the optional token at which the error was
59 detected, and FORMAT along with the varargs is the message to report. */
60 static void PRINTF_FORMAT (3, 0)
61 macro_error_valist (const struct macro_expansion_stack *stack,
62 const struct macro_token *mt, const char *format,
65 struct msg_stack **ms = NULL;
66 size_t allocated_ms = 0;
69 const struct macro_expansion_stack *p;
70 for (p = stack; p && p->next; p = p->next)
72 if (n_ms >= allocated_ms)
73 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
75 /* TRANSLATORS: These strings are used for explaining the context of an
76 error. The "In the expansion of" message appears first, followed by zero
77 or more of the "inside the expansion of" messages. `innermost',
78 `next_inner', etc., are names of macros, and `foobar' is a piece of
81 foo.sps:12: At `foobar' in the expansion of `innermost',
82 foo.sps:23: inside the expansion of `next_inner',
83 foo.sps:34: inside the expansion of `next_inner2',
84 foo.sps:45: inside the expansion of `outermost',
85 foo.sps:76: This is the actual error message. */
89 if (mt && mt->syntax.length)
92 str_ellipsize (mt->syntax, syntax, sizeof syntax);
93 description = xasprintf (_("At `%s' in the expansion of `%s',"),
97 description = xasprintf (_("In the expansion of `%s',"), p->name);
100 description = xasprintf (_("inside the expansion of `%s',"), p->name);
102 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
103 *ms[n_ms] = (struct msg_stack) {
104 .location = msg_location_dup (p->location),
105 .description = description,
110 struct msg *m = xmalloc (sizeof *m);
112 .category = MSG_C_SYNTAX,
113 .severity = MSG_S_ERROR,
116 .location = msg_location_dup (p ? p->location : NULL),
117 .text = xvasprintf (format, args),
122 /* Reports an error during macro expansion. STACK is the stack for reporting
123 the location of the error, MT is the optional token at which the error was
124 detected, and FORMAT along with the varargs is the message to report. */
125 static void PRINTF_FORMAT (3, 4)
126 macro_error (const struct macro_expansion_stack *stack,
127 const struct macro_token *mt, const char *format, ...)
130 va_start (args, format);
131 macro_error_valist (stack, mt, format, args);
136 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
138 token_copy (&dst->token, &src->token);
139 ss_alloc_substring (&dst->syntax, src->syntax);
143 macro_token_uninit (struct macro_token *mt)
145 token_uninit (&mt->token);
146 ss_dealloc (&mt->syntax);
150 macro_token_to_syntax (struct macro_token *mt, struct string *s)
152 ds_put_substring (s, mt->syntax);
155 is_macro_keyword (struct substring s)
157 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
158 if (stringi_set_is_empty (&keywords))
160 static const char *kws[] = {
181 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
182 stringi_set_insert (&keywords, kws[i]);
185 ss_ltrim (&s, ss_cstr ("!"));
186 return stringi_set_contains_len (&keywords, s.string, s.length);
190 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
192 *dst = (struct macro_tokens) {
193 .mts = xmalloc (src->n * sizeof *dst->mts),
197 for (size_t i = 0; i < src->n; i++)
198 macro_token_copy (&dst->mts[i], &src->mts[i]);
202 macro_tokens_uninit (struct macro_tokens *mts)
204 for (size_t i = 0; i < mts->n; i++)
205 macro_token_uninit (&mts->mts[i]);
210 macro_tokens_add_uninit (struct macro_tokens *mts)
212 if (mts->n >= mts->allocated)
213 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
214 return &mts->mts[mts->n++];
218 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
220 macro_token_copy (macro_tokens_add_uninit (mts), mt);
223 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
224 if nonnull, for error reporting. */
226 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
227 enum segmenter_mode mode,
228 const struct macro_expansion_stack *stack)
230 struct segmenter segmenter = segmenter_init (mode, true);
231 struct substring body = src;
233 while (body.length > 0)
235 struct macro_token mt = {
236 .token = { .type = T_STOP },
237 .syntax = { .string = body.string },
239 struct token *token = &mt.token;
241 enum segment_type type;
242 int seg_len = segmenter_push (&segmenter, body.string,
243 body.length, true, &type);
244 assert (seg_len >= 0);
246 struct substring segment = ss_head (body, seg_len);
247 enum tokenize_result result = token_from_segment (type, segment, token);
248 ss_advance (&body, seg_len);
256 mt.syntax.length = body.string - mt.syntax.string;
257 macro_tokens_add (mts, &mt);
261 mt.syntax.length = body.string - mt.syntax.string;
262 macro_error (stack, &mt, "%s", token->string.string);
266 token_uninit (token);
270 /* Tokenizes SRC according to MODE and appends the tokens to MTS. */
272 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
273 enum segmenter_mode mode)
275 macro_tokens_from_string__ (mts, src, mode, NULL);
279 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
281 for (size_t i = 0; i < mts->n; i++)
282 token_print (&mts->mts[i].token, stream);
287 TC_ENDCMD, /* No space before or after (new-line after). */
288 TC_BINOP, /* Space on both sides. */
289 TC_COMMA, /* Space afterward. */
290 TC_ID, /* Don't need spaces except sequentially. */
291 TC_PUNCT, /* Don't need spaces except sequentially. */
295 needs_space (enum token_class prev, enum token_class next)
297 /* Don't need a space before or after the end of a command.
298 (A new-line is needed afterward as a special case.) */
299 if (prev == TC_ENDCMD || next == TC_ENDCMD)
302 /* Binary operators always have a space on both sides. */
303 if (prev == TC_BINOP || next == TC_BINOP)
306 /* A comma always has a space afterward. */
307 if (prev == TC_COMMA)
310 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
311 two of them in a row. */
315 static enum token_class
316 classify_token (enum token_type type)
368 /* Appends syntax for the tokens in MTS to S. If OFS and LEN are nonnull, sets
369 OFS[i] to the offset within S of the start of token 'i' in MTS and LEN[i] to
370 its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1] because some
371 tokens are separated by white space. */
373 macro_tokens_to_syntax (struct macro_tokens *mts, struct string *s,
374 size_t *ofs, size_t *len)
376 assert ((ofs != NULL) == (len != NULL));
381 for (size_t i = 0; i < mts->n; i++)
385 enum token_type prev = mts->mts[i - 1].token.type;
386 enum token_type next = mts->mts[i].token.type;
388 if (prev == T_ENDCMD)
389 ds_put_byte (s, '\n');
392 enum token_class pc = classify_token (prev);
393 enum token_class nc = classify_token (next);
394 if (needs_space (pc, nc))
395 ds_put_byte (s, ' ');
400 ofs[i] = s->ss.length;
401 macro_token_to_syntax (&mts->mts[i], s);
403 len[i] = s->ss.length - ofs[i];
408 macro_destroy (struct macro *m)
414 msg_location_destroy (m->location);
415 for (size_t i = 0; i < m->n_params; i++)
417 struct macro_param *p = &m->params[i];
420 macro_tokens_uninit (&p->def);
428 token_uninit (&p->charend);
432 token_uninit (&p->enclose[0]);
433 token_uninit (&p->enclose[1]);
441 macro_tokens_uninit (&m->body);
446 macro_set_create (void)
448 struct macro_set *set = xmalloc (sizeof *set);
449 *set = (struct macro_set) {
450 .macros = HMAP_INITIALIZER (set->macros),
456 macro_set_destroy (struct macro_set *set)
461 struct macro *macro, *next;
462 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
/* Unlink each macro from SET's hash map, then destroy it.  The _SAFE
   iterator is used since the current node is removed inside the loop. */
464 hmap_delete (&set->macros, &macro->hmap_node);
465 macro_destroy (macro);
467 hmap_destroy (&set->macros);
472 hash_macro_name (const char *name)
474 return utf8_hash_case_string (name, 0);
477 static struct macro *
478 macro_set_find__ (struct macro_set *set, const char *name)
480 if (macro_set_is_empty (set))
484 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
485 hash_macro_name (name), &set->macros)
486 if (!utf8_strcasecmp (macro->name, name))
493 macro_set_find (const struct macro_set *set, const char *name)
495 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
498 /* Adds M to SET. M replaces any existing macro with the same name. Takes
501 macro_set_add (struct macro_set *set, struct macro *m)
503 struct macro *victim = macro_set_find__ (set, m->name);
506 hmap_delete (&set->macros, &victim->hmap_node);
507 macro_destroy (victim);
510 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
513 /* Macro call parsing. */
520 /* Accumulating tokens in mc->params toward the end of any type of
524 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
527 /* Expecting a keyword for a keyword argument. */
530 /* Expecting an equal sign for a keyword argument. */
533 /* Macro fully parsed and ready for expansion. */
537 /* Parsing macro calls. This is a FSM driven by macro_call_create() and
538 macro_call_add() to identify the macro being called and obtain its
539 arguments. 'state' identifies the FSM state. */
542 const struct macro_set *macros;
543 const struct macro *macro;
544 struct macro_tokens **args;
545 const struct macro_expansion_stack *stack;
549 const struct macro_param *param; /* Parameter currently being parsed. */
552 /* Completes macro expansion by initializing arguments that weren't supplied to
555 mc_finished (struct macro_call *mc)
557 mc->state = MC_FINISHED;
558 for (size_t i = 0; i < mc->macro->n_params; i++)
560 mc->args[i] = &mc->macro->params[i].def;
565 mc_next_arg (struct macro_call *mc)
569 assert (!mc->macro->n_params);
570 return mc_finished (mc);
572 else if (mc->param->positional)
575 if (mc->param >= &mc->macro->params[mc->macro->n_params])
576 return mc_finished (mc);
579 mc->state = (!mc->param->positional ? MC_KEYWORD
580 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
587 for (size_t i = 0; i < mc->macro->n_params; i++)
590 mc->state = MC_KEYWORD;
593 return mc_finished (mc);
597 static void PRINTF_FORMAT (3, 4)
598 mc_error (const struct macro_call *mc, const struct msg_location *loc,
599 const char *format, ...)
602 va_start (args, format);
605 const struct macro_expansion_stack stack = { .location = loc };
606 macro_error_valist (&stack, NULL, format, args);
609 macro_error_valist (mc->stack, NULL, format, args);
614 mc_add_arg (struct macro_call *mc, const struct macro_token *mt,
615 const struct msg_location *loc)
617 const struct macro_param *p = mc->param;
619 const struct token *token = &mt->token;
620 if ((token->type == T_ENDCMD || token->type == T_STOP)
621 && p->arg_type != ARG_CMDEND)
624 _("Unexpected end of command reading argument %s "
625 "to macro %s."), mc->param->name, mc->macro->name);
627 mc->state = MC_ERROR;
633 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
635 *argp = xzalloc (sizeof **argp);
636 struct macro_tokens *arg = *argp;
637 if (p->arg_type == ARG_N_TOKENS)
639 macro_tokens_add (arg, mt);
640 if (arg->n >= p->n_tokens)
641 return mc_next_arg (mc);
644 else if (p->arg_type == ARG_CMDEND)
646 if (token->type == T_ENDCMD || token->type == T_STOP)
647 return mc_next_arg (mc);
648 macro_tokens_add (arg, mt);
653 const struct token *end
654 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
655 if (token_equal (token, end))
656 return mc_next_arg (mc);
657 macro_tokens_add (arg, mt);
663 mc_expected (struct macro_call *mc, const struct macro_token *actual,
664 const struct msg_location *loc, const struct token *expected)
666 const struct substring actual_s = (actual->syntax.length ? actual->syntax
667 : ss_cstr (_("<end of input>")));
668 char *expected_s = token_to_string (expected);
670 _("Found `%.*s' while expecting `%s' reading argument %s "
672 (int) actual_s.length, actual_s.string, expected_s,
673 mc->param->name, mc->macro->name);
676 mc->state = MC_ERROR;
681 mc_enclose (struct macro_call *mc, const struct macro_token *mt,
682 const struct msg_location *loc)
684 const struct token *token = &mt->token;
687 if (token_equal (&mc->param->enclose[0], token))
693 return mc_expected (mc, mt, loc, &mc->param->enclose[0]);
696 static const struct macro_param *
697 macro_find_parameter_by_name (const struct macro *m, struct substring name)
702 ss_ltrim (&name, ss_cstr ("!"));
704 for (size_t i = 0; i < m->n_params; i++)
706 const struct macro_param *p = &m->params[i];
707 struct substring p_name = ss_cstr (p->name + 1);
708 if (!utf8_strncasecmp (p_name.string, p_name.length,
709 name.string, name.length))
716 mc_keyword (struct macro_call *mc, const struct macro_token *mt,
717 const struct msg_location *loc)
719 const struct token *token = &mt->token;
720 if (token->type != T_ID)
721 return mc_finished (mc);
723 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
727 size_t arg_index = p - mc->macro->params;
729 if (mc->args[arg_index])
732 _("Argument %s multiply specified in call to macro %s."),
733 p->name, mc->macro->name);
734 mc->state = MC_ERROR;
739 mc->state = MC_EQUALS;
743 return mc_finished (mc);
747 mc_equals (struct macro_call *mc, const struct macro_token *mt,
748 const struct msg_location *loc)
750 const struct token *token = &mt->token;
753 if (token->type == T_EQUALS)
759 return mc_expected (mc, mt, loc, &(struct token) { .type = T_EQUALS });
763 macro_call_create__ (const struct macro_set *macros,
764 const struct macro_expansion_stack *stack,
765 const struct token *token,
766 struct macro_call **mcp)
768 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
769 ? macro_set_find (macros, token->string.string)
777 struct macro_call *mc = xmalloc (sizeof *mc);
778 *mc = (struct macro_call) {
782 .state = (!macro->n_params ? MC_FINISHED
783 : !macro->params[0].positional ? MC_KEYWORD
784 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
786 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
787 .param = macro->params,
792 return mc->state == MC_FINISHED ? 1 : 0;
795 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
796 macro expander and initializes *MCP to it. Returns 0 if more tokens are needed
797 and should be added via macro_call_add() or 1 if the caller should next call
798 macro_call_get_expansion().
800 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to
803 macro_call_create (const struct macro_set *macros,
804 const struct token *token,
805 struct macro_call **mcp)
807 return macro_call_create__ (macros, NULL, token, mcp);
811 macro_call_destroy (struct macro_call *mc)
816 for (size_t i = 0; i < mc->macro->n_params; i++)
818 struct macro_tokens *a = mc->args[i];
819 if (a && a != &mc->macro->params[i].def)
821 macro_tokens_uninit (a);
829 /* Adds MT to the collection of tokens in MC that potentially need to be
832 Returns -1 if the tokens added do not actually invoke a macro. The caller
833 should consume the first token without expanding it. (Later tokens might
834 invoke a macro so it's best to feed the second token into a new expander.)
836 Returns 0 if the macro expander needs more tokens, for macro arguments or to
837 decide whether this is actually a macro invocation. The caller should call
838 macro_call_add() again with the next token.
840 Returns a positive number to indicate that the returned number of tokens
841 invoke a macro. The number returned might be less than the number of tokens
842 added because it can take a few tokens of lookahead to determine whether the
843 macro invocation is finished. The caller should call
844 macro_call_get_expansion() to obtain the expansion. */
846 macro_call_add (struct macro_call *mc, const struct macro_token *mt,
847 const struct msg_location *loc)
855 return mc_add_arg (mc, mt, loc);
858 return mc_enclose (mc, mt, loc);
861 return mc_keyword (mc, mt, loc);
864 return mc_equals (mc, mt, loc);
871 /* Macro expansion. */
873 struct macro_expander
875 /* Always available. */
876 const struct macro_set *macros; /* Macros to expand recursively. */
877 enum segmenter_mode segmenter_mode; /* Mode for tokenization. */
878 int nesting_countdown; /* Remaining nesting levels. */
879 const struct macro_expansion_stack *stack; /* Stack for error reporting. */
880 bool *expand; /* May macro calls be expanded? */
881 struct stringi_map *vars; /* Variables from !do and !let. */
883 /* Only nonnull if inside a !DO loop. */
884 bool *break_; /* Set to true to break out of loop. */
886 /* Only nonnull if expanding a macro (and not, say, a macro argument). */
887 const struct macro *macro;
888 struct macro_tokens **args;
892 macro_expand (const struct macro_token *mts, size_t n_mts,
893 const struct macro_expander *, struct macro_tokens *);
896 expand_macro_function (const struct macro_expander *me,
897 const struct macro_token *input, size_t n_input,
898 struct string *output);
900 /* Returns true if the N tokens within MTS start with !*, false otherwise. */
902 is_bang_star (const struct macro_token *mts, size_t n)
905 && mts[0].token.type == T_MACRO_ID
906 && ss_equals (mts[0].token.string, ss_cstr ("!"))
907 && mts[1].token.type == T_ASTERISK);
910 /* Parses one function argument from the N_INPUT tokens in INPUT.
911 Each argument to a macro function is one of:
913 - A quoted string or other single literal token.
915 - An argument to the macro being expanded, e.g. !1 or a named argument.
919 - A function invocation.
921 Each function invocation yields a character sequence to be turned into a
922 sequence of tokens. The case where that character sequence is a single
923 quoted string is an important special case.
926 parse_function_arg (const struct macro_expander *me,
927 const struct macro_token *input, size_t n_input,
930 assert (n_input > 0);
932 const struct token *token = &input[0].token;
933 if (token->type == T_MACRO_ID && me->macro)
935 const struct macro_param *param = macro_find_parameter_by_name (
936 me->macro, token->string);
939 size_t param_idx = param - me->macro->params;
940 macro_tokens_to_syntax (me->args[param_idx], farg, NULL, NULL);
944 if (is_bang_star (input, n_input))
946 for (size_t i = 0; i < me->macro->n_params; i++)
948 if (!me->macro->params[i].positional)
951 ds_put_byte (farg, ' ');
952 macro_tokens_to_syntax (me->args[i], farg, NULL, NULL);
957 const char *var = stringi_map_find__ (me->vars,
958 token->string.string,
959 token->string.length);
962 ds_put_cstr (farg, var);
966 size_t n_function = expand_macro_function (me, input, n_input, farg);
971 ds_put_substring (farg, input[0].syntax);
976 parse_function_args (const struct macro_expander *me,
977 const struct macro_token *mts, size_t n,
978 const char *function,
979 struct string_array *args)
981 if (n < 2 || mts[1].token.type != T_LPAREN)
983 macro_error (me->stack, n > 1 ? &mts[1] : NULL,
984 _("`(' expected following %s."), function);
988 for (size_t i = 2; i < n; )
990 if (mts[i].token.type == T_RPAREN)
993 struct string s = DS_EMPTY_INITIALIZER;
994 i += parse_function_arg (me, mts + i, n - i, &s);
995 string_array_append_nocopy (args, ds_steal_cstr (&s));
999 else if (mts[i].token.type == T_COMMA)
1001 else if (mts[i].token.type != T_RPAREN)
1003 macro_error (me->stack, &mts[i],
1004 _("`,' or `)' expected in call to macro function %s."),
1010 macro_error (me->stack, NULL, _("Missing `)' in call to macro function %s."),
1016 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1017 struct string *content)
1019 struct string_lexer slex;
1020 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1022 struct token token1;
1023 if (string_lexer_next (&slex, &token1) != SLR_TOKEN
1024 || token1.type != T_STRING)
1026 token_uninit (&token1);
1030 struct token token2;
1031 if (string_lexer_next (&slex, &token2) != SLR_END)
1033 token_uninit (&token1);
1034 token_uninit (&token2);
1038 ds_put_substring (content, token1.string);
1039 token_uninit (&token1);
1044 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1047 ds_init_empty (tmp);
1048 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
1052 parse_integer (const char *s, int *np)
1057 long int n = strtol (s, &tail, 10);
1058 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
1059 tail += strspn (tail, CC_SPACES);
1060 return *tail == '\0' && errno != ERANGE && n == *np;
1064 expand_macro_function (const struct macro_expander *me,
1065 const struct macro_token *input, size_t n_input,
1066 struct string *output)
1068 if (!n_input || input[0].token.type != T_MACRO_ID)
1071 struct macro_function
1077 enum macro_function_id
1092 static const struct macro_function mfs[] = {
1093 [MF_BLANKS] = { "!BLANKS", 1, 1 },
1094 [MF_CONCAT] = { "!CONCAT", 1, INT_MAX },
1095 [MF_EVAL] = { "!EVAL", 1, 1 },
1096 [MF_HEAD] = { "!HEAD", 1, 1 },
1097 [MF_INDEX] = { "!INDEX", 2, 2 },
1098 [MF_LENGTH] = { "!LENGTH", 1, 1 },
1099 [MF_NULL] = { "!NULL", 0, 0 },
1100 [MF_QUOTE] = { "!QUOTE", 1, 1 },
1101 [MF_SUBSTR] = { "!SUBSTR", 2, 3 },
1102 [MF_TAIL] = { "!TAIL", 1, 1 },
1103 [MF_UNQUOTE] = { "!UNQUOTE", 1, 1 },
1104 [MF_UPCASE] = { "!UPCASE", 1, 1 },
1107 /* Is this a macro function? */
1108 const struct macro_function *mf;
1109 for (mf = mfs; ; mf++)
1111 if (mf >= mfs + sizeof mfs / sizeof *mfs)
1113 /* Not a macro function. */
1117 if (lex_id_match_n (ss_cstr (mf->name), input[0].token.string, 4))
1121 enum macro_function_id id = mf - mfs;
1125 struct string_array args = STRING_ARRAY_INITIALIZER;
1126 size_t n_consumed = parse_function_args (me, input, n_input, mf->name, &args);
1130 if (args.n < mf->min_args || args.n > mf->max_args)
1132 if (mf->min_args == 1 && mf->max_args == 1)
1133 macro_error (me->stack, NULL,
1134 _("Macro function %s takes one argument (not %zu)."),
1136 else if (mf->min_args == 2 && mf->max_args == 2)
1137 macro_error (me->stack, NULL,
1138 _("Macro function %s takes two arguments (not %zu)."),
1140 else if (mf->min_args == 2 && mf->max_args == 3)
1141 macro_error (me->stack, NULL,
1142 _("Macro function %s takes two or three arguments "
1145 else if (mf->min_args == 1 && mf->max_args == INT_MAX)
1146 macro_error (me->stack, NULL,
1147 _("Macro function %s needs at least one argument."),
1157 ds_put_format (output, "%zu", strlen (args.strings[0]));
1163 if (!parse_integer (args.strings[0], &n))
1165 macro_error (me->stack, NULL,
1166 _("Argument to !BLANKS must be non-negative integer "
1167 "(not \"%s\")."), args.strings[0]);
1168 string_array_destroy (&args);
1172 ds_put_byte_multiple (output, ' ', n);
1177 for (size_t i = 0; i < args.n; i++)
1178 if (!unquote_string (args.strings[i], me->segmenter_mode, output))
1179 ds_put_cstr (output, args.strings[i]);
1185 const char *s = unquote_string_in_place (args.strings[0],
1186 me->segmenter_mode, &tmp);
1188 struct macro_tokens mts = { .n = 0 };
1189 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1192 ds_put_substring (output, mts.mts[0].syntax);
1193 macro_tokens_uninit (&mts);
1200 const char *haystack = args.strings[0];
1201 const char *needle = strstr (haystack, args.strings[1]);
1202 ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0);
1207 if (unquote_string (args.strings[0], me->segmenter_mode, NULL))
1208 ds_put_cstr (output, args.strings[0]);
1211 ds_extend (output, strlen (args.strings[0]) + 2);
1212 ds_put_byte (output, '\'');
1213 for (const char *p = args.strings[0]; *p; p++)
1216 ds_put_byte (output, '\'');
1217 ds_put_byte (output, *p);
1219 ds_put_byte (output, '\'');
1226 if (!parse_integer (args.strings[1], &start) || start < 1)
1228 macro_error (me->stack, NULL,
1229 _("Second argument of !SUBSTR must be "
1230 "positive integer (not \"%s\")."),
1232 string_array_destroy (&args);
1236 int count = INT_MAX;
1237 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1239 macro_error (me->stack, NULL,
1240 _("Third argument of !SUBSTR must be "
1241 "non-negative integer (not \"%s\")."),
1243 string_array_destroy (&args);
1247 struct substring s = ss_cstr (args.strings[0]);
1248 ds_put_substring (output, ss_substr (s, start - 1, count));
1255 const char *s = unquote_string_in_place (args.strings[0],
1256 me->segmenter_mode, &tmp);
1258 struct macro_tokens mts = { .n = 0 };
1259 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1263 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1264 macro_tokens_to_syntax (&tail, output, NULL, NULL);
1266 macro_tokens_uninit (&mts);
1272 if (!unquote_string (args.strings[0], me->segmenter_mode, output))
1273 ds_put_cstr (output, args.strings[0]);
1279 const char *s = unquote_string_in_place (args.strings[0],
1280 me->segmenter_mode, &tmp);
1281 char *upper = utf8_to_upper (s);
1282 ds_put_cstr (output, upper);
1290 struct macro_tokens mts = { .n = 0 };
1291 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1292 me->segmenter_mode, me->stack);
1293 struct macro_tokens exp = { .n = 0 };
1294 struct macro_expansion_stack stack = {
1298 struct macro_expander subme = *me;
1299 subme.break_ = NULL;
1300 subme.stack = &stack;
1302 macro_expand (mts.mts, mts.n, &subme, &exp);
1303 macro_tokens_to_syntax (&exp, output, NULL, NULL);
1304 macro_tokens_uninit (&exp);
1305 macro_tokens_uninit (&mts);
1313 string_array_destroy (&args);
1317 static char *macro_evaluate_or (const struct macro_expander *me,
1318 const struct macro_token **tokens,
1319 const struct macro_token *end);
1322 macro_evaluate_literal (const struct macro_expander *me,
1323 const struct macro_token **tokens,
1324 const struct macro_token *end)
1326 const struct macro_token *p = *tokens;
1329 if (p->token.type == T_LPAREN)
1332 char *value = macro_evaluate_or (me, &p, end);
1335 if (p >= end || p->token.type != T_RPAREN)
1338 macro_error (me->stack, p < end ? p : NULL,
1339 _("Expecting ')' in macro expression."));
1346 else if (p->token.type == T_RPAREN)
1348 macro_error (me->stack, p, _("Expecting literal or function invocation "
1349 "in macro expression."));
1353 struct string function_output = DS_EMPTY_INITIALIZER;
1354 size_t function_consumed = parse_function_arg (me, p, end - p,
1356 struct string unquoted = DS_EMPTY_INITIALIZER;
1357 if (unquote_string (ds_cstr (&function_output), me->segmenter_mode,
1360 ds_swap (&function_output, &unquoted);
1361 ds_destroy (&unquoted);
1363 *tokens = p + function_consumed;
1364 return ds_steal_cstr (&function_output);
1367 /* Returns true if MT is valid as a macro operator. Only operators written as
1368 symbols (e.g. <>) are usable in macro expressions, not operators written as
1369 letters (e.g. EQ). */
1371 is_macro_operator (const struct macro_token *mt)
1373 return mt->syntax.length > 0 && !c_isalpha (mt->syntax.string[0]);
1376 static enum token_type
1377 parse_relational_op (const struct macro_token *mt)
1379 switch (mt->token.type)
1389 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1392 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1393 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1394 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1395 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1396 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1397 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
1406 macro_evaluate_relational (const struct macro_expander *me,
1407 const struct macro_token **tokens,
1408 const struct macro_token *end)
1410 const struct macro_token *p = *tokens;
1411 char *lhs = macro_evaluate_literal (me, &p, end);
1415 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1423 char *rhs = macro_evaluate_literal (me, &p, end);
1430 struct string lhs_tmp, rhs_tmp;
1431 int cmp = strcmp (unquote_string_in_place (lhs, me->segmenter_mode,
1433 unquote_string_in_place (rhs, me->segmenter_mode,
1435 ds_destroy (&lhs_tmp);
1436 ds_destroy (&rhs_tmp);
1441 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1443 : op == T_LT ? cmp < 0
1444 : op == T_GT ? cmp > 0
1445 : op == T_LE ? cmp <= 0
1446 : /* T_GE */ cmp >= 0);
1449 return xstrdup (b ? "1" : "0");
1453 macro_evaluate_not (const struct macro_expander *me,
1454 const struct macro_token **tokens,
1455 const struct macro_token *end)
1457 const struct macro_token *p = *tokens;
1459 unsigned int negations = 0;
1461 && (ss_equals_case (p->syntax, ss_cstr ("!NOT"))
1462 || ss_equals (p->syntax, ss_cstr ("~"))))
1468 char *operand = macro_evaluate_relational (me, &p, end);
1469 if (!operand || !negations)
1475 bool b = strcmp (operand, "0") ^ (negations & 1);
1478 return xstrdup (b ? "1" : "0");
1482 macro_evaluate_and (const struct macro_expander *me,
1483 const struct macro_token **tokens,
1484 const struct macro_token *end)
1486 const struct macro_token *p = *tokens;
1487 char *lhs = macro_evaluate_not (me, &p, end);
1492 && (ss_equals_case (p->syntax, ss_cstr ("!AND"))
1493 || ss_equals (p->syntax, ss_cstr ("&"))))
1496 char *rhs = macro_evaluate_not (me, &p, end);
1503 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1506 lhs = xstrdup (b ? "1" : "0");
1513 macro_evaluate_or (const struct macro_expander *me,
1514 const struct macro_token **tokens,
1515 const struct macro_token *end)
1517 const struct macro_token *p = *tokens;
1518 char *lhs = macro_evaluate_and (me, &p, end);
1523 && (ss_equals_case (p->syntax, ss_cstr ("!OR"))
1524 || ss_equals (p->syntax, ss_cstr ("|"))))
1527 char *rhs = macro_evaluate_and (me, &p, end);
1534 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1537 lhs = xstrdup (b ? "1" : "0");
1544 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1545 const struct macro_expander *me)
1547 return macro_evaluate_or (me, tokens, *tokens + n_tokens);
/* Evaluates a macro expression in the N_TOKENS tokens starting at *TOKENS
   and requires the result to be a single numeric token.  On success the
   value is stored through NUMBER (see the assignment near the end); if the
   result is not exactly one number token, an error is reported with
   macro_error(). */
1551 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1552 const struct macro_expander *me,
1555 char *s = macro_evaluate_expression (tokens, n_tokens, me);
/* Re-tokenize the result string so that its token type can be checked. */
1559 struct macro_tokens mts = { .n = 0 };
1560 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, me->stack);
/* Exactly one token, and that token a number, is the only accepted form. */
1561 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
/* The first token, if there is one, is passed along with the error; the
   message quotes the offending expression result. */
1563 macro_error (me->stack, mts.n > 0 ? &mts.mts[0] : NULL,
1564 _("Macro expression must evaluate to "
1565 "a number (not \"%s\")."), s);
/* Release the temporary tokens on the error path. */
1567 macro_tokens_uninit (&mts);
/* Success: hand the numeric value back through NUMBER. */
1571 *number = token_number (&mts.mts[0].token);
/* Release the temporary tokens on the success path. */
1573 macro_tokens_uninit (&mts);
/* Scans the tokens from P up to (not including) END for the keywords that
   structure an !IF construct: "!IF", "!IFEND" and "!ELSE".  Callers use the
   returned token pointer as the end of a !THEN or !ELSE clause and treat a
   null result as "not found".

   NOTE(review): the actions taken in each branch are not visible here;
   presumably nested !IF constructs are tracked so that only a keyword at the
   outermost level ends the scan -- confirm. */
1577 static const struct macro_token *
1578 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1581 for (; p < end; p++)
/* Only T_MACRO_ID tokens can be one of the keywords of interest. */
1583 if (p->token.type != T_MACRO_ID)
/* "!IF" is compared in full with ss_equals_case(). */
1586 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
/* "!IFEND" and "!ELSE" are matched with lex_id_match_n(); the trailing 4 is
   presumably the minimum abbreviation length accepted -- confirm against
   lex_id_match_n(). */
1588 else if (lex_id_match_n (p->token.string, ss_cstr ("!IFEND"), 4))
1594 else if (lex_id_match_n (p->token.string, ss_cstr ("!ELSE"), 4)
/* Expands an !IF construct that begins at TOKENS[0], within the N_TOKENS
   tokens available.  The condition is evaluated as a macro expression, the
   !THEN clause (and the optional !ELSE clause) is located with
   find_ifend_clause(), and the selected clause is expanded into EXP with
   macro_expand().

   On success the return value is the number of tokens consumed, counted
   through the token that ends the construct.  Syntax problems are reported
   with macro_error(). */
1602 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1603 const struct macro_expander *me,
1604 struct macro_tokens *exp)
1606 const struct macro_token *p = tokens;
1607 const struct macro_token *end = tokens + n_tokens;
/* This function only applies when the first token is "!IF". */
1609 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
/* Evaluate the condition; P is passed by address. */
1613 char *result = macro_evaluate_expression (&p, end - p, me);
/* The condition is true unless the expression result is exactly "0". */
1616 bool b = strcmp (result, "0");
/* The condition must be followed by a T_MACRO_ID token matching "!THEN". */
1620 || p->token.type != T_MACRO_ID
1621 || !lex_id_match_n (p->token.string, ss_cstr ("!THEN"), 4))
1623 macro_error (me->stack, p < end ? p : NULL,
1624 _("!THEN expected in macro !IF construct."));
/* The !THEN clause starts right after the "!THEN" token and runs up to the
   token returned by find_ifend_clause(). */
1628 const struct macro_token *start_then = p + 1;
1629 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1632 macro_error (me->stack, NULL,
1633 _("!ELSE or !IFEND expected in macro !IF construct."));
1637 const struct macro_token *start_else, *end_if;
/* If the !THEN clause was ended by "!ELSE", an !ELSE clause follows and must
   itself be ended by "!IFEND". */
1638 if (lex_id_match_n (end_then->token.string, ss_cstr ("!ELSE"), 4))
1640 start_else = end_then + 1;
1641 end_if = find_ifend_clause (start_else, end);
1643 || !lex_id_match_n (end_if->token.string, ss_cstr ("!IFEND"), 4))
/* NOTE(review): "end_if ? end_if : NULL" is equivalent to passing end_if
   directly. */
1645 macro_error (me->stack, end_if ? end_if : NULL,
1646 _("!IFEND expected in macro !IF construct."));
/* START and N describe the clause chosen for expansion: the !THEN clause
   spans start_then..end_then, the !ELSE clause start_else..end_if. */
1656 const struct macro_token *start;
1661 n = end_then - start_then;
1663 else if (start_else)
1666 n = end_if - start_else;
/* Expand the chosen clause with a copy of ME whose stack points at a new
   stack entry local to this call. */
1676 struct macro_expansion_stack stack = {
1680 struct macro_expander subme = *me;
1681 subme.stack = &stack;
1682 macro_expand (start, n, &subme, exp);
/* Tokens consumed: everything from TOKENS through END_IF, inclusive. */
1684 return (end_if + 1) - tokens;
/* Parses a !LET construct that begins at TOKENS[0], within the N_TOKENS
   tokens available: "!LET", a macro variable name, an equals sign, and a
   macro expression.  The evaluated expression is stored under the variable
   name in ME->vars.  Syntax problems are reported with macro_error(). */
1688 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1689 const struct macro_expander *me)
1691 const struct macro_token *p = tokens;
1692 const struct macro_token *end = tokens + n_tokens;
/* This function only applies when the first token is "!LET". */
1694 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
/* The variable name must be a T_MACRO_ID token. */
1698 if (p >= end || p->token.type != T_MACRO_ID)
1700 macro_error (me->stack, p < end ? p : NULL,
1701 _("Expected macro variable name following !LET."));
/* The name must be neither a macro keyword nor the name of one of the
   current macro's parameters. */
1704 const struct substring var_name = p->token.string;
1705 if (is_macro_keyword (var_name)
1706 || macro_find_parameter_by_name (me->macro, var_name))
/* "%.*s" with an explicit length is used to print VAR_NAME, a substring
   with separate length and string members. */
1708 macro_error (me->stack, p < end ? p : NULL,
1709 _("Cannot use argument name or macro keyword "
1710 "\"%.*s\" as !LET variable."),
1711 (int) var_name.length, var_name.string);
/* An equals sign must separate the name from the expression. */
1716 if (p >= end || p->token.type != T_EQUALS)
1718 macro_error (me->stack, p < end ? p : NULL,
1719 _("Expected `=' following !LET."));
/* Evaluate the right-hand side; P is passed by address. */
1724 char *value = macro_evaluate_expression (&p, end - p, me);
/* Store the value under a copy of the variable name.  Presumably the
   "nocopy" variant takes ownership of both strings -- confirm against
   stringi_map_replace_nocopy(). */
1728 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens from P up to (not including) END for the "!DOEND" that
   ends a !DO construct, also recognizing "!DO" along the way.  Callers use
   the returned token pointer as the end of the loop body.  If the scan runs
   out of tokens, "Missing !DOEND." is reported against STACK.

   NOTE(review): the actions taken in each branch are not visible here;
   presumably nested !DO constructs are tracked so that only the matching
   !DOEND ends the scan -- confirm. */
1732 static const struct macro_token *
1733 find_doend (const struct macro_expansion_stack *stack,
1734 const struct macro_token *p, const struct macro_token *end)
1737 for (; p < end; p++)
/* Only T_MACRO_ID tokens can be one of the keywords of interest. */
1739 if (p->token.type != T_MACRO_ID)
/* "!DO" is compared in full; "!DOEND" is matched with lex_id_match_n(). */
1742 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1744 else if (lex_id_match_n (p->token.string, ss_cstr ("!DOEND"), 4))
1751 macro_error (stack, NULL, _("Missing !DOEND."));
/* Expands a !DO construct that begins at TOKENS[0], within the N_TOKENS
   tokens available, appending the expansion to EXP.  Two forms are handled:

   - "!DO var !IN (list)": the list expression is evaluated and tokenized,
     and the body is expanded once per item with VAR set to the item's
     syntax.

   - "!DO var = first !TO last [!BY by]": the body is expanded for each
     value of a numeric index, with VAR set to the formatted index.

   Both forms are limited by the MITERATE setting and can be stopped early
   through the BREAK_ flag that the body's expander shares.  On success the
   return value is the number of tokens consumed, through the !DOEND token.
   Syntax problems are reported with macro_error(). */
1756 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1757 const struct macro_expander *me,
1758 struct macro_tokens *exp)
1760 const struct macro_token *p = tokens;
1761 const struct macro_token *end = tokens + n_tokens;
/* This function only applies when the first token is "!DO". */
1763 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
/* The loop variable name must be a T_MACRO_ID token... */
1767 if (p >= end || p->token.type != T_MACRO_ID)
1769 macro_error (me->stack, p < end ? p : NULL,
1770 _("Expected macro variable name following !DO."));
/* ...that is neither a macro keyword nor one of the current macro's
   parameters. */
1773 const struct substring var_name = p->token.string;
1774 if (is_macro_keyword (var_name)
1775 || macro_find_parameter_by_name (me->macro, var_name))
1777 macro_error (me->stack, p, _("Cannot use argument name or macro "
1778 "keyword as !DO variable."));
/* The loop body is expanded with a copy of ME that has its own stack entry
   and its own BREAK_ flag, local to this call. */
1783 struct macro_expansion_stack substack = {
1787 bool break_ = false;
1788 struct macro_expander subme = *me;
1789 subme.break_ = &break_;
1790 subme.stack = &substack;
/* Iteration limit, from the MITERATE setting. */
1792 int miterate = settings_get_miterate ();
/* List form: "!IN" follows the variable name. */
1793 if (p < end && p->token.type == T_MACRO_ID
1794 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
/* Evaluate the list expression, then split the result into tokens. */
1797 char *list = macro_evaluate_expression (&p, end - p, &subme);
1801 struct macro_tokens items = { .n = 0 };
1802 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
/* The loop body runs from P up to the !DOEND found here. */
1806 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1809 macro_tokens_uninit (&items);
/* One iteration per list item, unless the break flag has been set. */
1813 for (size_t i = 0; i < items.n && !break_; i++)
1817 macro_error (&substack, NULL,
1818 _("!DO loop over list exceeded "
1819 "maximum number of iterations %d. "
1820 "(Use SET MITERATE to change the limit.)"),
/* Bind the loop variable to a copy of the current item's syntax, then
   expand the body. */
1824 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1825 ss_xstrdup (items.mts[i].syntax));
1827 macro_expand (p, do_end - p, &subme, exp);
/* Tokens consumed: everything from TOKENS through the !DOEND, inclusive. */
1829 return do_end - tokens + 1;
/* Numeric form: "=" follows the variable name. */
1831 else if (p < end && p->token.type == T_EQUALS)
/* The starting value must evaluate to a number. */
1835 if (!macro_evaluate_number (&p, end - p, &subme, &first))
/* "!TO" must separate the starting value from the ending value. */
1838 if (p >= end || p->token.type != T_MACRO_ID
1839 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1841 macro_error (subme.stack, p < end ? p : NULL,
1842 _("Expected !TO in numerical !DO loop."));
/* The ending value must evaluate to a number. */
1848 if (!macro_evaluate_number (&p, end - p, &subme, &last))
/* "!BY" is optional; when present, its value must be a nonzero number. */
1852 if (p < end && p->token.type == T_MACRO_ID
1853 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1856 if (!macro_evaluate_number (&p, end - p, &subme, &by))
1861 macro_error (subme.stack, NULL, _("!BY value cannot be zero."));
/* The loop body runs from P up to the !DOEND found here. */
1866 const struct macro_token *do_end = find_doend (subme.stack, p, end);
/* Iterate only if the step direction can get from FIRST to LAST. */
1869 if ((by > 0 && first <= last) || (by < 0 && first >= last))
/* NOTE(review): "&&" binds tighter than "?:", so the condition below parses
   as  by > 0 ? (index <= last) : ((index >= last) && !break_).  The break
   flag is therefore tested in the loop condition only for descending loops;
   an ascending loop keeps iterating after the flag is set.  The intended
   form is probably
     (by > 0 ? index <= last : index >= last) && !break_
   -- confirm. */
1872 for (double index = first;
1873 by > 0 ? (index <= last) : (index >= last) && !break_;
1878 macro_error (subme.stack, NULL,
1879 _("Numerical !DO loop exceeded "
1880 "maximum number of iterations %d. "
1881 "(Use SET MITERATE to change the limit.)"),
/* Format the index with c_dtoastr() into a buffer of DBL_BUFSIZE_BOUND
   bytes, bind the loop variable, then expand the body. */
1886 char index_s[DBL_BUFSIZE_BOUND];
1887 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1888 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1891 macro_expand (p, do_end - p, &subme, exp);
/* Tokens consumed: everything from TOKENS through the !DOEND, inclusive. */
1895 return do_end - tokens + 1;
/* Neither "!IN" nor "=" followed the variable name. */
1899 macro_error (me->stack, p < end ? p : NULL,
1900 _("Expected `=' or !IN in !DO loop."));
/* Appends to EXP the argument bound to parameter number IDX of the macro
   being expanded by ME.  If expansion is currently enabled (*ME->expand) and
   the parameter has expand_arg set, the argument's tokens are themselves
   macro-expanded; otherwise they are copied to EXP unchanged. */
1906 macro_expand_arg (const struct macro_expander *me, size_t idx,
1907 struct macro_tokens *exp)
1909 const struct macro_param *param = &me->macro->params[idx];
1910 const struct macro_tokens *arg = me->args[idx];
1912 if (*me->expand && param->expand_arg)
/* Expand the argument with a fresh, initially empty variable map and a
   stack entry named after the parameter. */
1914 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1915 struct macro_expansion_stack stack = {
1916 .name = param->name,
/* The sub-expander reuses the caller's macro set, segmenter mode, expand
   flag and nesting countdown. */
1919 struct macro_expander subme = {
1920 .macros = me->macros,
1923 .segmenter_mode = me->segmenter_mode,
1924 .expand = me->expand,
1927 .nesting_countdown = me->nesting_countdown,
1930 macro_expand (arg->mts, arg->n, &subme, exp);
/* The temporary variable map is no longer needed. */
1931 stringi_map_destroy (&vars);
/* No expansion: copy the argument's tokens one by one. */
1934 for (size_t i = 0; i < arg->n; i++)
1935 macro_tokens_add (exp, &arg->mts[i]);
/* Expands the construct that starts at MTS[0], with N tokens available,
   appending the result to EXP.  The visible cases are: a nested macro call,
   a token that is not a T_MACRO_ID (copied as is), a macro parameter, the
   form recognized by is_bang_star() (all arguments), a variable set by !DO
   or !LET, a macro function, the !IF, !LET and !DO constructs, and the
   !BREAK, !ONEXPAND and !OFFEXPAND directives.  macro_expand() uses the
   return value as the number of tokens consumed. */
1939 macro_expand__ (const struct macro_token *mts, size_t n,
1940 const struct macro_expander *me,
1941 struct macro_tokens *exp)
1943 const struct token *token = &mts[0].token;
1945 /* Recursive macro calls. */
1948 struct macro_call *submc;
1949 int n_call = macro_call_create__ (me->macros, me->stack, token, &submc);
/* While the call reports 0, feed it further tokens; once the N available
   tokens run out, a synthetic T_ENDCMD token is supplied instead. */
1950 for (size_t j = 1; !n_call; j++)
1952 const struct macro_token endcmd
1953 = { .token = { .type = T_ENDCMD } };
1954 n_call = macro_call_add (submc, j < n ? &mts[j] : &endcmd, NULL);
/* Expand the called macro's body with a fresh variable map, a stack entry
   carrying the callee's name and location, and a nesting countdown that is
   one lower than the caller's. */
1958 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1959 struct macro_expansion_stack stack = {
1960 .name = submc->macro->name,
1961 .location = submc->macro->location,
1964 struct macro_expander subme = {
1965 .macros = submc->macros,
1966 .macro = submc->macro,
1967 .args = submc->args,
1968 .segmenter_mode = me->segmenter_mode,
1969 .expand = me->expand,
1972 .nesting_countdown = me->nesting_countdown - 1,
1975 const struct macro_tokens *body = &submc->macro->body;
1976 macro_expand (body->mts, body->n, &subme, exp);
/* Release the call and the temporary variable map after expansion. */
1977 macro_call_destroy (submc);
1978 stringi_map_destroy (&vars);
/* The call is destroyed on this path as well. */
1982 macro_call_destroy (submc);
/* Tokens that are not T_MACRO_ID are copied to the output unchanged. */
1985 if (token->type != T_MACRO_ID)
1987 macro_tokens_add (exp, &mts[0]);
/* A parameter name expands to the corresponding argument; the index is the
   parameter's offset within the macro's parameter array. */
1994 const struct macro_param *param = macro_find_parameter_by_name (
1995 me->macro, token->string);
1998 macro_expand_arg (me, param - me->macro->params, exp);
/* The is_bang_star() form expands to every argument, in parameter order. */
2001 else if (is_bang_star (mts, n))
2003 for (size_t j = 0; j < me->macro->n_params; j++)
2004 macro_expand_arg (me, j, exp);
2009 /* Variables set by !DO or !LET. */
2010 const char *var = stringi_map_find__ (me->vars, token->string.string,
2011 token->string.length);
/* The variable's value is tokenized and appended to the output. */
2014 macro_tokens_from_string__ (exp, ss_cstr (var),
2015 me->segmenter_mode, me->stack);
2019 /* Macro functions. */
2020 struct string function_output = DS_EMPTY_INITIALIZER;
2021 size_t n_function = expand_macro_function (me, mts, n, &function_output);
/* The function's string output is tokenized into EXP and then freed. */
2024 macro_tokens_from_string__ (exp, function_output.ss,
2025 me->segmenter_mode, me->stack);
2026 ds_destroy (&function_output);
/* Try the structured constructs in turn: !IF, !LET, !DO. */
2031 size_t n_if = macro_expand_if (mts, n, me, exp);
2035 size_t n_let = macro_parse_let (mts, n, me);
2039 size_t n_do = macro_expand_do (mts, n, me, exp);
/* Directives, matched with lex_id_match_n(). */
2043 if (lex_id_match_n (token->string, ss_cstr ("!break"), 4))
2048 macro_error (me->stack, &mts[0], _("!BREAK outside !DO."));
2050 else if (lex_id_match_n (token->string, ss_cstr ("!onexpand"), 4))
/* "!offexpand" clears the shared expand flag. */
2052 else if (lex_id_match_n (token->string, ss_cstr ("!offexpand"), 4))
2053 *me->expand = false;
/* Anything else is copied to the output unchanged. */
2055 macro_tokens_add (exp, &mts[0]);
/* Expands the N tokens in MTS using expander context ME, appending the
   result to EXP.

   If ME's nesting countdown has reached zero or below, an error is reported
   and the tokens are copied to EXP without expansion.  Otherwise the tokens
   are processed one construct at a time by macro_expand__(), stopping early
   if ME has a break flag and it is set. */
2060 macro_expand (const struct macro_token *mts, size_t n,
2061 const struct macro_expander *me,
2062 struct macro_tokens *exp)
/* Nesting limit reached: report it and pass the input through unchanged. */
2064 if (me->nesting_countdown <= 0)
2066 macro_error (me->stack, NULL, _("Maximum nesting level %d exceeded. "
2067 "(Use SET MNEST to change the limit.)"),
2068 settings_get_mnest ());
2069 for (size_t i = 0; i < n; i++)
2070 macro_tokens_add (exp, &mts[i]);
/* The loop header has no increment expression; the assert below relies on
   CONSUMED, the token count reported by macro_expand__(). */
2074 for (size_t i = 0; i < n; )
/* A set break flag is checked before each construct is expanded. */
2076 if (me->break_ && *me->break_)
2079 size_t consumed = macro_expand__ (&mts[i], n - i, me, exp);
/* Each step must consume at least one token and stay within the input. */
2080 assert (consumed > 0 && i + consumed <= n);
/* Expands the body of the macro invoked by the completed call MC, appending
   the result to EXP.  SEGMENTER_MODE is passed on to the expander and
   CALL_LOC is recorded as the location of the outermost stack entry.  MC
   must be in state MC_FINISHED. */
2086 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2087 const struct msg_location *call_loc,
2088 struct macro_tokens *exp)
2090 assert (mc->state == MC_FINISHED);
/* Variable map for this expansion, initially empty. */
2093 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
/* Two stack entries: STACK0 carries the location of the call, and STACK1
   carries the name and location of the macro being expanded. */
2094 struct macro_expansion_stack stack0 = {
2095 .location = call_loc,
2097 struct macro_expansion_stack stack1 = {
2099 .name = mc->macro->name,
2100 .location = mc->macro->location,
/* The top-level expander starts its nesting countdown at the MNEST
   setting. */
2102 struct macro_expander me = {
2103 .macros = mc->macros,
2106 .segmenter_mode = segmenter_mode,
2110 .nesting_countdown = settings_get_mnest (),
/* Expand the macro body, then discard the variable map. */
2114 const struct macro_tokens *body = &mc->macro->body;
2115 macro_expand (body->mts, body->n, &me, exp);
2117 stringi_map_destroy (&vars);