1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list that runs from inner to outer entries.
46 Entries are not dynamically allocated; they live on the program stack. */
47 struct macro_expansion_stack
49 /* Points to an outer stack entry, or NULL if this is the outermost. */
50 const struct macro_expansion_stack *next;
52 /* A macro name or !IF, !DO, etc. */
55 /* Location of the macro definition, if available. */
56 const char *file_name;
61 /* Reports an error during macro expansion. STACK is the stack for reporting
62 the location of the error, MT is the optional token at which the error was
63 detected, and FORMAT along with the varargs is the message to report. */
64 static void PRINTF_FORMAT (3, 4)
65 macro_error (const struct macro_expansion_stack *stack,
66 const struct macro_token *mt,
67 const char *format, ...)
69 struct msg_stack **ms = NULL;
70 size_t allocated_ms = 0;
/* Build one msg_stack entry per expansion-stack entry, walking from STACK
   outward through the 'next' links. */
73 for (const struct macro_expansion_stack *p = stack; p; p = p->next)
75 if (n_ms >= allocated_ms)
76 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
78 /* TRANSLATORS: These strings are used for explaining the context of an
79 error. The "At ... in the expansion of" (or "In the expansion of") message
80 appears first, followed by zero or more of the "inside the expansion of"
81 messages. `innermost', `next_inner', etc., are names of macros, and `foobar'
is a piece of syntax. For example:
84 foo.sps:12: At `foobar' in the expansion of 'innermost',
85 foo.sps:23: inside the expansion of 'next_inner',
86 foo.sps:34: inside the expansion of 'next_inner2',
87 foo.sps:45: inside the expansion of 'outermost',
88 foo.sps:76: This is the actual error message. */
/* When MT has a nonempty representation, an ellipsized copy of it is quoted
   in the description. */
92 if (mt && mt->representation.length)
95 str_ellipsize (mt->representation, syntax, sizeof syntax);
96 description = xasprintf (_("At `%s' in the expansion of `%s',"),
100 description = xasprintf (_("In the expansion of `%s',"), p->name);
103 description = xasprintf (_("inside the expansion of `%s',"), p->name);
/* Each entry is heap-allocated and carries its own copy of the file name. */
105 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
106 *ms[n_ms] = (struct msg_stack) {
108 .file_name = xstrdup_if_nonnull (p->file_name),
109 .first_line = p->first_line,
110 .last_line = p->last_line,
112 .description = description,
/* Format the caller's message text. */
118 va_start (args, format);
119 char *s = xvasprintf (format, args);
/* The message is built as a syntax error (MSG_C_SYNTAX, MSG_S_ERROR). */
122 struct msg *m = xmalloc (sizeof *m);
124 .category = MSG_C_SYNTAX,
125 .severity = MSG_S_ERROR,
/* Initializes DST as a copy of SRC: the token is copied with token_copy() and
   the representation with ss_alloc_substring(). macro_token_uninit() releases
   both parts. */
134 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
136 token_copy (&dst->token, &src->token);
137 ss_alloc_substring (&dst->representation, src->representation);
/* Releases the contents of MT: its token, via token_uninit(), and its
   representation, via ss_dealloc(). MT itself is not freed here. */
141 macro_token_uninit (struct macro_token *mt)
143 token_uninit (&mt->token);
144 ss_dealloc (&mt->representation);
/* Appends the text stored in MT->representation to S. */
148 macro_token_to_representation (struct macro_token *mt, struct string *s)
150 ds_put_substring (s, mt->representation);
/* Returns whether S, after any leading `!' characters are trimmed, is a
   member of the macro keyword set. */
153 is_macro_keyword (struct substring s)
/* The set is a function-local static that is filled from kws[] the first
   time it is found empty. */
155 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
156 if (stringi_set_is_empty (&keywords))
158 static const char *kws[] = {
179 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
180 stringi_set_insert (&keywords, kws[i]);
/* S is passed by value, so trimming it does not affect the caller's copy. */
183 ss_ltrim (&s, ss_cstr ("!"));
184 return stringi_set_contains_len (&keywords, s.string, s.length);
/* Initializes DST as a copy of SRC: allocates room for SRC->n tokens and
   copies each one with macro_token_copy(). */
188 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
190 *dst = (struct macro_tokens) {
191 .mts = xmalloc (src->n * sizeof *dst->mts),
195 for (size_t i = 0; i < src->n; i++)
196 macro_token_copy (&dst->mts[i], &src->mts[i]);
/* Releases the contents of each of the MTS->n tokens in MTS with
   macro_token_uninit(). */
200 macro_tokens_uninit (struct macro_tokens *mts)
202 for (size_t i = 0; i < mts->n; i++)
203 macro_token_uninit (&mts->mts[i]);
/* Reserves one more slot at the end of MTS, growing the array with
   x2nrealloc() when it is full, and returns a pointer to the new slot. The
   slot is counted in MTS->n but is not initialized here; the caller must fill
   it in. */
208 macro_tokens_add_uninit (struct macro_tokens *mts)
210 if (mts->n >= mts->allocated)
211 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
212 return &mts->mts[mts->n++];
/* Appends a copy of MT to MTS. MT itself is left untouched, so the caller
   keeps responsibility for it. */
216 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
218 macro_token_copy (macro_tokens_add_uninit (mts), mt);
221 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
222 if nonnull, for error reporting. */
224 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
225 enum segmenter_mode mode,
226 const struct macro_expansion_stack *stack)
/* Scanning position: the segmenter plus the text that is still unconsumed. */
230 struct segmenter segmenter;
231 struct substring body;
234 struct state state = {
235 .segmenter = segmenter_init (mode, true),
/* NOTE(review): 'saved' looks like a checkpoint for the SCAN_SAVE/SCAN_BACK
   results below; the statements that use it are not visible here. */
238 struct state saved = state;
/* Each iteration of this loop produces at most one token. */
240 while (state.body.length > 0)
/* The representation starts where the unconsumed text currently begins; its
   length is filled in once the token is complete. */
242 struct macro_token mt = {
243 .token = { .type = T_STOP },
244 .representation = { .string = state.body.string },
246 struct token *token = &mt.token;
248 struct scanner scanner;
249 scanner_init (&scanner, token);
/* Take the next segment off the front of the remaining text and feed it to
   the scanner. */
253 enum segment_type type;
254 int seg_len = segmenter_push (&state.segmenter, state.body.string,
255 state.body.length, true, &type);
256 assert (seg_len >= 0);
258 struct substring segment = ss_head (state.body, seg_len);
259 ss_advance (&state.body, seg_len);
261 enum scan_result result = scanner_push (&scanner, type, segment, token);
262 if (result == SCAN_SAVE)
264 else if (result == SCAN_BACK)
269 else if (result == SCAN_DONE)
273 /* We have a token in 'token'. */
/* The representation spans everything consumed since the token began. */
274 mt.representation.length = state.body.string - mt.representation.string;
/* Scan-type tokens are not ordinary tokens. Except for SCAN_SKIP, the
   scanner's error text is reported through macro_error() with the token
   retyped as T_STRING. */
275 if (is_scan_type (token->type))
277 if (token->type != SCAN_SKIP)
279 char *s = scan_token_to_error (token);
282 mt.token.type = T_STRING;
283 macro_error (stack, &mt, "%s", s);
/* macro_tokens_add() stores a copy, so the local token can be released. */
291 macro_tokens_add (mts, &mt);
292 token_uninit (token);
296 /* Tokenizes SRC according to MODE and appends the tokens to MTS. This is
macro_tokens_from_string__() with no expansion stack supplied for error
reporting. */
298 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
299 enum segmenter_mode mode)
301 macro_tokens_from_string__ (mts, src, mode, NULL);
/* Prints each token in MTS to STREAM, in order, using token_print(). */
305 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
307 for (size_t i = 0; i < mts->n; i++)
308 token_print (&mts->mts[i].token, stream);
/* Token classes. needs_space() takes the classes of two adjacent tokens and
   decides whether they must be separated by a space. */
313 TC_ENDCMD, /* No space before or after (new-line after). */
314 TC_BINOP, /* Space on both sides. */
315 TC_COMMA, /* Space afterward. */
316 TC_ID, /* Don't need spaces except sequentially. */
317 TC_PUNCT, /* Don't need spaces except sequentially. */
/* Decides whether a space is needed between a token of class PREV and a
   following token of class NEXT. The rules are tested in priority order. */
321 needs_space (enum token_class prev, enum token_class next)
323 /* Don't need a space before or after the end of a command.
324 (A new-line is needed afterward as a special case.) */
325 if (prev == TC_ENDCMD || next == TC_ENDCMD)
328 /* Binary operators always have a space on both sides. */
329 if (prev == TC_BINOP || next == TC_BINOP)
332 /* A comma always has a space afterward. */
333 if (prev == TC_COMMA)
336 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
337 two of them in a row. */
/* Returns the token class for token type TYPE.
   macro_tokens_to_representation() passes the result to needs_space().
   NOTE(review): only the signature is visible in this listing; the mapping
   itself is not shown. */
341 static enum token_class
342 classify_token (enum token_type type)
394 /* Appends a syntax representation of the tokens in MTS to S. If OFS and LEN
395 are nonnull, sets OFS[i] to the offset within S of the start of token 'i' in
396 MTS and LEN[i] to its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1]
397 because some tokens are separated by white space. */
399 macro_tokens_to_representation (struct macro_tokens *mts, struct string *s,
400 size_t *ofs, size_t *len)
/* OFS and LEN must be supplied together or not at all. */
402 assert ((ofs != NULL) == (len != NULL));
407 for (size_t i = 0; i < mts->n; i++)
/* Pick the separator from the previous and current token types.
   NOTE(review): the mts[i - 1] access needs i > 0; the guard is not visible
   in this listing. */
411 enum token_type prev = mts->mts[i - 1].token.type;
412 enum token_type next = mts->mts[i].token.type;
/* A new-line follows an end-of-command token. */
414 if (prev == T_ENDCMD)
415 ds_put_byte (s, '\n');
418 enum token_class pc = classify_token (prev);
419 enum token_class nc = classify_token (next);
420 if (needs_space (pc, nc))
421 ds_put_byte (s, ' ');
/* The offset is recorded after the separator, so OFS[i] points at the token
   text itself and LEN[i] excludes the separator. */
426 ofs[i] = s->ss.length;
427 macro_token_to_representation (&mts->mts[i], s);
429 len[i] = s->ss.length - ofs[i];
/* Releases resources held by macro M. For each parameter this covers the
   default value tokens and the delimiter tokens (charend, enclose[0],
   enclose[1]); the macro's body tokens are released last. */
434 macro_destroy (struct macro *m)
441 for (size_t i = 0; i < m->n_params; i++)
443 struct macro_param *p = &m->params[i];
446 macro_tokens_uninit (&p->def);
454 token_uninit (&p->charend);
458 token_uninit (&p->enclose[0]);
459 token_uninit (&p->enclose[1]);
467 macro_tokens_uninit (&m->body);
/* Allocates a new macro set and initializes its hash map with
   HMAP_INITIALIZER. */
472 macro_set_create (void)
474 struct macro_set *set = xmalloc (sizeof *set);
475 *set = (struct macro_set) {
476 .macros = HMAP_INITIALIZER (set->macros),
/* Destroys the contents of SET: each macro is unlinked from SET's hash map
   and passed to macro_destroy(), and then the map itself is destroyed. */
482 macro_set_destroy (struct macro_set *set)
487 struct macro *macro, *next;
/* The _SAFE iterator keeps the following node in 'next', because the current
   node is deleted inside the loop. */
488 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
490 hmap_delete (&set->macros, &macro->hmap_node);
491 macro_destroy (macro);
493 hmap_destroy (&set->macros);
/* Returns the hash of macro name NAME, computed with utf8_hash_case_string()
   and basis 0. Both insertion (macro_set_add) and lookup (macro_set_find__)
   use this function, so they agree on the hash. */
498 hash_macro_name (const char *name)
500 return utf8_hash_case_string (name, 0);
/* Looks up NAME in SET. Candidates with a matching hash are compared to NAME
   with utf8_strcasecmp(). */
503 static struct macro *
504 macro_set_find__ (struct macro_set *set, const char *name)
/* An empty set is handled before any hashing is done. */
506 if (macro_set_is_empty (set))
510 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
511 hash_macro_name (name), &set->macros)
512 if (!utf8_strcasecmp (macro->name, name))
/* Const-qualified front end to macro_set_find__(): looks up NAME in SET. The
   const is cast away only to reuse the non-const helper. */
519 macro_set_find (const struct macro_set *set, const char *name)
521 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
524 /* Adds M to SET. M replaces any existing macro with the same name. Takes ownership of M. */
527 macro_set_add (struct macro_set *set, struct macro *m)
/* Any macro already stored under the same name is removed from the map and
   destroyed before M is inserted. */
529 struct macro *victim = macro_set_find__ (set, m->name);
532 hmap_delete (&set->macros, &victim->hmap_node);
533 macro_destroy (victim);
/* M's own hmap_node is linked into the map, so M must stay valid for as long
   as it remains in SET. */
536 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
539 /* Macro call parsing. */
546 /* Accumulating tokens in mc->params toward the end of any type of argument. */
550 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
553 /* Expecting a keyword for a keyword argument. */
556 /* Expecting an equal sign for a keyword argument. */
559 /* Macro fully parsed and ready for expansion. */
563 /* Parsing macro calls. This is an FSM driven by macro_call_create() and
564 macro_call_add() to identify the macro being called and obtain its
565 arguments. 'state' identifies the FSM state. */
568 const struct macro_set *macros;
569 const struct macro *macro;
/* One slot per parameter of 'macro'. macro_call_create() allocates the array
   zeroed, and mc_keyword() treats a nonnull slot as an argument that was
   already given. */
570 struct macro_tokens **args;
574 const struct macro_param *param; /* Parameter currently being parsed. */
577 /* Completes macro expansion by initializing arguments that weren't supplied to their defaults. */
580 mc_finished (struct macro_call *mc)
582 mc->state = MC_FINISHED;
583 for (size_t i = 0; i < mc->macro->n_params; i++)
/* The slot points straight at the parameter's default token list; no copy is
   made. macro_call_destroy() recognizes such slots by comparing against this
   same address. */
585 mc->args[i] = &mc->macro->params[i].def;
/* Moves the call-parsing state machine on after an argument is complete. It
   either selects the state for the next parameter or finishes the call
   through mc_finished(). */
590 mc_next_arg (struct macro_call *mc)
594 assert (!mc->macro->n_params);
595 return mc_finished (mc);
597 else if (mc->param->positional)
/* Running past the last parameter means that all of them have been seen. */
600 if (mc->param >= &mc->macro->params[mc->macro->n_params])
601 return mc_finished (mc);
/* The next state depends on the kind of the parameter now current. */
604 mc->state = (!mc->param->positional ? MC_KEYWORD
605 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
/* NOTE(review): this loop appears to search the parameters for one that still
   needs an argument, going back to MC_KEYWORD if so; its condition is not
   visible in this listing. */
612 for (size_t i = 0; i < mc->macro->n_params; i++)
615 mc->state = MC_KEYWORD;
618 return mc_finished (mc);
/* Puts the call-parsing state machine into the MC_ERROR state. */
623 mc_error (struct macro_call *mc)
625 mc->state = MC_ERROR;
/* Handles token MT while an argument for the current parameter is being
   accumulated. How the argument ends depends on the parameter's arg_type. */
630 mc_add_arg (struct macro_call *mc, const struct macro_token *mt)
632 const struct macro_param *p = mc->param;
634 const struct token *token = &mt->token;
/* Only an ARG_CMDEND argument may run up to the end of the command; for any
   other kind, reaching the end is an error. */
635 if ((token->type == T_ENDCMD || token->type == T_STOP)
636 && p->arg_type != ARG_CMDEND)
638 msg (SE, _("Unexpected end of command reading argument %s "
639 "to macro %s."), mc->param->name, mc->macro->name);
641 return mc_error (mc);
/* Find this parameter's argument slot by its index in the parameter array. */
646 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
648 *argp = xzalloc (sizeof **argp);
649 struct macro_tokens *arg = *argp;
/* ARG_N_TOKENS: the argument is complete once it holds n_tokens tokens. */
650 if (p->arg_type == ARG_N_TOKENS)
652 macro_tokens_add (arg, mt);
653 if (arg->n >= p->n_tokens)
654 return mc_next_arg (mc);
/* ARG_CMDEND: the end of the command ends the argument and is not added. */
657 else if (p->arg_type == ARG_CMDEND)
659 if (token->type == T_ENDCMD || token->type == T_STOP)
660 return mc_next_arg (mc);
661 macro_tokens_add (arg, mt);
/* Otherwise the argument runs up to a delimiter token: charend for
   ARG_CHAREND, else the closing enclose[1]. The delimiter is not added. */
666 const struct token *end
667 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
668 if (token_equal (token, end))
669 return mc_next_arg (mc);
670 macro_tokens_add (arg, mt);
/* Reports that ACTUAL was found where EXPECTED was required while reading
   the current argument, then puts the call into the error state through
   mc_error(). */
676 mc_expected (struct macro_call *mc, const struct macro_token *actual,
677 const struct token *expected)
/* A token with an empty representation is shown as "<end of input>". */
679 const struct substring actual_s
680 = (actual->representation.length ? actual->representation
681 : ss_cstr (_("<end of input>")));
682 char *expected_s = token_to_string (expected);
/* actual_s need not be null-terminated, so it is printed with %.*s and an
   explicit length. */
683 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
685 (int) actual_s.length, actual_s.string, expected_s,
686 mc->param->name, mc->macro->name);
689 return mc_error (mc);
/* Handles token MT when the opening delimiter (enclose[0]) of the current
   parameter is expected. Any other token is reported through mc_expected(). */
693 mc_enclose (struct macro_call *mc, const struct macro_token *mt)
695 const struct token *token = &mt->token;
698 if (token_equal (&mc->param->enclose[0], token))
704 return mc_expected (mc, mt, &mc->param->enclose[0]);
/* Searches M's parameters for one named NAME, ignoring any leading `!' in
   NAME. The comparison uses utf8_strncasecmp(). */
707 static const struct macro_param *
708 macro_find_parameter_by_name (const struct macro *m, struct substring name)
710 ss_ltrim (&name, ss_cstr ("!"));
712 for (size_t i = 0; i < m->n_params; i++)
714 const struct macro_param *p = &m->params[i];
/* The first byte of the stored parameter name is skipped. NOTE(review):
   presumably stored names begin with `!'; confirm against where parameters
   are defined. */
715 struct substring p_name = ss_cstr (p->name + 1);
716 if (!utf8_strncasecmp (p_name.string, p_name.length,
717 name.string, name.length))
/* Handles token MT when the keyword that names a keyword argument is
   expected. */
724 mc_keyword (struct macro_call *mc, const struct macro_token *mt)
726 const struct token *token = &mt->token;
/* A token that is not an identifier cannot name a parameter, so the call
   ends here. */
727 if (token->type != T_ID)
728 return mc_finished (mc);
730 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
734 size_t arg_index = p - mc->macro->params;
/* A nonnull slot means this argument was already given in this call. */
736 if (mc->args[arg_index])
739 _("Argument %s multiply specified in call to macro %s."),
740 p->name, mc->macro->name);
741 return mc_error (mc);
/* An equals sign is expected next. */
745 mc->state = MC_EQUALS;
749 return mc_finished (mc);
/* Handles token MT when the equals sign after a keyword is expected. Any
   other token is reported through mc_expected(). */
753 mc_equals (struct macro_call *mc, const struct macro_token *mt)
755 const struct token *token = &mt->token;
758 if (token->type == T_EQUALS)
/* A temporary T_EQUALS token is built only to describe what was expected. */
764 return mc_expected (mc, mt, &(struct token) { .type = T_EQUALS });
767 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
768 macro expander and initializes *MCP to it. Returns 0 if more tokens are needed
769 and should be added via macro_call_add() or 1 if the caller should next call
770 macro_call_get_expansion().
772 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to NULL. */
775 macro_call_create (const struct macro_set *macros,
776 const struct token *token,
777 struct macro_call **mcp)
/* Only T_ID and T_MACRO_ID tokens are looked up as macro names. */
779 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
780 ? macro_set_find (macros, token->string.string)
788 struct macro_call *mc = xmalloc (sizeof *mc);
789 *mc = (struct macro_call) {
/* The initial state follows from the first parameter: a macro with no
   parameters is finished at once, a keyword parameter expects its keyword,
   and an enclosed positional parameter expects its opening delimiter. */
793 .state = (!macro->n_params ? MC_FINISHED
794 : !macro->params[0].positional ? MC_KEYWORD
795 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
/* Zero-filled, so every argument slot starts out as a null pointer. */
797 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
798 .param = macro->params,
/* Return 1 if the call is already complete, 0 if more tokens are needed. */
802 return mc->state == MC_FINISHED ? 1 : 0;
/* Releases the arguments collected in MC. */
806 macro_call_destroy (struct macro_call *mc)
811 for (size_t i = 0; i < mc->macro->n_params; i++)
813 struct macro_tokens *a = mc->args[i];
/* Slots that point at a parameter's default token list (as set up by
   mc_finished()) belong to the macro and are skipped. */
814 if (a && a != &mc->macro->params[i].def)
816 macro_tokens_uninit (a);
824 /* Adds TOKEN to the collection of tokens in MC that potentially need to be
macro expanded.
827 Returns -1 if the tokens added do not actually invoke a macro. The caller
828 should consume the first token without expanding it. (Later tokens might
829 invoke a macro so it's best to feed the second token into a new expander.)
831 Returns 0 if the macro expander needs more tokens, for macro arguments or to
832 decide whether this is actually a macro invocation. The caller should call
833 macro_call_add() again with the next token.
835 Returns a positive number to indicate that the returned number of tokens
836 invoke a macro. The number returned might be less than the number of tokens
837 added because it can take a few tokens of lookahead to determine whether the
838 macro invocation is finished. The caller should call
839 macro_call_get_expansion() to obtain the expansion. */
841 macro_call_add (struct macro_call *mc, const struct macro_token *mt)
/* The token goes to one of the handlers below. NOTE(review): presumably the
   choice is made on MC's current state; the selecting statement is not
   visible in this listing. */
849 return mc_add_arg (mc, mt);
852 return mc_enclose (mc, mt);
855 return mc_keyword (mc, mt);
858 return mc_equals (mc, mt);
865 /* Macro expansion. */
/* Context handed to the macro expansion functions (macro_expand() and its
   helpers). */
867 struct macro_expander
869 const struct macro_set *macros;
/* Macro being expanded. parse_function_arg() tests this for null before it
   looks up macro parameters. */
870 const struct macro *macro;
/* Arguments of 'macro', indexed by parameter position. */
871 struct macro_tokens **args;
/* Mode used whenever text has to be tokenized again. */
872 enum segmenter_mode segmenter_mode;
/* Map from name to value, consulted by parse_function_arg(). */
873 struct stringi_map *vars;
875 int nesting_countdown;
878 /* Each argument to a macro function is one of:
880 - A quoted string or other single literal token.
882 - An argument to the macro being expanded, e.g. !1 or a named argument.
- !*, which stands for the positional arguments of the macro being expanded.
886 - A function invocation.
888 Each function invocation yields a character sequence to be turned into a
889 sequence of tokens. The case where that character sequence is a single
890 quoted string is an important special case. */
/* Input and context for parsing one macro function invocation or function
   argument. */
892 struct parse_macro_function_ctx
/* Tokens to parse. The functions that use this structure also read an
   'n_input' count of them. */
894 const struct macro_token *input;
896 const struct macro_expander *me;
/* Expansion stack used to give error messages their context. */
897 const struct macro_expansion_stack *stack;
/* Forward declarations. macro_expand() and expand_macro_function() are
   called from code that precedes their definitions. */
902 macro_expand (const struct macro_tokens *, const struct macro_expander *,
903 const struct macro_expansion_stack *stack,
904 bool *break_, struct macro_tokens *exp);
907 expand_macro_function (struct parse_macro_function_ctx *ctx,
908 struct string *output, size_t *input_consumed);
910 /* Returns true if the pair of tokens starting at offset OFS within MTS are !*, false otherwise. */
913 is_bang_star (const struct macro_token *mts, size_t n, size_t ofs)
/* !* arrives as two tokens: a T_MACRO_ID whose text is just "!", followed by
   a T_ASTERISK. */
916 && mts[ofs].token.type == T_MACRO_ID
917 && ss_equals (mts[ofs].token.string, ss_cstr ("!"))
918 && mts[ofs + 1].token.type == T_ASTERISK);
/* Parses one macro function argument that starts at index I of CTX->input
   and appends its text to FARG. The visible cases are tried in order: a macro
   parameter, !*, a variable from CTX->me->vars, a nested function call, and
   finally the token's own representation. */
922 parse_function_arg (struct parse_macro_function_ctx *ctx,
923 size_t i, struct string *farg)
925 const struct macro_token *tokens = ctx->input;
926 const struct token *token = &tokens[i].token;
/* Parameters and !* only make sense while a macro is being expanded. */
927 if (token->type == T_MACRO_ID && ctx->me->macro)
929 const struct macro_param *param = macro_find_parameter_by_name (
930 ctx->me->macro, token->string);
/* A parameter name: append the tokens of the matching argument. */
933 size_t param_idx = param - ctx->me->macro->params;
934 const struct macro_tokens *marg = ctx->me->args[param_idx];
935 for (size_t i = 0; i < marg->n; i++)
938 ds_put_byte (farg, ' ');
939 ds_put_substring (farg, marg->mts[i].representation);
/* !*: append the tokens of the macro's positional arguments. */
944 if (is_bang_star (ctx->input, ctx->n_input, i))
946 for (size_t i = 0; i < ctx->me->macro->n_params; i++)
948 if (!ctx->me->macro->params[i].positional)
951 const struct macro_tokens *marg = ctx->me->args[i];
952 for (size_t j = 0; j < marg->n; j++)
955 ds_put_byte (farg, ' ');
956 ds_put_substring (farg, marg->mts[j].representation);
/* A variable: append the value stored under the token's text. */
962 const char *value = stringi_map_find__ (ctx->me->vars,
963 token->string.string,
964 token->string.length);
967 ds_put_cstr (farg, value);
/* A nested function call: parse it starting from this token. */
971 struct parse_macro_function_ctx subctx = {
972 .input = &ctx->input[i],
973 .n_input = ctx->n_input - i,
977 size_t subinput_consumed;
978 if (expand_macro_function (&subctx, farg, &subinput_consumed))
979 return subinput_consumed;
/* Anything else is copied through as written. */
982 ds_put_substring (farg, tokens[i].representation);
/* Tries to parse a call to the macro function named FUNCTION at the start of
   CTX->input. The argument strings are collected into ARGS, there must be
   between MIN_ARGS and MAX_ARGS of them, and *INPUT_CONSUMED receives the
   number of tokens used, counting the closing parenthesis. The error paths
   report through macro_error() and destroy ARGS. */
987 parse_macro_function (struct parse_macro_function_ctx *ctx,
988 struct string_array *args,
989 struct substring function,
990 int min_args, int max_args,
991 size_t *input_consumed)
993 const struct macro_token *tokens = ctx->input;
994 size_t n_tokens = ctx->n_input;
/* The first token must be a T_MACRO_ID that matches FUNCTION. */
997 || tokens[0].token.type != T_MACRO_ID
998 || !ss_equals_case (tokens[0].token.string, function)) /* XXX abbrevs allowed */
/* A left parenthesis must follow the function name. */
1001 if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
1003 macro_error (ctx->stack, n_tokens > 1 ? &tokens[1] : NULL,
1004 _("`(' expected following %s."), function.string);
1008 string_array_init (args);
/* Arguments begin after the name and the left parenthesis, at index 2. */
1010 for (size_t i = 2;; )
1013 goto unexpected_end;
/* A right parenthesis ends the call; then the argument count is checked. */
1014 if (tokens[i].token.type == T_RPAREN)
1016 *input_consumed = i + 1;
1017 if (args->n < min_args || args->n > max_args)
1019 macro_error (ctx->stack, &tokens[i],
1020 _("Wrong number of arguments to macro function %s."),
/* Parse one argument into a fresh string and advance past its tokens. */
1027 struct string s = DS_EMPTY_INITIALIZER;
1028 i += parse_function_arg (ctx, i, &s);
1032 goto unexpected_end;
/* ARGS takes over the string's buffer. */
1034 string_array_append_nocopy (args, ds_steal_cstr (&s));
/* A comma or the closing parenthesis must follow each argument. */
1036 if (tokens[i].token.type == T_COMMA)
1038 else if (tokens[i].token.type != T_RPAREN)
1040 macro_error (ctx->stack, &tokens[i],
1041 _("`,' or `)' expected in call to macro function %s."),
1048 macro_error (ctx->stack, NULL, _("Missing `)' in call to macro function %s."),
1052 string_array_destroy (args);
/* Lexes S according to SEGMENTER_MODE and tests whether it consists of
   exactly one token, a T_STRING. The visible success path appends that
   string's value to CONTENT.
   NOTE(review): the !QUOTE handler passes NULL for CONTENT; the guard around
   the append is not visible in this listing. */
1057 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1058 struct string *content)
1060 struct string_lexer slex;
1061 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
/* First token: there must be one, and it must be a string. */
1063 struct token token1;
1064 if (!string_lexer_next (&slex, &token1))
1067 if (token1.type != T_STRING)
1069 token_uninit (&token1);
/* If there is a second token, then S is more than a single string. */
1073 struct token token2;
1074 if (string_lexer_next (&slex, &token2))
1076 token_uninit (&token1);
1077 token_uninit (&token2);
1081 ds_put_substring (content, token1.string);
1082 token_uninit (&token1);
/* Returns the unquoted form of S if unquote_string() succeeds, otherwise S
   itself. Either way the function initializes TMP, which holds the unquoted
   text in the success case, so the caller must ds_destroy() TMP once it is
   done with the result. */
1087 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1090 ds_init_empty (tmp);
1091 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
/* Parses S as a decimal integer and stores it in *NP, clamped to the range
   of int. Returns true only if nothing but CC_SPACES characters follows the
   number, strtol() did not report ERANGE, and the value fits in an int
   without clamping.
   An empty or all-blank S is accepted as 0: strtol() consumes nothing and
   the remaining text is only spaces.
   NOTE(review): the ERANGE test is only meaningful if errno is cleared
   before strtol(); that statement is not visible in this listing. */
1095 parse_integer (const char *s, int *np)
1100 long int n = strtol (s, &tail, 10);
1101 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
1102 tail += strspn (tail, CC_SPACES);
1103 return *tail == '\0' && errno != ERANGE && n == *np;
/* Tries each macro function in turn at the start of CTX->input. For the one
   that matches, the result text is appended to OUTPUT; the number of input
   tokens used is stored in *INPUT_CONSUMED (by parse_macro_function(), or
   directly for !NULL). */
1107 expand_macro_function (struct parse_macro_function_ctx *ctx,
1108 struct string *output,
1109 size_t *input_consumed)
1111 struct string_array args;
/* !LENGTH: the length of the argument as measured by strlen(), i.e. bytes. */
1113 if (parse_macro_function (ctx, &args, ss_cstr ("!LENGTH"), 1, 1,
1115 ds_put_format (output, "%zu", strlen (args.strings[0]));
/* !BLANKS: the given number of spaces. */
1116 else if (parse_macro_function (ctx, &args, ss_cstr ("!BLANKS"), 1, 1,
1120 if (!parse_integer (args.strings[0], &n))
1122 macro_error (ctx->stack, NULL,
1123 _("Argument to !BLANKS must be non-negative integer "
1124 "(not \"%s\")."), args.strings[0]);
1125 string_array_destroy (&args);
1129 ds_put_byte_multiple (output, ' ', n);
/* !CONCAT: the arguments joined together, each one unquoted when it is a
   single quoted string. */
1131 else if (parse_macro_function (ctx, &args, ss_cstr ("!CONCAT"), 1, INT_MAX,
1134 for (size_t i = 0; i < args.n; i++)
1135 if (!unquote_string (args.strings[i], ctx->me->segmenter_mode, output))
1136 ds_put_cstr (output, args.strings[i]);
/* !HEAD: the first token of the (unquoted) argument. */
1138 else if (parse_macro_function (ctx, &args, ss_cstr ("!HEAD"), 1, 1,
1142 const char *s = unquote_string_in_place (args.strings[0],
1143 ctx->me->segmenter_mode, &tmp);
1145 struct macro_tokens mts = { .n = 0 };
1146 macro_tokens_from_string__ (&mts, ss_cstr (s), ctx->me->segmenter_mode,
1149 ds_put_substring (output, mts.mts[0].representation);
1150 macro_tokens_uninit (&mts);
/* !INDEX: the 1-based byte offset of the second argument within the first,
   located with strstr(), or 0 when it does not occur.
   NOTE(review): the value formatted with %zu has type ptrdiff_t rather than
   size_t. */
1153 else if (parse_macro_function (ctx, &args, ss_cstr ("!INDEX"), 2, 2,
1156 const char *haystack = args.strings[0];
1157 const char *needle = strstr (haystack, args.strings[1]);
1158 ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0);
/* !QUOTE: an argument that is already a single quoted string is passed
   through unchanged; anything else is wrapped in single quotes. */
1160 else if (parse_macro_function (ctx, &args, ss_cstr ("!QUOTE"), 1, 1,
1163 if (unquote_string (args.strings[0], ctx->me->segmenter_mode, NULL))
1164 ds_put_cstr (output, args.strings[0]);
1167 ds_extend (output, strlen (args.strings[0]) + 2);
1168 ds_put_byte (output, '\'');
1169 for (const char *p = args.strings[0]; *p; p++)
1172 ds_put_byte (output, '\'');
1173 ds_put_byte (output, *p);
1175 ds_put_byte (output, '\'');
/* !SUBSTR: part of the first argument, starting at a 1-based position, with
   an optional maximum length that defaults to INT_MAX. */
1178 else if (parse_macro_function (ctx, &args, ss_cstr ("!SUBSTR"), 2, 3,
1182 if (!parse_integer (args.strings[1], &start) || start < 1)
1184 macro_error (ctx->stack, NULL,
1185 _("Second argument of !SUBSTR must be "
1186 "positive integer (not \"%s\")."),
1188 string_array_destroy (&args);
1192 int count = INT_MAX;
1193 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1195 macro_error (ctx->stack, NULL,
1196 _("Third argument of !SUBSTR must be "
1197 "non-negative integer (not \"%s\")."),
1199 string_array_destroy (&args);
/* ss_substr() works from 0, hence the adjustment of START. */
1203 struct substring s = ss_cstr (args.strings[0]);
1204 ds_put_substring (output, ss_substr (s, start - 1, count));
/* !TAIL: every token of the (unquoted) argument except the first. */
1206 else if (parse_macro_function (ctx, &args, ss_cstr ("!TAIL"), 1, 1,
1210 const char *s = unquote_string_in_place (args.strings[0],
1211 ctx->me->segmenter_mode, &tmp);
1213 struct macro_tokens mts = { .n = 0 };
1214 macro_tokens_from_string__ (&mts, ss_cstr (s), ctx->me->segmenter_mode,
/* 'tail' is a view into mts.mts starting at the second token; it owns
   nothing itself. */
1218 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1219 macro_tokens_to_representation (&tail, output, NULL, NULL);
1221 macro_tokens_uninit (&mts);
/* !UNQUOTE: the argument without its quotes when it is a single quoted
   string, otherwise the argument unchanged. */
1224 else if (parse_macro_function (ctx, &args, ss_cstr ("!UNQUOTE"), 1, 1,
1227 if (!unquote_string (args.strings[0], ctx->me->segmenter_mode, output))
1228 ds_put_cstr (output, args.strings[0]);
/* !UPCASE: the (unquoted) argument converted with utf8_to_upper(). */
1230 else if (parse_macro_function (ctx, &args, ss_cstr ("!UPCASE"), 1, 1,
1234 const char *s = unquote_string_in_place (args.strings[0],
1235 ctx->me->segmenter_mode, &tmp);
1236 char *upper = utf8_to_upper (s);
1237 ds_put_cstr (output, upper);
/* !EVAL: the argument is tokenized, macro-expanded, and then turned back
   into text. */
1241 else if (parse_macro_function (ctx, &args, ss_cstr ("!EVAL"), 1, 1,
1244 struct macro_tokens mts = { .n = 0 };
1245 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1246 ctx->me->segmenter_mode, ctx->stack);
1247 struct macro_tokens exp = { .n = 0 };
1248 macro_expand (&mts, ctx->me,
1249 &(struct macro_expansion_stack) {
1253 macro_tokens_to_representation (&exp, output, NULL, NULL);
1254 macro_tokens_uninit (&exp);
1255 macro_tokens_uninit (&mts);
/* !NULL: uses one token and appends nothing to OUTPUT. */
1257 else if (ctx->n_input > 0
1258 && ctx->input[0].token.type == T_MACRO_ID
1259 && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!NULL")))
1261 *input_consumed = 1;
1267 string_array_destroy (&args);
/* Members of the expression evaluation context that the macro_evaluate_*()
   functions reach through their CTX argument: the expander, and the
   expansion stack used for error reporting. */
1273 const struct macro_expander *me;
1274 const struct macro_expansion_stack *stack;
/* Forward declaration. macro_evaluate_literal() calls this function for a
   parenthesized subexpression, before the definition appears. */
1277 static char *macro_evaluate_or (const struct expr_context *ctx,
1278 const struct macro_token **tokens,
1279 const struct macro_token *end);
/* Evaluates the innermost level of a macro expression: a parenthesized
   subexpression, or a literal / function invocation. *TOKENS is the current
   position and END the limit. On the visible success path, *TOKENS is
   advanced past what was used and a heap-allocated string is returned. */
1282 macro_evaluate_literal (const struct expr_context *ctx,
1283 const struct macro_token **tokens,
1284 const struct macro_token *end)
1286 const struct macro_token *p = *tokens;
/* Parenthesized subexpression: evaluate from the top of the grammar again,
   then require the closing parenthesis. */
1289 if (p->token.type == T_LPAREN)
1292 char *value = macro_evaluate_or (ctx, &p, end);
1295 if (p >= end || p->token.type != T_RPAREN)
1298 macro_error (ctx->stack, p < end ? p : NULL,
1299 _("Expecting ')' in macro expression."));
/* A right parenthesis cannot begin an operand. */
1306 else if (p->token.type == T_RPAREN)
1308 macro_error (ctx->stack, p, _("Expecting literal or function invocation "
1309 "in macro expression."));
/* Otherwise the operand is parsed the same way as a function argument. */
1313 struct parse_macro_function_ctx fctx = {
1317 .stack = ctx->stack,
1319 struct string function_output = DS_EMPTY_INITIALIZER;
1320 size_t function_consumed = parse_function_arg (&fctx, 0, &function_output);
/* If the result is a single quoted string, use its unquoted content. */
1321 struct string unquoted = DS_EMPTY_INITIALIZER;
1322 if (unquote_string (ds_cstr (&function_output), ctx->me->segmenter_mode,
1325 ds_swap (&function_output, &unquoted);
1326 ds_destroy (&unquoted);
1328 *tokens = p + function_consumed;
1329 return ds_steal_cstr (&function_output);
1332 /* Returns true if MT is valid as a macro operator. Only operators written as
1333 symbols (e.g. <>) are usable in macro expressions, not operators written as
1334 letters (e.g. EQ). */
1336 is_macro_operator (const struct macro_token *mt)
/* The test looks only at the first byte of the representation: a letter
   there means the operator was spelled out in words. */
1338 return (mt->representation.length > 0
1339 && !c_isalpha (mt->representation.string[0]));
/* Returns the relational operator that MT denotes. The visible paths return
   the token's own type for an operator written as a symbol (T_STOP when
   is_macro_operator() rejects it) and map the macro keywords !EQ, !NE, !LT,
   !GT, !LE, and !GE to the matching token types. */
1342 static enum token_type
1343 parse_relational_op (const struct macro_token *mt)
1345 switch (mt->token.type)
1355 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1358 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1359 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1360 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1361 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1362 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1363 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
/* Evaluates an operand that may be followed by a relational operator and a
   second operand. When a comparison is made, the result is a newly allocated
   "1" or "0". */
1372 macro_evaluate_relational (const struct expr_context *ctx,
1373 const struct macro_token **tokens,
1374 const struct macro_token *end)
1376 const struct macro_token *p = *tokens;
1377 char *lhs = macro_evaluate_literal (ctx, &p, end);
/* Running out of tokens is treated the same as finding no operator. */
1381 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1389 char *rhs = macro_evaluate_literal (ctx, &p, end);
/* The operands are unquoted and then compared as strings with strcmp(), so
   numbers are not compared by value. */
1396 struct string lhs_tmp, rhs_tmp;
1397 int cmp = strcmp (unquote_string_in_place (lhs, ctx->me->segmenter_mode,
1399 unquote_string_in_place (rhs, ctx->me->segmenter_mode,
1401 ds_destroy (&lhs_tmp);
1402 ds_destroy (&rhs_tmp);
/* Turn the three-way comparison into the truth value for OP. */
1407 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1409 : op == T_LT ? cmp < 0
1410 : op == T_GT ? cmp > 0
1411 : op == T_LE ? cmp <= 0
1412 : /* T_GE */ cmp >= 0);
1415 return xstrdup (b ? "1" : "0");
/* Evaluates an operand that may be preceded by negation operators, written
   !NOT or ~. With at least one negation and a valid operand, the result is a
   newly allocated "1" or "0": the operand's truth value (any string other
   than "0" counts as true), inverted when the number of negations is odd. */
1419 macro_evaluate_not (const struct expr_context *ctx,
1420 const struct macro_token **tokens,
1421 const struct macro_token *end)
1423 const struct macro_token *p = *tokens;
1425 unsigned int negations = 0;
1427 && (ss_equals_case (p->representation, ss_cstr ("!NOT"))
1428 || ss_equals (p->representation, ss_cstr ("~"))))
1434 char *operand = macro_evaluate_relational (ctx, &p, end);
/* Nothing to invert if evaluation failed or there were no negations. */
1435 if (!operand || !negations)
/* strcmp() may return any nonzero value, not just 1, so it is reduced to 0
   or 1 before being XORed with the parity of the negation count. */
1441 bool b = (strcmp (operand, "0") != 0) ^ (negations & 1);
1444 return xstrdup (b ? "1" : "0");
/* Evaluates operands joined by the AND operator, written !AND or &. After
   each AND, the running result in 'lhs' becomes "1" if both sides are true
   (not equal to "0") and "0" otherwise. */
1448 macro_evaluate_and (const struct expr_context *ctx,
1449 const struct macro_token **tokens,
1450 const struct macro_token *end)
1452 const struct macro_token *p = *tokens;
1453 char *lhs = macro_evaluate_not (ctx, &p, end);
1458 && (ss_equals_case (p->representation, ss_cstr ("!AND"))
1459 || ss_equals (p->representation, ss_cstr ("&"))))
/* The right-hand operand is evaluated before the two are combined, so it is
   always evaluated, whatever the value of 'lhs'. */
1462 char *rhs = macro_evaluate_not (ctx, &p, end);
1469 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1472 lhs = xstrdup (b ? "1" : "0");
/* Evaluates operands joined by the OR operator, written !OR or |. After each
   OR, the running result in 'lhs' becomes "1" if either side is true (not
   equal to "0") and "0" otherwise. This is the top level of the expression
   grammar: macro_evaluate_expression() starts here. */
1479 macro_evaluate_or (const struct expr_context *ctx,
1480 const struct macro_token **tokens,
1481 const struct macro_token *end)
1483 const struct macro_token *p = *tokens;
1484 char *lhs = macro_evaluate_and (ctx, &p, end);
1489 && (ss_equals_case (p->representation, ss_cstr ("!OR"))
1490 || ss_equals (p->representation, ss_cstr ("|"))))
/* The right-hand operand is evaluated before the two are combined, so it is
   always evaluated, whatever the value of 'lhs'. */
1493 char *rhs = macro_evaluate_and (ctx, &p, end);
1500 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1503 lhs = xstrdup (b ? "1" : "0");
/* Evaluates the macro expression found in the N_TOKENS tokens that start at
   *TOKENS, handing the work to macro_evaluate_or(). The result is that
   function's return value. */
1510 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1511 const struct macro_expander *me,
1512 const struct macro_expansion_stack *stack)
1514 const struct expr_context ctx = {
/* The end of the input is one past the last of the N_TOKENS tokens. */
1518 return macro_evaluate_or (&ctx, tokens, *tokens + n_tokens);
/* Evaluates a macro expression that must yield a number. The expression's
   string result is tokenized again; it has to be exactly one numeric token,
   whose value is stored in *NUMBER. Anything else is reported as an error. */
1522 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1523 const struct macro_expander *me,
1524 const struct macro_expansion_stack *stack,
1527 char *s = macro_evaluate_expression (tokens, n_tokens, me, stack);
1531 struct macro_tokens mts = { .n = 0 };
1532 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, stack);
1533 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
1535 macro_error (stack, mts.n > 0 ? &mts.mts[0] : NULL,
1536 _("Macro expression must evaluate to "
1537 "a number (not \"%s\")."), s);
1539 macro_tokens_uninit (&mts);
1543 *number = token_number (&mts.mts[0].token);
1545 macro_tokens_uninit (&mts);
/* Scans the tokens from P up to END for the clause that ends the current
   branch of an !IF construct: an !ELSE at nesting level zero, or an !IFEND.
   NOTE(review): 'nesting' appears to count inner !IF ... !IFEND pairs; the
   statements that update it are not visible in this listing. */
1549 static const struct macro_token *
1550 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1553 for (; p < end; p++)
/* Only macro identifiers can be !IF, !IFEND, or !ELSE. */
1555 if (p->token.type != T_MACRO_ID)
1558 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1560 else if (ss_equals_case (p->token.string, ss_cstr ("!IFEND")))
/* An !ELSE inside a nested !IF belongs to that inner construct. */
1566 else if (ss_equals_case (p->token.string, ss_cstr ("!ELSE")) && !nesting)
/* Handles an !IF ... !THEN ... [!ELSE ...] !IFEND construct whose first token
   is TOKENS[0], with N_TOKENS tokens available.

   The condition is evaluated with macro_evaluate_expression().  The !THEN
   keyword must follow it.  The end of the !THEN branch, and of the !ELSE
   branch when there is one, is located with find_ifend_clause().  The tokens
   of the chosen branch are wrapped, without copying, in a temporary
   `struct macro_tokens' and passed to macro_expand() under a new
   expansion-stack entry.

   Syntax problems are reported with macro_error().  On the path that reaches
   the end of the function, the return value is the number of tokens from
   TOKENS through the closing !IFEND, inclusive.

   NOTE(review): the early returns, the test that picks a branch from `b',
   and the trailing arguments of the macro_expand() call are not shown in
   this excerpt.  BREAK_ and EXP are presumably forwarded to macro_expand(),
   and the failure paths presumably return 0 -- confirm. */
1573 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1574 const struct macro_expander *me,
1575 const struct macro_expansion_stack *stack,
1576 bool *break_, struct macro_tokens *exp)
1578 const struct macro_token *p = tokens;
1579 const struct macro_token *end = tokens + n_tokens;
/* Not positioned at an !IF keyword. */
1581 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
1585 char *result = macro_evaluate_expression (&p, end - p, me, stack);
/* The condition is true for any result other than the string "0". */
1588 bool b = strcmp (result, "0");
1592 || p->token.type != T_MACRO_ID
1593 || !ss_equals_case (p->token.string, ss_cstr ("!THEN")))
1595 macro_error (stack, p < end ? p : NULL,
1596 _("!THEN expected in macro !IF construct."));
/* The !THEN branch runs from just after !THEN up to the clause keyword that
   find_ifend_clause() locates. */
1600 const struct macro_token *start_then = p + 1;
1601 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1604 macro_error (stack, NULL,
1605 _("!ELSE or !IFEND expected in macro !IF construct."));
/* If the !THEN branch ended at !ELSE, the !ELSE branch runs from the next
   token up to a clause keyword that must be !IFEND. */
1609 const struct macro_token *start_else, *end_if;
1610 if (ss_equals_case (end_then->token.string, ss_cstr ("!ELSE")))
1612 start_else = end_then + 1;
1613 end_if = find_ifend_clause (start_else, end);
1615 || !ss_equals_case (end_if->token.string, ss_cstr ("!IFEND")))
1617 macro_error (stack, end_if ? end_if : NULL,
1618 _("!IFEND expected in macro !IF construct."));
/* `start' and `n' delimit the tokens of the branch to expand. */
1628 const struct macro_token *start;
1633 n = end_then - start_then;
1635 else if (start_else)
1638 n = end_if - start_else;
/* Borrow the branch's tokens in place; CONST_CAST only drops the qualifier so
   the pointer fits the `mts' member. */
1648 struct macro_tokens mts = {
1649 .mts = CONST_CAST (struct macro_token *, start),
1652 macro_expand (&mts, me, &(struct macro_expansion_stack) {
/* Tokens consumed, counting the !IFEND itself. */
1658 return (end_if + 1) - tokens;
/* Handles a !LET assignment whose first token is TOKENS[0], with N_TOKENS
   tokens available.  The expected shape is:

     !LET <macro-id> = <expression>

   The variable name must be a T_MACRO_ID token for which is_macro_keyword()
   is false and which, when ME->macro is set, does not name one of that
   macro's parameters.  The expression is evaluated with
   macro_evaluate_expression() and the resulting string is stored in ME->vars
   under a copy of the variable name, via stringi_map_replace_nocopy().

   Each violated expectation is reported with macro_error().

   NOTE(review): the lines that advance P and the return statements are not
   shown in this excerpt.  macro_expand() receives a token count from this
   function, so it presumably returns the number of tokens consumed, with 0
   on failure -- confirm. */
1662 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1663 const struct macro_expander *me,
1664 const struct macro_expansion_stack *stack)
1666 const struct macro_token *p = tokens;
1667 const struct macro_token *end = tokens + n_tokens;
/* Not positioned at a !LET keyword. */
1669 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
1673 if (p >= end || p->token.type != T_MACRO_ID)
1675 macro_error (stack, p < end ? p : NULL,
1676 _("Expected macro variable name following !LET."));
/* Reject names that collide with macro keywords or with the enclosing
   macro's parameters. */
1679 const struct substring var_name = p->token.string;
1680 if (is_macro_keyword (var_name)
1681 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1683 macro_error (stack, p < end ? p : NULL,
1684 _("Cannot use argument name or macro keyword "
1685 "\"%.*s\" as !LET variable."),
1686 (int) var_name.length, var_name.string);
1691 if (p >= end || p->token.type != T_EQUALS)
1693 macro_error (stack, p < end ? p : NULL,
1694 _("Expected `=' following !LET."));
1699 char *value = macro_evaluate_expression (&p, end - p, me, stack);
/* Record the variable's new value in the expander's variable map. */
1703 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens from P up to (but not including) END for the !DOEND
   keyword of a !DO loop.  Only T_MACRO_ID tokens are compared, against !DO
   and !DOEND.  If the scan does not succeed, "Missing !DOEND." is reported
   with macro_error() using STACK and no token location.

   NOTE(review): the nesting bookkeeping and the return statements are not
   shown in this excerpt.  The !DO test presumably lets nested loops be
   skipped.  macro_expand_do() uses the result as a pointer to the matching
   !DOEND token. */
1707 static const struct macro_token *
1708 find_doend (const struct macro_expansion_stack *stack,
1709 const struct macro_token *p, const struct macro_token *end)
1712 for (; p < end; p++)
1714 if (p->token.type != T_MACRO_ID)
1717 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1719 else if (ss_equals_case (p->token.string, ss_cstr ("!DOEND")))
1726 macro_error (stack, NULL, _("Missing !DOEND."));
/* Handles a !DO loop whose first token is TOKENS[0], with N_TOKENS tokens
   available, expanding the loop body into EXP once per iteration.

   After !DO comes the loop variable, a T_MACRO_ID token that must not be a
   macro keyword nor, when ME->macro is set, one of that macro's parameters.
   Two loop forms follow:

     - !IN <expression>: the expression's string result is tokenized and the
       body is expanded once per resulting token, with the loop variable set
       to that token's representation.

     - = <first> !TO <last> [!BY <by>]: each bound is evaluated with
       macro_evaluate_number().  A zero !BY value is reported as an error.
       The body is expanded for index = first, stepping toward `last', with
       the loop variable set from the index formatted by c_dtoastr().

   In both forms the loop variable is stored in ME->vars, the body is located
   with find_doend() and expanded with macro_expand() under a "!DO"
   expansion-stack entry, and a local `break_' flag is handed to
   macro_expand().  Both forms report an error that mentions SET MITERATE;
   the limit is read from settings_get_miterate().

   On the visible success paths the return value is DO_END - TOKENS + 1, the
   number of tokens through the matching !DOEND inclusive.

   NOTE(review): the early returns, the iteration-limit tests, the handling
   of `break_' after each expansion, the default for `by', and the loop
   increment are not shown in this excerpt. */
1731 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1732 const struct macro_expander *me,
1733 const struct macro_expansion_stack *stack,
1734 struct macro_tokens *exp)
1736 const struct macro_token *p = tokens;
1737 const struct macro_token *end = tokens + n_tokens;
/* Not positioned at a !DO keyword. */
1739 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
1743 if (p >= end || p->token.type != T_MACRO_ID)
1745 macro_error (stack, p < end ? p : NULL,
1746 _("Expected macro variable name following !DO."));
/* Reject loop variables that collide with macro keywords or with the
   enclosing macro's parameters. */
1749 const struct substring var_name = p->token.string;
1750 if (is_macro_keyword (var_name)
1751 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1753 macro_error (stack, p, _("Cannot use argument name or macro "
1754 "keyword as !DO variable."));
/* Stack entry used while evaluating the loop's expressions and expanding its
   body. */
1759 struct macro_expansion_stack next_stack = {
1760 .name = "!DO", .next = stack,
1762 int miterate = settings_get_miterate ();
/* List form: !DO <var> !IN <expression>. */
1763 if (p < end && p->token.type == T_MACRO_ID
1764 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1767 char *list = macro_evaluate_expression (&p, end - p, me, &next_stack);
/* Tokenize the list; each resulting token becomes one loop value. */
1771 struct macro_tokens items = { .n = 0 };
1772 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
1776 const struct macro_token *do_end = find_doend (stack, p, end);
1779 macro_tokens_uninit (&items);
/* The loop body is borrowed in place from the input tokens. */
1783 const struct macro_tokens inner = {
1784 .mts = CONST_CAST (struct macro_token *, p),
1787 for (size_t i = 0; i < items.n; i++)
1791 macro_error (stack, NULL,
1792 _("!DO loop over list exceeded "
1793 "maximum number of iterations %d. "
1794 "(Use SET MITERATE to change the limit.)"),
/* Bind the loop variable to this item's text, then expand the body. */
1798 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1799 ss_xstrdup (items.mts[i].representation));
1801 bool break_ = false;
1802 macro_expand (&inner, me, &next_stack, &break_, exp);
1806 return do_end - tokens + 1;
/* Numeric form: !DO <var> = <first> !TO <last> [!BY <by>]. */
1808 else if (p < end && p->token.type == T_EQUALS)
1812 if (!macro_evaluate_number (&p, end - p, me, &next_stack, &first))
1815 if (p >= end || p->token.type != T_MACRO_ID
1816 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1818 macro_error (stack, p < end ? p : NULL,
1819 _("Expected !TO in numerical !DO loop."));
1825 if (!macro_evaluate_number (&p, end - p, me, &next_stack, &last))
/* Optional step. */
1829 if (p < end && p->token.type == T_MACRO_ID
1830 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1833 if (!macro_evaluate_number (&p, end - p, me, &next_stack, &by))
1838 macro_error (stack, NULL, _("!BY value cannot be zero."));
1843 const struct macro_token *do_end = find_doend (stack, p, end);
1846 const struct macro_tokens inner = {
1847 .mts = CONST_CAST (struct macro_token *, p),
/* Iterate only when the step's sign can carry `first' to `last'. */
1851 if ((by > 0 && first <= last) || (by < 0 && first >= last))
1854 for (double index = first;
1855 by > 0 ? (index <= last) : (index >= last);
1860 macro_error (stack, NULL,
1861 _("Numerical !DO loop exceeded "
1862 "maximum number of iterations %d. "
1863 "(Use SET MITERATE to change the limit.)"),
/* Format the index as text and bind the loop variable to it. */
1868 char index_s[DBL_BUFSIZE_BOUND];
1869 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1870 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1873 bool break_ = false;
1874 macro_expand (&inner, me, &next_stack, &break_, exp);
1880 return do_end - tokens + 1;
/* Neither `=' nor !IN followed the loop variable. */
1884 macro_error (stack, p < end ? p : NULL,
1885 _("Expected `=' or !IN in !DO loop."));
/* Expands the tokens in MTS, appending the result to EXP.

   ME supplies the expansion state read here: the macro set, the macro being
   expanded and its arguments (ME->macro may be null), the variable map, the
   segmenter mode, the shared `expand' flag, and the remaining nesting
   budget.  STACK locates error messages.  BREAK_ may be null; when it is
   nonnull, the token loop stops as soon as *BREAK_ is set.

   Each token is checked against the following cases, in this order:

     - a parameter of the current macro, replaced by its argument;
     - the construct recognized by is_bang_star(), replaced by the arguments
       of positional parameters;
     - an !IF construct, handled by macro_expand_if();
     - a variable in ME->vars, replaced by its re-tokenized value;
     - a call to another macro, expanded recursively with one less level of
       nesting budget;
     - a token that is not a macro identifier, copied unchanged;
     - !BREAK, macro functions, !LET, !DO, !ONEXPAND and !OFFEXPAND.

   NOTE(review): many control-flow lines (null checks, `continue', `else',
   and how returned token counts advance the index) are not shown in this
   excerpt, so the exact fall-through between the cases cannot be confirmed
   from here. */
1891 macro_expand (const struct macro_tokens *mts,
1892 const struct macro_expander *me,
1893 const struct macro_expansion_stack *stack,
1894 bool *break_, struct macro_tokens *exp)
/* Nesting budget exhausted: report it and pass the tokens through to EXP
   without expanding them. */
1896 if (me->nesting_countdown <= 0)
1898 macro_error (stack, NULL, _("Maximum nesting level %d exceeded. "
1899 "(Use SET MNEST to change the limit.)"),
1900 settings_get_mnest ());
1901 for (size_t i = 0; i < mts->n; i++)
1902 macro_tokens_add (exp, &mts->mts[i]);
1906 for (size_t i = 0; i < mts->n && (!break_ || !*break_); i++)
1908 const struct macro_token *mt = &mts->mts[i];
1909 const struct token *token = &mt->token;
/* Macro identifier inside a macro: look it up as a parameter name. */
1910 if (token->type == T_MACRO_ID && me->macro)
1912 const struct macro_param *param = macro_find_parameter_by_name (
1913 me->macro, token->string);
/* The parameter's index in the macro's parameter array selects the matching
   argument. */
1916 const struct macro_tokens *arg = me->args[param - me->macro->params];
/* When expansion is enabled and the parameter has `expand_arg' set, the
   argument is itself expanded, using a fresh variable map. */
1917 if (*me->expand && param->expand_arg)
1919 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1920 struct macro_expander subme = {
1921 .macros = me->macros,
1924 .segmenter_mode = me->segmenter_mode,
1925 .expand = me->expand,
1927 .nesting_countdown = me->nesting_countdown,
1929 macro_expand (arg, &subme, &(struct macro_expansion_stack) {
1930 .name = param->name,
1933 stringi_map_destroy (&vars);
/* Copy the argument's tokens verbatim (presumably the `else' branch). */
1936 for (size_t i = 0; i < arg->n; i++)
1937 macro_tokens_add (exp, &arg->mts[i]);
/* is_bang_star() match: walk the parameters and emit the arguments of
   positional ones, with the same expand-or-copy choice as above.  What
   happens at a non-positional parameter is on a line not shown. */
1941 if (is_bang_star (mts->mts, mts->n, i))
1943 for (size_t j = 0; j < me->macro->n_params; j++)
1945 const struct macro_param *param = &me->macro->params[j];
1946 if (!param->positional)
1949 const struct macro_tokens *arg = me->args[j];
1950 if (*me->expand && param->expand_arg)
1952 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1953 struct macro_expander subme = {
1954 .macros = me->macros,
1957 .segmenter_mode = me->segmenter_mode,
1958 .expand = me->expand,
1960 .nesting_countdown = me->nesting_countdown,
1962 macro_expand (arg, &subme,
1963 &(struct macro_expansion_stack) {
1967 stringi_map_destroy (&vars);
1970 for (size_t k = 0; k < arg->n; k++)
1971 macro_tokens_add (exp, &arg->mts[k]);
/* !IF construct: macro_expand_if() returns a token count. */
1977 size_t n = macro_expand_if (&mts->mts[i], mts->n - i, me, stack,
/* Macro identifier found in the variable map: re-tokenize the variable's
   value into EXP. */
1986 if (token->type == T_MACRO_ID)
1988 const char *value = stringi_map_find__ (me->vars,
1989 token->string.string,
1990 token->string.length);
1993 macro_tokens_from_string__ (exp, ss_cstr (value),
1994 me->segmenter_mode, stack);
/* Try to parse a call to another macro starting at this token.  While
   `retval' stays zero, following tokens are fed to macro_call_add(); once the
   input runs out, a synthetic T_ENDCMD token is fed instead. */
2001 struct macro_call *submc;
2002 int retval = macro_call_create (me->macros, token, &submc);
2003 for (size_t j = 1; !retval; j++)
2005 const struct macro_token endcmd
2006 = { .token = { .type = T_ENDCMD } };
2007 retval = macro_call_add (
2008 submc, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
/* Expand the called macro's body with its own arguments and a fresh variable
   map, spending one level of the nesting budget. */
2013 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
2014 struct macro_expander subme = {
2015 .macros = submc->macros,
2016 .macro = submc->macro,
2017 .args = submc->args,
2018 .segmenter_mode = me->segmenter_mode,
2019 .expand = me->expand,
2021 .nesting_countdown = me->nesting_countdown - 1,
2023 macro_expand (&submc->macro->body, &subme,
2024 &(struct macro_expansion_stack) {
2025 .name = submc->macro->name,
2026 .file_name = submc->macro->file_name,
2027 .first_line = submc->macro->first_line,
2028 .last_line = submc->macro->last_line,
2031 macro_call_destroy (submc);
2032 stringi_map_destroy (&vars);
2036 macro_call_destroy (submc);
/* Anything that is not a macro identifier is copied to EXP unchanged. */
2039 if (token->type != T_MACRO_ID)
2041 macro_tokens_add (exp, mt);
/* !BREAK: an error is reported when it appears outside !DO -- presumably
   when BREAK_ is null.  The lines that set *BREAK_ are not shown. */
2045 if (ss_equals_case (token->string, ss_cstr ("!break")))
2048 macro_error (stack, mt, _("!BREAK outside !DO."));
/* Macro functions: on success, expand_macro_function() yields the output
   text and the number of input tokens it consumed. */
2056 struct parse_macro_function_ctx ctx = {
2057 .input = &mts->mts[i],
2058 .n_input = mts->n - i,
2062 struct string function_output = DS_EMPTY_INITIALIZER;
2063 size_t function_consumed;
2064 if (expand_macro_function (&ctx, &function_output, &function_consumed))
/* Less one because the enclosing loop also increments I. */
2066 i += function_consumed - 1;
2068 macro_tokens_from_string__ (exp, function_output.ss,
2069 me->segmenter_mode, stack);
2070 ds_destroy (&function_output);
/* !LET and !DO are handled by their own parsers, each returning a token
   count. */
2075 size_t n = macro_parse_let (&mts->mts[i], mts->n - i, me, stack);
2082 n = macro_expand_do (&mts->mts[i], mts->n - i, me, stack, exp);
/* !ONEXPAND and !OFFEXPAND act on the shared *ME->expand flag; !OFFEXPAND
   clears it.  The statement for !ONEXPAND is not shown. */
2089 if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
2091 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
2092 *me->expand = false;
2094 macro_tokens_add (exp, mt);
/* Expands the macro call MC, appending the expansion of the macro's body to
   EXP.  MC must be in state MC_FINISHED (asserted).

   A fresh, empty variable map is created for the expansion and destroyed
   afterward.  The nesting budget starts at settings_get_mnest().
   SEGMENTER_MODE is recorded in the expander for use when strings are
   re-tokenized.  The expansion-stack entry carries the macro's name and the
   file name and line range of its definition, for error reporting.

   No `break_' flag is supplied (NULL) at this outermost level.

   NOTE(review): some initializer lines of the expander are not shown in this
   excerpt. */
2099 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2100 struct macro_tokens *exp)
2102 assert (mc->state == MC_FINISHED);
2105 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
2106 struct macro_expander me = {
2107 .macros = mc->macros,
2110 .segmenter_mode = segmenter_mode,
2113 .nesting_countdown = settings_get_mnest (),
2116 struct macro_expansion_stack stack = {
2117 .name = mc->macro->name,
2118 .file_name = mc->macro->file_name,
2119 .first_line = mc->macro->first_line,
2120 .last_line = mc->macro->last_line,
2122 macro_expand (&mc->macro->body, &me, &stack, NULL, exp);
2124 stringi_map_destroy (&vars);