1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list. Entries are not dynamically allocated
46 but instead live on the program stack. */
47 struct macro_expansion_stack
49 /* Points to an outer stack entry, or NULL if this is the outermost. */
50 const struct macro_expansion_stack *next;
52 /* A macro name or !IF, !DO, etc. */
55 /* Location of the macro definition, if available. */
56 const char *file_name;
61 /* Reports an error during macro expansion. STACK is the stack for reporting
62 the location of the error, MT is the optional token at which the error was
63 detected, and FORMAT along with the varargs is the message to report. */
64 static void PRINTF_FORMAT (3, 4)
65 macro_error (const struct macro_expansion_stack *stack,
66 const struct macro_token *mt,
67 const char *format, ...)
69 struct msg_stack **ms = NULL;
70 size_t allocated_ms = 0;
73 for (const struct macro_expansion_stack *p = stack; p; p = p->next)
75 if (n_ms >= allocated_ms)
76 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
78 /* TRANSLATORS: These strings are used for explaining the context of an
79 error. The "While expanding" message appears first, followed by zero
80 or more of the "inside expansion" messages. `innermost',
81 `next_inner', etc., are names of macros, and `foobar' is a piece of
84 foo.sps:12: At `foobar' in the expansion of `innermost',
85 foo.sps:23: inside the expansion of `next_inner',
86 foo.sps:34: inside the expansion of `next_inner2',
87 foo.sps:45: inside the expansion of `outermost',
88 foo.sps:76: This is the actual error message. */
92 if (mt && mt->representation.length)
95 str_ellipsize (mt->representation, syntax, sizeof syntax);
96 description = xasprintf (_("At `%s' in the expansion of `%s',"),
100 description = xasprintf (_("In the expansion of `%s',"), p->name);
103 description = xasprintf (_("inside the expansion of `%s',"), p->name);
105 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
106 *ms[n_ms] = (struct msg_stack) {
108 .file_name = xstrdup_if_nonnull (p->file_name),
109 .first_line = p->first_line,
110 .last_line = p->last_line,
112 .description = description,
118 va_start (args, format);
119 char *s = xvasprintf (format, args);
122 struct msg *m = xmalloc (sizeof *m);
124 .category = MSG_C_SYNTAX,
125 .severity = MSG_S_ERROR,
134 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
136 token_copy (&dst->token, &src->token);
137 ss_alloc_substring (&dst->representation, src->representation);
141 macro_token_uninit (struct macro_token *mt)
143 token_uninit (&mt->token);
144 ss_dealloc (&mt->representation);
148 macro_token_to_representation (struct macro_token *mt, struct string *s)
150 ds_put_substring (s, mt->representation);
153 is_macro_keyword (struct substring s)
155 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
156 if (stringi_set_is_empty (&keywords))
158 static const char *kws[] = {
179 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
180 stringi_set_insert (&keywords, kws[i]);
183 ss_ltrim (&s, ss_cstr ("!"));
184 return stringi_set_contains_len (&keywords, s.string, s.length);
188 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
190 *dst = (struct macro_tokens) {
191 .mts = xmalloc (src->n * sizeof *dst->mts),
195 for (size_t i = 0; i < src->n; i++)
196 macro_token_copy (&dst->mts[i], &src->mts[i]);
200 macro_tokens_uninit (struct macro_tokens *mts)
202 for (size_t i = 0; i < mts->n; i++)
203 macro_token_uninit (&mts->mts[i]);
208 macro_tokens_add_uninit (struct macro_tokens *mts)
210 if (mts->n >= mts->allocated)
211 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
212 return &mts->mts[mts->n++];
/* Appends a copy of MT to MTS.  MT itself is left untouched and remains owned
   by the caller. */
void
macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
{
  struct macro_token *slot = macro_tokens_add_uninit (mts);
  macro_token_copy (slot, mt);
}
221 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
222 if nonnull, for error reporting. */
224 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
225 enum segmenter_mode mode,
226 const struct macro_expansion_stack *stack)
230 struct segmenter segmenter;
231 struct substring body;
234 struct state state = {
235 .segmenter = segmenter_init (mode, true),
238 struct state saved = state;
240 while (state.body.length > 0)
242 struct macro_token mt = {
243 .token = { .type = T_STOP },
244 .representation = { .string = state.body.string },
246 struct token *token = &mt.token;
248 struct scanner scanner;
249 scanner_init (&scanner, token);
253 enum segment_type type;
254 int seg_len = segmenter_push (&state.segmenter, state.body.string,
255 state.body.length, true, &type);
256 assert (seg_len >= 0);
258 struct substring segment = ss_head (state.body, seg_len);
259 ss_advance (&state.body, seg_len);
261 enum scan_result result = scanner_push (&scanner, type, segment, token);
262 if (result == SCAN_SAVE)
264 else if (result == SCAN_BACK)
269 else if (result == SCAN_DONE)
273 /* We have a token in 'token'. */
274 mt.representation.length = state.body.string - mt.representation.string;
275 if (is_scan_type (token->type))
277 if (token->type != SCAN_SKIP)
279 char *s = scan_token_to_error (token);
282 mt.token.type = T_STRING;
283 macro_error (stack, &mt, "%s", s);
291 macro_tokens_add (mts, &mt);
292 token_uninit (token);
296 /* Tokenizes SRC according to MODE and appends the tokens to MTS. */
298 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
299 enum segmenter_mode mode)
301 macro_tokens_from_string__ (mts, src, mode, NULL);
305 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
307 for (size_t i = 0; i < mts->n; i++)
308 token_print (&mts->mts[i].token, stream);
313 TC_ENDCMD, /* No space before or after (new-line after). */
314 TC_BINOP, /* Space on both sides. */
315 TC_COMMA, /* Space afterward. */
316 TC_ID, /* Don't need spaces except sequentially. */
317 TC_PUNCT, /* Don't need spaces except sequentially. */
321 needs_space (enum token_class prev, enum token_class next)
323 /* Don't need a space before or after the end of a command.
324 (A new-line is needed afterward as a special case.) */
325 if (prev == TC_ENDCMD || next == TC_ENDCMD)
328 /* Binary operators always have a space on both sides. */
329 if (prev == TC_BINOP || next == TC_BINOP)
332 /* A comma always has a space afterward. */
333 if (prev == TC_COMMA)
336 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
337 two of them in a row. */
341 static enum token_class
342 classify_token (enum token_type type)
394 /* Appends a syntax representation of the tokens in MTS to S. If OFS and LEN
395 are nonnull, sets OFS[i] to the offset within S of the start of token 'i' in
396 MTS and LEN[i] to its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1]
397 because some tokens are separated by white space. */
399 macro_tokens_to_representation (struct macro_tokens *mts, struct string *s,
400 size_t *ofs, size_t *len)
402 assert ((ofs != NULL) == (len != NULL));
407 for (size_t i = 0; i < mts->n; i++)
411 enum token_type prev = mts->mts[i - 1].token.type;
412 enum token_type next = mts->mts[i].token.type;
414 if (prev == T_ENDCMD)
415 ds_put_byte (s, '\n');
418 enum token_class pc = classify_token (prev);
419 enum token_class nc = classify_token (next);
420 if (needs_space (pc, nc))
421 ds_put_byte (s, ' ');
426 ofs[i] = s->ss.length;
427 macro_token_to_representation (&mts->mts[i], s);
429 len[i] = s->ss.length - ofs[i];
434 macro_destroy (struct macro *m)
441 for (size_t i = 0; i < m->n_params; i++)
443 struct macro_param *p = &m->params[i];
446 macro_tokens_uninit (&p->def);
454 token_uninit (&p->charend);
458 token_uninit (&p->enclose[0]);
459 token_uninit (&p->enclose[1]);
467 macro_tokens_uninit (&m->body);
472 macro_set_create (void)
474 struct macro_set *set = xmalloc (sizeof *set);
475 *set = (struct macro_set) {
476 .macros = HMAP_INITIALIZER (set->macros),
482 macro_set_destroy (struct macro_set *set)
487 struct macro *macro, *next;
488 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
490 hmap_delete (&set->macros, ¯o->hmap_node);
491 macro_destroy (macro);
493 hmap_destroy (&set->macros);
498 hash_macro_name (const char *name)
500 return utf8_hash_case_string (name, 0);
503 static struct macro *
504 macro_set_find__ (struct macro_set *set, const char *name)
506 if (macro_set_is_empty (set))
510 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
511 hash_macro_name (name), &set->macros)
512 if (!utf8_strcasecmp (macro->name, name))
519 macro_set_find (const struct macro_set *set, const char *name)
521 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
524 /* Adds M to SET. M replaces any existing macro with the same name. Takes
527 macro_set_add (struct macro_set *set, struct macro *m)
529 struct macro *victim = macro_set_find__ (set, m->name);
532 hmap_delete (&set->macros, &victim->hmap_node);
533 macro_destroy (victim);
536 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
539 /* Macro call parsing. */
546 /* Accumulating tokens in mc->params toward the end of any type of
550 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
553 /* Expecting a keyword for a keyword argument. */
556 /* Expecting an equal sign for a keyword argument. */
559 /* Macro fully parsed and ready for expansion. */
563 /* Parsing macro calls. This is a FSM driven by macro_call_create() and
564 macro_call_add() to identify the macro being called and obtain its
565 arguments. 'state' identifies the FSM state. */
568 const struct macro_set *macros;
569 const struct macro *macro;
570 struct macro_tokens **args;
574 const struct macro_param *param; /* Parameter currently being parsed. */
577 /* Completes macro expansion by initializing arguments that weren't supplied to
580 mc_finished (struct macro_call *mc)
582 mc->state = MC_FINISHED;
583 for (size_t i = 0; i < mc->macro->n_params; i++)
585 mc->args[i] = &mc->macro->params[i].def;
590 mc_next_arg (struct macro_call *mc)
594 assert (!mc->macro->n_params);
595 return mc_finished (mc);
597 else if (mc->param->positional)
600 if (mc->param >= &mc->macro->params[mc->macro->n_params])
601 return mc_finished (mc);
604 mc->state = (!mc->param->positional ? MC_KEYWORD
605 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
612 for (size_t i = 0; i < mc->macro->n_params; i++)
615 mc->state = MC_KEYWORD;
618 return mc_finished (mc);
623 mc_error (struct macro_call *mc)
625 mc->state = MC_ERROR;
630 mc_add_arg (struct macro_call *mc, const struct macro_token *mt)
632 const struct macro_param *p = mc->param;
634 const struct token *token = &mt->token;
635 if ((token->type == T_ENDCMD || token->type == T_STOP)
636 && p->arg_type != ARG_CMDEND)
638 msg (SE, _("Unexpected end of command reading argument %s "
639 "to macro %s."), mc->param->name, mc->macro->name);
641 return mc_error (mc);
646 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
648 *argp = xzalloc (sizeof **argp);
649 struct macro_tokens *arg = *argp;
650 if (p->arg_type == ARG_N_TOKENS)
652 macro_tokens_add (arg, mt);
653 if (arg->n >= p->n_tokens)
654 return mc_next_arg (mc);
657 else if (p->arg_type == ARG_CMDEND)
659 if (token->type == T_ENDCMD || token->type == T_STOP)
660 return mc_next_arg (mc);
661 macro_tokens_add (arg, mt);
666 const struct token *end
667 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
668 if (token_equal (token, end))
669 return mc_next_arg (mc);
670 macro_tokens_add (arg, mt);
676 mc_expected (struct macro_call *mc, const struct macro_token *actual,
677 const struct token *expected)
679 const struct substring actual_s
680 = (actual->representation.length ? actual->representation
681 : ss_cstr (_("<end of input>")));
682 char *expected_s = token_to_string (expected);
683 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
685 (int) actual_s.length, actual_s.string, expected_s,
686 mc->param->name, mc->macro->name);
689 return mc_error (mc);
693 mc_enclose (struct macro_call *mc, const struct macro_token *mt)
695 const struct token *token = &mt->token;
698 if (token_equal (&mc->param->enclose[0], token))
704 return mc_expected (mc, mt, &mc->param->enclose[0]);
707 static const struct macro_param *
708 macro_find_parameter_by_name (const struct macro *m, struct substring name)
710 ss_ltrim (&name, ss_cstr ("!"));
712 for (size_t i = 0; i < m->n_params; i++)
714 const struct macro_param *p = &m->params[i];
715 struct substring p_name = ss_cstr (p->name + 1);
716 if (!utf8_strncasecmp (p_name.string, p_name.length,
717 name.string, name.length))
724 mc_keyword (struct macro_call *mc, const struct macro_token *mt)
726 const struct token *token = &mt->token;
727 if (token->type != T_ID)
728 return mc_finished (mc);
730 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
734 size_t arg_index = p - mc->macro->params;
736 if (mc->args[arg_index])
739 _("Argument %s multiply specified in call to macro %s."),
740 p->name, mc->macro->name);
741 return mc_error (mc);
745 mc->state = MC_EQUALS;
749 return mc_finished (mc);
753 mc_equals (struct macro_call *mc, const struct macro_token *mt)
755 const struct token *token = &mt->token;
758 if (token->type == T_EQUALS)
764 return mc_expected (mc, mt, &(struct token) { .type = T_EQUALS });
767 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
768 macro expander and initializes *MCP to it. Returns 0 if more tokens are needed
769 and should be added via macro_call_add() or 1 if the caller should next call
770 macro_call_get_expansion().
772 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to
775 macro_call_create (const struct macro_set *macros,
776 const struct token *token,
777 struct macro_call **mcp)
779 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
780 ? macro_set_find (macros, token->string.string)
788 struct macro_call *mc = xmalloc (sizeof *mc);
789 *mc = (struct macro_call) {
793 .state = (!macro->n_params ? MC_FINISHED
794 : !macro->params[0].positional ? MC_KEYWORD
795 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
797 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
798 .param = macro->params,
802 return mc->state == MC_FINISHED ? 1 : 0;
806 macro_call_destroy (struct macro_call *mc)
811 for (size_t i = 0; i < mc->macro->n_params; i++)
813 struct macro_tokens *a = mc->args[i];
814 if (a && a != &mc->macro->params[i].def)
816 macro_tokens_uninit (a);
824 /* Adds TOKEN to the collection of tokens in MC that potentially need to be
827 Returns -1 if the tokens added do not actually invoke a macro. The caller
828 should consume the first token without expanding it. (Later tokens might
829 invoke a macro so it's best to feed the second token into a new expander.)
831 Returns 0 if the macro expander needs more tokens, for macro arguments or to
832 decide whether this is actually a macro invocation. The caller should call
833 macro_call_add() again with the next token.
835 Returns a positive number to indicate that the returned number of tokens
836 invoke a macro. The number returned might be less than the number of tokens
837 added because it can take a few tokens of lookahead to determine whether the
838 macro invocation is finished. The caller should call
839 macro_call_get_expansion() to obtain the expansion. */
841 macro_call_add (struct macro_call *mc, const struct macro_token *mt)
849 return mc_add_arg (mc, mt);
852 return mc_enclose (mc, mt);
855 return mc_keyword (mc, mt);
858 return mc_equals (mc, mt);
865 /* Macro expansion. */
867 struct macro_expander
869 /* Always available. */
870 const struct macro_set *macros; /* Macros to expand recursively. */
871 enum segmenter_mode segmenter_mode; /* Mode for tokenization. */
872 int nesting_countdown; /* Remaining nesting levels. */
873 const struct macro_expansion_stack *stack; /* Stack for error reporting. */
874 bool *expand; /* May macro calls be expanded? */
875 struct stringi_map *vars; /* Variables from !DO and !LET. */
877 /* Only nonnull if inside a !DO loop. */
878 bool *break_; /* Set to true to break out of loop. */
880 /* Only nonnull if expanding a macro (and not, say, a macro argument). */
881 const struct macro *macro;
882 struct macro_tokens **args;
886 macro_expand (const struct macro_tokens *, const struct macro_expander *,
887 struct macro_tokens *);
890 expand_macro_function (const struct macro_expander *me,
891 const struct macro_token *input, size_t n_input,
892 struct string *output, size_t *input_consumed);
894 /* Returns true if the N tokens within MTS start with !*, false otherwise. */
896 is_bang_star (const struct macro_token *mts, size_t n)
899 && mts[0].token.type == T_MACRO_ID
900 && ss_equals (mts[0].token.string, ss_cstr ("!"))
901 && mts[1].token.type == T_ASTERISK);
904 /* Parses one function argument from the N_INPUT tokens in INPUT.
905 Each argument to a macro function is one of:
907 - A quoted string or other single literal token.
909 - An argument to the macro being expanded, e.g. !1 or a named argument.
913 - A function invocation.
915 Each function invocation yields a character sequence to be turned into a
916 sequence of tokens. The case where that character sequence is a single
917 quoted string is an important special case.
920 parse_function_arg (const struct macro_expander *me,
921 const struct macro_token *input, size_t n_input,
924 assert (n_input > 0);
926 const struct token *token = &input[0].token;
927 if (token->type == T_MACRO_ID && me->macro)
929 const struct macro_param *param = macro_find_parameter_by_name (
930 me->macro, token->string);
933 size_t param_idx = param - me->macro->params;
934 const struct macro_tokens *marg = me->args[param_idx];
935 for (size_t i = 0; i < marg->n; i++)
938 ds_put_byte (farg, ' ');
939 ds_put_substring (farg, marg->mts[i].representation);
944 if (is_bang_star (input, n_input))
946 for (size_t i = 0; i < me->macro->n_params; i++)
948 if (!me->macro->params[i].positional)
951 const struct macro_tokens *marg = me->args[i];
952 for (size_t j = 0; j < marg->n; j++)
955 ds_put_byte (farg, ' ');
956 ds_put_substring (farg, marg->mts[j].representation);
962 const char *value = stringi_map_find__ (me->vars,
963 token->string.string,
964 token->string.length);
967 ds_put_cstr (farg, value);
971 size_t subinput_consumed;
972 if (expand_macro_function (me, input, n_input,
973 farg, &subinput_consumed))
974 return subinput_consumed;
977 ds_put_substring (farg, input[0].representation);
982 parse_macro_function (const struct macro_expander *me,
983 const struct macro_token *tokens, size_t n_tokens,
984 struct string_array *args,
985 struct substring function,
986 int min_args, int max_args,
987 size_t *input_consumed)
990 || tokens[0].token.type != T_MACRO_ID
991 || !ss_equals_case (tokens[0].token.string, function)) /* XXX abbrevs allowed */
994 if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
996 macro_error (me->stack, n_tokens > 1 ? &tokens[1] : NULL,
997 _("`(' expected following %s."), function.string);
1001 string_array_init (args);
1003 for (size_t i = 2;; )
1006 goto unexpected_end;
1007 if (tokens[i].token.type == T_RPAREN)
1009 *input_consumed = i + 1;
1010 if (args->n < min_args || args->n > max_args)
1012 macro_error (me->stack, &tokens[i],
1013 _("Wrong number of arguments to macro function %s."),
1020 struct string s = DS_EMPTY_INITIALIZER;
1021 i += parse_function_arg (me, tokens + i, n_tokens - i, &s);
1025 goto unexpected_end;
1027 string_array_append_nocopy (args, ds_steal_cstr (&s));
1029 if (tokens[i].token.type == T_COMMA)
1031 else if (tokens[i].token.type != T_RPAREN)
1033 macro_error (me->stack, &tokens[i],
1034 _("`,' or `)' expected in call to macro function %s."),
1041 macro_error (me->stack, NULL, _("Missing `)' in call to macro function %s."),
1045 string_array_destroy (args);
1050 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1051 struct string *content)
1053 struct string_lexer slex;
1054 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1056 struct token token1;
1057 if (!string_lexer_next (&slex, &token1))
1060 if (token1.type != T_STRING)
1062 token_uninit (&token1);
1066 struct token token2;
1067 if (string_lexer_next (&slex, &token2))
1069 token_uninit (&token1);
1070 token_uninit (&token2);
1074 ds_put_substring (content, token1.string);
1075 token_uninit (&token1);
1080 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1083 ds_init_empty (tmp);
1084 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
1088 parse_integer (const char *s, int *np)
1093 long int n = strtol (s, &tail, 10);
1094 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
1095 tail += strspn (tail, CC_SPACES);
1096 return *tail == '\0' && errno != ERANGE && n == *np;
1100 expand_macro_function (const struct macro_expander *me,
1101 const struct macro_token *input, size_t n_input,
1102 struct string *output, size_t *input_consumed)
1104 struct string_array args;
1105 if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!LENGTH"), 1, 1,
1107 ds_put_format (output, "%zu", strlen (args.strings[0]));
1108 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!BLANKS"), 1, 1,
1112 if (!parse_integer (args.strings[0], &n))
1114 macro_error (me->stack, NULL,
1115 _("Argument to !BLANKS must be non-negative integer "
1116 "(not \"%s\")."), args.strings[0]);
1117 string_array_destroy (&args);
1121 ds_put_byte_multiple (output, ' ', n);
1123 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!CONCAT"), 1, INT_MAX,
1126 for (size_t i = 0; i < args.n; i++)
1127 if (!unquote_string (args.strings[i], me->segmenter_mode, output))
1128 ds_put_cstr (output, args.strings[i]);
1130 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!HEAD"), 1, 1,
1134 const char *s = unquote_string_in_place (args.strings[0],
1135 me->segmenter_mode, &tmp);
1137 struct macro_tokens mts = { .n = 0 };
1138 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1141 ds_put_substring (output, mts.mts[0].representation);
1142 macro_tokens_uninit (&mts);
1145 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!INDEX"), 2, 2,
1148 const char *haystack = args.strings[0];
1149 const char *needle = strstr (haystack, args.strings[1]);
1150 ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0);
1152 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!QUOTE"), 1, 1,
1155 if (unquote_string (args.strings[0], me->segmenter_mode, NULL))
1156 ds_put_cstr (output, args.strings[0]);
1159 ds_extend (output, strlen (args.strings[0]) + 2);
1160 ds_put_byte (output, '\'');
1161 for (const char *p = args.strings[0]; *p; p++)
1164 ds_put_byte (output, '\'');
1165 ds_put_byte (output, *p);
1167 ds_put_byte (output, '\'');
1170 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!SUBSTR"), 2, 3,
1174 if (!parse_integer (args.strings[1], &start) || start < 1)
1176 macro_error (me->stack, NULL,
1177 _("Second argument of !SUBSTR must be "
1178 "positive integer (not \"%s\")."),
1180 string_array_destroy (&args);
1184 int count = INT_MAX;
1185 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1187 macro_error (me->stack, NULL,
1188 _("Third argument of !SUBSTR must be "
1189 "non-negative integer (not \"%s\")."),
1191 string_array_destroy (&args);
1195 struct substring s = ss_cstr (args.strings[0]);
1196 ds_put_substring (output, ss_substr (s, start - 1, count));
1198 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!TAIL"), 1, 1,
1202 const char *s = unquote_string_in_place (args.strings[0],
1203 me->segmenter_mode, &tmp);
1205 struct macro_tokens mts = { .n = 0 };
1206 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1210 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1211 macro_tokens_to_representation (&tail, output, NULL, NULL);
1213 macro_tokens_uninit (&mts);
1216 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!UNQUOTE"), 1, 1,
1219 if (!unquote_string (args.strings[0], me->segmenter_mode, output))
1220 ds_put_cstr (output, args.strings[0]);
1222 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!UPCASE"), 1, 1,
1226 const char *s = unquote_string_in_place (args.strings[0],
1227 me->segmenter_mode, &tmp);
1228 char *upper = utf8_to_upper (s);
1229 ds_put_cstr (output, upper);
1233 else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!EVAL"), 1, 1,
1236 struct macro_tokens mts = { .n = 0 };
1237 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1238 me->segmenter_mode, me->stack);
1239 struct macro_tokens exp = { .n = 0 };
1240 struct macro_expansion_stack stack = {
1244 struct macro_expander subme = *me;
1245 subme.break_ = NULL;
1246 subme.stack = &stack;
1248 macro_expand (&mts, &subme, &exp);
1249 macro_tokens_to_representation (&exp, output, NULL, NULL);
1250 macro_tokens_uninit (&exp);
1251 macro_tokens_uninit (&mts);
1253 else if (n_input > 0
1254 && input[0].token.type == T_MACRO_ID
1255 && ss_equals_case (input[0].token.string, ss_cstr ("!NULL")))
1257 *input_consumed = 1;
1263 string_array_destroy (&args);
1267 static char *macro_evaluate_or (const struct macro_expander *me,
1268 const struct macro_token **tokens,
1269 const struct macro_token *end);
1272 macro_evaluate_literal (const struct macro_expander *me,
1273 const struct macro_token **tokens,
1274 const struct macro_token *end)
1276 const struct macro_token *p = *tokens;
1279 if (p->token.type == T_LPAREN)
1282 char *value = macro_evaluate_or (me, &p, end);
1285 if (p >= end || p->token.type != T_RPAREN)
1288 macro_error (me->stack, p < end ? p : NULL,
1289 _("Expecting ')' in macro expression."));
1296 else if (p->token.type == T_RPAREN)
1298 macro_error (me->stack, p, _("Expecting literal or function invocation "
1299 "in macro expression."));
1303 struct string function_output = DS_EMPTY_INITIALIZER;
1304 size_t function_consumed = parse_function_arg (me, p, end - p,
1306 struct string unquoted = DS_EMPTY_INITIALIZER;
1307 if (unquote_string (ds_cstr (&function_output), me->segmenter_mode,
1310 ds_swap (&function_output, &unquoted);
1311 ds_destroy (&unquoted);
1313 *tokens = p + function_consumed;
1314 return ds_steal_cstr (&function_output);
1317 /* Returns true if MT is valid as a macro operator. Only operators written as
1318 symbols (e.g. <>) are usable in macro expressions, not operators written as
1319 letters (e.g. EQ). */
1321 is_macro_operator (const struct macro_token *mt)
1323 return (mt->representation.length > 0
1324 && !c_isalpha (mt->representation.string[0]));
1327 static enum token_type
1328 parse_relational_op (const struct macro_token *mt)
1330 switch (mt->token.type)
1340 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1343 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1344 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1345 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1346 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1347 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1348 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
1357 macro_evaluate_relational (const struct macro_expander *me,
1358 const struct macro_token **tokens,
1359 const struct macro_token *end)
1361 const struct macro_token *p = *tokens;
1362 char *lhs = macro_evaluate_literal (me, &p, end);
1366 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1374 char *rhs = macro_evaluate_literal (me, &p, end);
1381 struct string lhs_tmp, rhs_tmp;
1382 int cmp = strcmp (unquote_string_in_place (lhs, me->segmenter_mode,
1384 unquote_string_in_place (rhs, me->segmenter_mode,
1386 ds_destroy (&lhs_tmp);
1387 ds_destroy (&rhs_tmp);
1392 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1394 : op == T_LT ? cmp < 0
1395 : op == T_GT ? cmp > 0
1396 : op == T_LE ? cmp <= 0
1397 : /* T_GE */ cmp >= 0);
1400 return xstrdup (b ? "1" : "0");
1404 macro_evaluate_not (const struct macro_expander *me,
1405 const struct macro_token **tokens,
1406 const struct macro_token *end)
1408 const struct macro_token *p = *tokens;
1410 unsigned int negations = 0;
1412 && (ss_equals_case (p->representation, ss_cstr ("!NOT"))
1413 || ss_equals (p->representation, ss_cstr ("~"))))
1419 char *operand = macro_evaluate_relational (me, &p, end);
1420 if (!operand || !negations)
1426 bool b = strcmp (operand, "0") ^ (negations & 1);
1429 return xstrdup (b ? "1" : "0");
1433 macro_evaluate_and (const struct macro_expander *me,
1434 const struct macro_token **tokens,
1435 const struct macro_token *end)
1437 const struct macro_token *p = *tokens;
1438 char *lhs = macro_evaluate_not (me, &p, end);
1443 && (ss_equals_case (p->representation, ss_cstr ("!AND"))
1444 || ss_equals (p->representation, ss_cstr ("&"))))
1447 char *rhs = macro_evaluate_not (me, &p, end);
1454 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1457 lhs = xstrdup (b ? "1" : "0");
1464 macro_evaluate_or (const struct macro_expander *me,
1465 const struct macro_token **tokens,
1466 const struct macro_token *end)
1468 const struct macro_token *p = *tokens;
1469 char *lhs = macro_evaluate_and (me, &p, end);
1474 && (ss_equals_case (p->representation, ss_cstr ("!OR"))
1475 || ss_equals (p->representation, ss_cstr ("|"))))
1478 char *rhs = macro_evaluate_and (me, &p, end);
1485 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1488 lhs = xstrdup (b ? "1" : "0");
1495 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1496 const struct macro_expander *me)
1498 return macro_evaluate_or (me, tokens, *tokens + n_tokens);
1502 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1503 const struct macro_expander *me,
1506 char *s = macro_evaluate_expression (tokens, n_tokens, me);
1510 struct macro_tokens mts = { .n = 0 };
1511 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, me->stack);
1512 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
1514 macro_error (me->stack, mts.n > 0 ? &mts.mts[0] : NULL,
1515 _("Macro expression must evaluate to "
1516 "a number (not \"%s\")."), s);
1518 macro_tokens_uninit (&mts);
1522 *number = token_number (&mts.mts[0].token);
1524 macro_tokens_uninit (&mts);
/* Scans the tokens in [P, END) for a clause boundary of a !IF construct and
   returns a pointer into the token array.  Only T_MACRO_ID tokens are
   examined; the keywords tested are !IF, !IFEND, and !ELSE.
   NOTE(review): the bodies of the branches are not shown here; presumably !IF
   and !IFEND adjust a nesting depth so that inner constructs are skipped --
   confirm. */
1528 static const struct macro_token *
1529 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1532 for (; p < end; p++)
/* Tokens that are not macro identifiers cannot be clause keywords. */
1534 if (p->token.type != T_MACRO_ID)
1537 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1539 else if (ss_equals_case (p->token.string, ss_cstr ("!IFEND")))
/* !ELSE is only significant when `nesting' is zero. */
1545 else if (ss_equals_case (p->token.string, ss_cstr ("!ELSE")) && !nesting)
/* Expands a !IF ... !THEN ... [!ELSE ...] !IFEND construct that starts at
   TOKENS[0], appending the expansion of the selected clause to EXP.  On the
   visible success path the return value is the number of tokens spanned by
   the construct, through and including !IFEND.  Syntax problems are reported
   with macro_error(). */
1552 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1553 const struct macro_expander *me,
1554 struct macro_tokens *exp)
1556 const struct macro_token *p = tokens;
1557 const struct macro_token *end = tokens + n_tokens;
/* This function only applies when the first token is !IF. */
1559 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
/* Evaluate the condition; P is passed by address so the evaluator can move
   it. */
1563 char *result = macro_evaluate_expression (&p, end - p, me);
/* Any result other than the exact string "0" counts as true. */
1566 bool b = strcmp (result, "0");
/* The condition must be followed by !THEN. */
1570 || p->token.type != T_MACRO_ID
1571 || !ss_equals_case (p->token.string, ss_cstr ("!THEN")))
1573 macro_error (me->stack, p < end ? p : NULL,
1574 _("!THEN expected in macro !IF construct."));
/* The !THEN clause runs from just after !THEN to the boundary located by
   find_ifend_clause(). */
1578 const struct macro_token *start_then = p + 1;
1579 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1582 macro_error (me->stack, NULL,
1583 _("!ELSE or !IFEND expected in macro !IF construct."));
/* If the !THEN clause ended at !ELSE, locate the end of the !ELSE clause,
   which must be !IFEND. */
1587 const struct macro_token *start_else, *end_if;
1588 if (ss_equals_case (end_then->token.string, ss_cstr ("!ELSE")))
1590 start_else = end_then + 1;
1591 end_if = find_ifend_clause (start_else, end);
1593 || !ss_equals_case (end_if->token.string, ss_cstr ("!IFEND")))
1595 macro_error (me->stack, end_if ? end_if : NULL,
1596 _("!IFEND expected in macro !IF construct."));
/* Choose the token range to expand: START with length N is set from the
   !THEN clause or, when START_ELSE is set, from the !ELSE clause. */
1606 const struct macro_token *start;
1611 n = end_then - start_then;
1613 else if (start_else)
1616 n = end_if - start_else;
/* Wrap the chosen range in a macro_tokens view without copying.  CONST_CAST
   is needed only because the .mts member is not const-qualified. */
1626 struct macro_tokens mts = {
1627 .mts = CONST_CAST (struct macro_token *, start),
/* Expand the clause with a copy of ME whose stack points at a new local
   entry. */
1630 struct macro_expansion_stack stack = {
1634 struct macro_expander subme = *me;
1635 subme.stack = &stack;
1636 macro_expand (&mts, &subme, exp);
/* Tokens consumed, counting the !IFEND itself. */
1638 return (end_if + 1) - tokens;
/* Parses a "!LET var = expression" construct starting at TOKENS[0].  The
   expression is evaluated and the resulting string is stored under the
   variable's name in ME->vars, replacing any previous value.  Syntax
   problems are reported with macro_error(). */
1642 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1643 const struct macro_expander *me)
1645 const struct macro_token *p = tokens;
1646 const struct macro_token *end = tokens + n_tokens;
/* This function only applies when the first token is !LET. */
1648 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
/* A macro identifier naming the variable must come next. */
1652 if (p >= end || p->token.type != T_MACRO_ID)
1654 macro_error (me->stack, p < end ? p : NULL,
1655 _("Expected macro variable name following !LET."));
/* The variable name may not be a macro keyword, nor the name of a parameter
   of the macro being expanded (when there is one). */
1658 const struct substring var_name = p->token.string;
1659 if (is_macro_keyword (var_name)
1660 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1662 macro_error (me->stack, p < end ? p : NULL,
1663 _("Cannot use argument name or macro keyword "
1664 "\"%.*s\" as !LET variable."),
1665 (int) var_name.length, var_name.string);
/* An equals sign must separate the name from the expression. */
1670 if (p >= end || p->token.type != T_EQUALS)
1672 macro_error (me->stack, p < end ? p : NULL,
1673 _("Expected `=' following !LET."));
/* Evaluate the right-hand side; P is passed by address so the evaluator can
   move it. */
1678 char *value = macro_evaluate_expression (&p, end - p, me);
/* The "_nocopy" call is given a fresh copy of the name and VALUE itself;
   presumably the map takes ownership of both -- confirm. */
1682 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens in [P, END) for the !DOEND belonging to a !DO construct
   and returns a pointer into the token array.  Only T_MACRO_ID tokens are
   examined; the keywords tested are !DO and !DOEND.  If the scan ends without
   a result, "Missing !DOEND." is reported against STACK.
   NOTE(review): the bodies of the branches are not shown here; presumably
   they track nesting so that inner !DO ... !DOEND pairs are skipped --
   confirm. */
1686 static const struct macro_token *
1687 find_doend (const struct macro_expansion_stack *stack,
1688 const struct macro_token *p, const struct macro_token *end)
1691 for (; p < end; p++)
/* Tokens that are not macro identifiers cannot be !DO or !DOEND. */
1693 if (p->token.type != T_MACRO_ID)
1696 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1698 else if (ss_equals_case (p->token.string, ss_cstr ("!DOEND")))
1705 macro_error (stack, NULL, _("Missing !DOEND."));
1710 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1711 const struct macro_expander *me,
1712 struct macro_tokens *exp)
1714 const struct macro_token *p = tokens;
1715 const struct macro_token *end = tokens + n_tokens;
1717 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
1721 if (p >= end || p->token.type != T_MACRO_ID)
1723 macro_error (me->stack, p < end ? p : NULL,
1724 _("Expected macro variable name following !DO."));
1727 const struct substring var_name = p->token.string;
1728 if (is_macro_keyword (var_name)
1729 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1731 macro_error (me->stack, p, _("Cannot use argument name or macro "
1732 "keyword as !DO variable."));
1737 struct macro_expansion_stack substack = {
1741 bool break_ = false;
1742 struct macro_expander subme = *me;
1743 subme.break_ = &break_;
1744 subme.stack = &substack;
1746 int miterate = settings_get_miterate ();
1747 if (p < end && p->token.type == T_MACRO_ID
1748 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1751 char *list = macro_evaluate_expression (&p, end - p, &subme);
1755 struct macro_tokens items = { .n = 0 };
1756 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
1760 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1763 macro_tokens_uninit (&items);
1767 const struct macro_tokens inner = {
1768 .mts = CONST_CAST (struct macro_token *, p),
1772 for (size_t i = 0; i < items.n && !break_; i++)
1776 macro_error (&substack, NULL,
1777 _("!DO loop over list exceeded "
1778 "maximum number of iterations %d. "
1779 "(Use SET MITERATE to change the limit.)"),
1783 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1784 ss_xstrdup (items.mts[i].representation));
1786 macro_expand (&inner, &subme, exp);
1788 return do_end - tokens + 1;
1790 else if (p < end && p->token.type == T_EQUALS)
1794 if (!macro_evaluate_number (&p, end - p, &subme, &first))
1797 if (p >= end || p->token.type != T_MACRO_ID
1798 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1800 macro_error (subme.stack, p < end ? p : NULL,
1801 _("Expected !TO in numerical !DO loop."));
1807 if (!macro_evaluate_number (&p, end - p, &subme, &last))
1811 if (p < end && p->token.type == T_MACRO_ID
1812 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1815 if (!macro_evaluate_number (&p, end - p, &subme, &by))
1820 macro_error (subme.stack, NULL, _("!BY value cannot be zero."));
1825 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1828 const struct macro_tokens inner = {
1829 .mts = CONST_CAST (struct macro_token *, p),
1833 if ((by > 0 && first <= last) || (by < 0 && first >= last))
1836 for (double index = first;
1837 by > 0 ? (index <= last) : (index >= last) && !break_;
1842 macro_error (subme.stack, NULL,
1843 _("Numerical !DO loop exceeded "
1844 "maximum number of iterations %d. "
1845 "(Use SET MITERATE to change the limit.)"),
1850 char index_s[DBL_BUFSIZE_BOUND];
1851 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1852 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1855 macro_expand (&inner, &subme, exp);
1859 return do_end - tokens + 1;
1863 macro_error (me->stack, p < end ? p : NULL,
1864 _("Expected `=' or !IN in !DO loop."));
/* Expands the tokens in MTS according to the expander state ME, appending
   the resulting tokens to EXP.  Handles, in the order tested below: macro
   parameters, the construct recognized by is_bang_star(), !IF, macro
   variables, nested macro calls, !BREAK, macro functions, !LET, !DO, and
   !ONEXPAND / !OFFEXPAND.  Tokens that match none of these are copied to EXP
   unchanged. */
1870 macro_expand (const struct macro_tokens *mts,
1871 const struct macro_expander *me,
1872 struct macro_tokens *exp)
/* When the nesting budget is used up, report the error and copy the input
   tokens to the output without expanding them. */
1874 if (me->nesting_countdown <= 0)
1876 macro_error (me->stack, NULL, _("Maximum nesting level %d exceeded. "
1877 "(Use SET MNEST to change the limit.)"),
1878 settings_get_mnest ());
1879 for (size_t i = 0; i < mts->n; i++)
1880 macro_tokens_add (exp, &mts->mts[i]);
/* Main loop.  It also stops as soon as a break flag is present and set. */
1884 for (size_t i = 0; i < mts->n && (!me->break_ || !*me->break_); i++)
1886 const struct macro_token *mt = &mts->mts[i];
1887 const struct token *token = &mt->token;
/* Inside a macro, a macro identifier may name one of the macro's
   parameters. */
1888 if (token->type == T_MACRO_ID && me->macro)
1890 const struct macro_param *param = macro_find_parameter_by_name (
1891 me->macro, token->string);
/* The argument for a parameter sits at the parameter's index within the
   macro's parameter array. */
1894 const struct macro_tokens *arg
1895 = me->args[param - me->macro->params];
/* If expansion is currently enabled and this parameter has expand_arg set,
   expand the argument recursively with a fresh, empty variable map. */
1896 if (*me->expand && param->expand_arg)
1898 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1899 struct macro_expansion_stack stack = {
1900 .name = param->name,
1903 struct macro_expander subme = {
1904 .macros = me->macros,
1907 .segmenter_mode = me->segmenter_mode,
1908 .expand = me->expand,
1911 .nesting_countdown = me->nesting_countdown,
1914 macro_expand (arg, &subme, exp);
1915 stringi_map_destroy (&vars);
/* Copy the argument's tokens to the output as they are (presumably the
   alternative to the recursive expansion above -- confirm). */
1918 for (size_t i = 0; i < arg->n; i++)
1919 macro_tokens_add (exp, &arg->mts[i]);
/* Construct recognized by is_bang_star(): walk all of the macro's
   parameters.  The `positional' test presumably restricts this to positional
   parameters -- confirm. */
1923 if (is_bang_star (mts->mts + i, mts->n - i))
1925 for (size_t j = 0; j < me->macro->n_params; j++)
1927 const struct macro_param *param = &me->macro->params[j];
1928 if (!param->positional)
/* Same expand-or-copy treatment as for a single named parameter. */
1931 const struct macro_tokens *arg = me->args[j];
1932 if (*me->expand && param->expand_arg)
1934 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1935 struct macro_expansion_stack stack = {
1939 struct macro_expander subme = {
1940 .macros = me->macros,
1943 .segmenter_mode = me->segmenter_mode,
1944 .expand = me->expand,
1947 .nesting_countdown = me->nesting_countdown,
1950 macro_expand (arg, &subme, exp);
1951 stringi_map_destroy (&vars);
1954 for (size_t k = 0; k < arg->n; k++)
1955 macro_tokens_add (exp, &arg->mts[k]);
/* !IF construct; N receives macro_expand_if()'s return value. */
1961 size_t n = macro_expand_if (&mts->mts[i], mts->n - i, me, exp);
/* Macro variable: look the identifier up in ME->vars and, when a value is
   found, tokenize that value into the output. */
1969 if (token->type == T_MACRO_ID)
1971 const char *value = stringi_map_find__ (me->vars,
1972 token->string.string,
1973 token->string.length);
1976 macro_tokens_from_string__ (exp, ss_cstr (value),
1977 me->segmenter_mode, me->stack);
/* Nested macro call: try to start a call at this token, then feed it the
   following tokens one at a time for as long as macro_call_add() returns
   zero. */
1984 struct macro_call *submc;
1985 int retval = macro_call_create (me->macros, token, &submc);
1986 for (size_t j = 1; !retval; j++)
/* Past the end of MTS, a synthetic T_ENDCMD token is supplied instead. */
1988 const struct macro_token endcmd
1989 = { .token = { .type = T_ENDCMD } };
1990 retval = macro_call_add (
1991 submc, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
/* Expand the called macro's body with its own variable map, a stack entry
   describing the macro, and a nesting budget reduced by one. */
1996 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1997 struct macro_expansion_stack stack = {
1998 .name = submc->macro->name,
1999 .file_name = submc->macro->file_name,
2000 .first_line = submc->macro->first_line,
2001 .last_line = submc->macro->last_line,
2004 struct macro_expander subme = {
2005 .macros = submc->macros,
2006 .macro = submc->macro,
2007 .args = submc->args,
2008 .segmenter_mode = me->segmenter_mode,
2009 .expand = me->expand,
2012 .nesting_countdown = me->nesting_countdown - 1,
2015 macro_expand (&submc->macro->body, &subme, exp);
2016 macro_call_destroy (submc);
2017 stringi_map_destroy (&vars);
/* The call object is also destroyed on this second path. */
2021 macro_call_destroy (submc);
/* Tokens that are not macro identifiers are passed through to the output. */
2024 if (token->type != T_MACRO_ID)
2026 macro_tokens_add (exp, mt);
/* !BREAK; it is reported as an error when used outside !DO. */
2030 if (ss_equals_case (token->string, ss_cstr ("!break")))
2033 macro_error (me->stack, mt, _("!BREAK outside !DO."));
/* Macro functions: on success, skip the tokens the function consumed and
   tokenize its string output into the expansion. */
2041 struct string function_output = DS_EMPTY_INITIALIZER;
2042 size_t function_consumed;
2043 if (expand_macro_function (me, &mts->mts[i], mts->n - i,
2044 &function_output, &function_consumed))
/* The loop's own i++ accounts for the remaining token. */
2046 i += function_consumed - 1;
2048 macro_tokens_from_string__ (exp, function_output.ss,
2049 me->segmenter_mode, me->stack);
2050 ds_destroy (&function_output);
/* !LET, then !DO; N receives each parser's return value. */
2055 size_t n = macro_parse_let (&mts->mts[i], mts->n - i, me);
2062 n = macro_expand_do (&mts->mts[i], mts->n - i, me, exp);
/* !ONEXPAND and !OFFEXPAND act on the shared flag *ME->expand; !OFFEXPAND
   clears it. */
2069 if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
2071 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
2072 *me->expand = false;
/* Anything else is copied to the output. */
2074 macro_tokens_add (exp, mt);
/* Expands the body of the macro invoked by MC, appending the resulting
   tokens to EXP.  SEGMENTER_MODE is stored in the expander that is handed to
   macro_expand().  MC must already be in the MC_FINISHED state. */
2079 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2080 struct macro_tokens *exp)
2082 assert (mc->state == MC_FINISHED);
/* Fresh variable map for this expansion, destroyed before returning. */
2085 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
/* Stack entry describing the macro, built from its name and the recorded
   location of its definition. */
2086 struct macro_expansion_stack stack = {
2087 .name = mc->macro->name,
2088 .file_name = mc->macro->file_name,
2089 .first_line = mc->macro->first_line,
2090 .last_line = mc->macro->last_line,
2092 struct macro_expander me = {
2093 .macros = mc->macros,
2096 .segmenter_mode = segmenter_mode,
/* The nesting budget starts at the configured MNEST value. */
2100 .nesting_countdown = settings_get_mnest (),
2104 macro_expand (&mc->macro->body, &me, exp);
2106 stringi_map_destroy (&vars);