1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list. Entries are not dynamically allocated
46 but on the program stack. */
47 struct macro_expansion_stack
49 /* Points to an outer stack entry, or NULL if this is the outermost. */
50 const struct macro_expansion_stack *next;
52 /* A macro name or !IF, !DO, etc. */
55 /* Location of the macro definition, if available. */
56 const char *file_name;
61 /* Reports an error during macro expansion. STACK is the stack for reporting
62 the location of the error, MT is the optional token at which the error was
63 detected, and FORMAT along with the varargs is the message to report. */
64 static void PRINTF_FORMAT (3, 4)
65 macro_error (const struct macro_expansion_stack *stack,
66 const struct macro_token *mt,
67 const char *format, ...)
69 struct msg_stack **ms = NULL;
70 size_t allocated_ms = 0;
73 for (const struct macro_expansion_stack *p = stack; p; p = p->next)
75 if (n_ms >= allocated_ms)
76 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
78 /* TRANSLATORS: These strings are used for explaining the context of an
79 error. The "While expanding" message appears first, followed by zero
80 or more of the "inside expansion" messages. `innermost',
81 `next_inner`, etc., are names of macros, and `foobar' is a piece of
84 foo.sps:12: At `foobar' in the expansion of 'innermost',
85 foo.sps:23: inside the expansion of 'next_inner',
86 foo.sps:34: inside the expansion of 'next_inner2',
87 foo.sps:45: inside the expansion of 'outermost',
88 foo.sps:76: This is the actual error message. */
92 if (mt && mt->syntax.length)
95 str_ellipsize (mt->syntax, syntax, sizeof syntax);
96 description = xasprintf (_("At `%s' in the expansion of `%s',"),
100 description = xasprintf (_("In the expansion of `%s',"), p->name);
103 description = xasprintf (_("inside the expansion of `%s',"), p->name);
105 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
106 *ms[n_ms] = (struct msg_stack) {
108 .file_name = xstrdup_if_nonnull (p->file_name),
109 .first_line = p->first_line,
110 .last_line = p->last_line,
112 .description = description,
118 va_start (args, format);
119 char *s = xvasprintf (format, args);
122 struct msg *m = xmalloc (sizeof *m);
124 .category = MSG_C_SYNTAX,
125 .severity = MSG_S_ERROR,
/* Makes DST a copy of SRC.  The token is copied with token_copy() and the
   syntax text is copied into DST with ss_alloc_substring(); the matching
   release is the ss_dealloc() call in macro_token_uninit(). */
134 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
136 token_copy (&dst->token, &src->token);
137 ss_alloc_substring (&dst->syntax, src->syntax);
/* Releases the contents of MT: its token via token_uninit() and its syntax
   substring via ss_dealloc().  MT itself is not freed here. */
141 macro_token_uninit (struct macro_token *mt)
143 token_uninit (&mt->token);
144 ss_dealloc (&mt->syntax);
/* Appends the syntax text recorded for MT to S. */
148 macro_token_to_syntax (struct macro_token *mt, struct string *s)
150 ds_put_substring (s, mt->syntax);
/* Reports whether S is one of the macro keywords listed in the static `kws'
   table.  S is passed through ss_ltrim() with "!" before the lookup, so the
   table entries are matched against S without its leading `!'
   (presumably case-insensitively, given the stringi_set -- confirm). */
153 is_macro_keyword (struct substring s)
/* The keyword set is function-local static state, filled in from `kws' the
   first time it is found empty. */
155 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
156 if (stringi_set_is_empty (&keywords))
158 static const char *kws[] = {
179 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
180 stringi_set_insert (&keywords, kws[i]);
183 ss_ltrim (&s, ss_cstr ("!"));
184 return stringi_set_contains_len (&keywords, s.string, s.length);
/* Initializes DST as a copy of SRC: allocates an array with room for SRC->n
   tokens and copies each token into it with macro_token_copy(). */
188 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
190 *dst = (struct macro_tokens) {
191 .mts = xmalloc (src->n * sizeof *dst->mts),
195 for (size_t i = 0; i < src->n; i++)
196 macro_token_copy (&dst->mts[i], &src->mts[i]);
/* Uninitializes each of the MTS->n tokens held in MTS with
   macro_token_uninit(). */
200 macro_tokens_uninit (struct macro_tokens *mts)
202 for (size_t i = 0; i < mts->n; i++)
203 macro_token_uninit (&mts->mts[i]);
/* Reserves one more slot at the end of MTS and returns a pointer to it.  The
   array is grown with x2nrealloc() when it is full.  MTS->n is incremented,
   but the returned slot is left uninitialized for the caller to fill in. */
208 macro_tokens_add_uninit (struct macro_tokens *mts)
210 if (mts->n >= mts->allocated)
211 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
212 return &mts->mts[mts->n++];
/* Appends a copy of MT to MTS: obtains a fresh slot from
   macro_tokens_add_uninit() and fills it with macro_token_copy(). */
216 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
218 macro_token_copy (macro_tokens_add_uninit (mts), mt);
221 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
222 if nonnull, for error reporting. */
224 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
225 enum segmenter_mode mode,
226 const struct macro_expansion_stack *stack)
230 struct segmenter segmenter;
231 struct substring body;
234 struct state state = {
235 .segmenter = segmenter_init (mode, true),
238 struct state saved = state;
240 while (state.body.length > 0)
242 struct macro_token mt = {
243 .token = { .type = T_STOP },
244 .syntax = { .string = state.body.string },
246 struct token *token = &mt.token;
248 struct scanner scanner;
249 scanner_init (&scanner, token);
253 enum segment_type type;
254 int seg_len = segmenter_push (&state.segmenter, state.body.string,
255 state.body.length, true, &type);
256 assert (seg_len >= 0);
258 struct substring segment = ss_head (state.body, seg_len);
259 ss_advance (&state.body, seg_len);
261 enum scan_result result = scanner_push (&scanner, type, segment, token);
262 if (result == SCAN_SAVE)
264 else if (result == SCAN_BACK)
269 else if (result == SCAN_DONE)
273 /* We have a token in 'token'. */
274 mt.syntax.length = state.body.string - mt.syntax.string;
275 if (is_scan_type (token->type))
277 if (token->type != SCAN_SKIP)
279 char *s = scan_token_to_error (token);
282 mt.token.type = T_STRING;
283 macro_error (stack, &mt, "%s", s);
291 macro_tokens_add (mts, &mt);
292 token_uninit (token);
296 /* Tokenizes SRC according to MODE and appends the tokens to MTS. */
298 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
299 enum segmenter_mode mode)
301 macro_tokens_from_string__ (mts, src, mode, NULL);
/* Prints the token of each of the MTS->n elements of MTS to STREAM using
   token_print(). */
305 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
307 for (size_t i = 0; i < mts->n; i++)
308 token_print (&mts->mts[i].token, stream);
313 TC_ENDCMD, /* No space before or after (new-line after). */
314 TC_BINOP, /* Space on both sides. */
315 TC_COMMA, /* Space afterward. */
316 TC_ID, /* Don't need spaces except sequentially. */
317 TC_PUNCT, /* Don't need spaces except sequentially. */
/* Decides whether white space is needed between a token of class PREV and an
   immediately following token of class NEXT.  The rules are applied in the
   order written below, so the earlier ones take precedence. */
321 needs_space (enum token_class prev, enum token_class next)
323 /* Don't need a space before or after the end of a command.
324 (A new-line is needed afterward as a special case.) */
325 if (prev == TC_ENDCMD || next == TC_ENDCMD)
328 /* Binary operators always have a space on both sides. */
329 if (prev == TC_BINOP || next == TC_BINOP)
332 /* A comma always has a space afterward. */
333 if (prev == TC_COMMA)
336 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
337 two of them in a row. */
341 static enum token_class
342 classify_token (enum token_type type)
394 /* Appends syntax for the tokens in MTS to S. If OFS and LEN are nonnull, sets
395 OFS[i] to the offset within S of the start of token 'i' in MTS and LEN[i] to
396 its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1] because some
397 tokens are separated by white space. */
399 macro_tokens_to_syntax (struct macro_tokens *mts, struct string *s,
400 size_t *ofs, size_t *len)
402 assert ((ofs != NULL) == (len != NULL));
407 for (size_t i = 0; i < mts->n; i++)
411 enum token_type prev = mts->mts[i - 1].token.type;
412 enum token_type next = mts->mts[i].token.type;
414 if (prev == T_ENDCMD)
415 ds_put_byte (s, '\n');
418 enum token_class pc = classify_token (prev);
419 enum token_class nc = classify_token (next);
420 if (needs_space (pc, nc))
421 ds_put_byte (s, ' ');
426 ofs[i] = s->ss.length;
427 macro_token_to_syntax (&mts->mts[i], s);
429 len[i] = s->ss.length - ofs[i];
434 macro_destroy (struct macro *m)
441 for (size_t i = 0; i < m->n_params; i++)
443 struct macro_param *p = &m->params[i];
446 macro_tokens_uninit (&p->def);
454 token_uninit (&p->charend);
458 token_uninit (&p->enclose[0]);
459 token_uninit (&p->enclose[1]);
467 macro_tokens_uninit (&m->body);
/* Allocates a new macro set and initializes its `macros' hash map with
   HMAP_INITIALIZER. */
472 macro_set_create (void)
474 struct macro_set *set = xmalloc (sizeof *set);
475 *set = (struct macro_set) {
476 .macros = HMAP_INITIALIZER (set->macros),
/* Destroys SET: removes every macro from SET's hash map and destroys it with
   macro_destroy(), then destroys the hash map itself. */
482 macro_set_destroy (struct macro_set *set)
487 struct macro *macro, *next;
/* The _SAFE iterator is used because each node is deleted while iterating. */
488 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
/* The argument is the address of the macro's embedded hash node; the text
   here had been corrupted from `&macro' into a mis-encoded character. */
490 hmap_delete (&set->macros, &macro->hmap_node);
491 macro_destroy (macro);
493 hmap_destroy (&set->macros);
/* Returns the hash of macro name NAME, computed by utf8_hash_case_string()
   with basis 0.  This is the hash used for both insertion into and lookup in
   a macro set's hash map. */
498 hash_macro_name (const char *name)
500 return utf8_hash_case_string (name, 0);
/* Searches SET for a macro named NAME.  Candidates are taken from the hash
   bucket for hash_macro_name (NAME) and accepted when utf8_strcasecmp()
   reports their name equal to NAME.  An empty set is checked for first, before
   NAME is hashed. */
503 static struct macro *
504 macro_set_find__ (struct macro_set *set, const char *name)
506 if (macro_set_is_empty (set))
510 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
511 hash_macro_name (name), &set->macros)
512 if (!utf8_strcasecmp (macro->name, name))
/* Looks up NAME in SET.  This is the const-qualified entry point; it casts
   away the const on SET and delegates to macro_set_find__(). */
519 macro_set_find (const struct macro_set *set, const char *name)
521 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
524 /* Adds M to SET. M replaces any existing macro with the same name. Takes
527 macro_set_add (struct macro_set *set, struct macro *m)
529 struct macro *victim = macro_set_find__ (set, m->name);
532 hmap_delete (&set->macros, &victim->hmap_node);
533 macro_destroy (victim);
536 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
539 /* Macro call parsing. */
546 /* Accumulating tokens in mc->params toward the end of any type of
550 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
553 /* Expecting a keyword for a keyword argument. */
556 /* Expecting an equal sign for a keyword argument. */
559 /* Macro fully parsed and ready for expansion. */
563 /* Parsing macro calls. This is a FSM driven by macro_call_create() and
564 macro_call_add() to identify the macro being called and obtain its
565 arguments. 'state' identifies the FSM state. */
568 const struct macro_set *macros;
569 const struct macro *macro;
570 struct macro_tokens **args;
574 const struct macro_param *param; /* Parameter currently being parsed. */
577 /* Completes macro expansion by initializing arguments that weren't supplied to
580 mc_finished (struct macro_call *mc)
582 mc->state = MC_FINISHED;
583 for (size_t i = 0; i < mc->macro->n_params; i++)
585 mc->args[i] = &mc->macro->params[i].def;
590 mc_next_arg (struct macro_call *mc)
594 assert (!mc->macro->n_params);
595 return mc_finished (mc);
597 else if (mc->param->positional)
600 if (mc->param >= &mc->macro->params[mc->macro->n_params])
601 return mc_finished (mc);
604 mc->state = (!mc->param->positional ? MC_KEYWORD
605 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
612 for (size_t i = 0; i < mc->macro->n_params; i++)
615 mc->state = MC_KEYWORD;
618 return mc_finished (mc);
/* Puts the macro call parser MC into the MC_ERROR state.  The other mc_*
   handlers return this function's result directly after reporting an error. */
623 mc_error (struct macro_call *mc)
625 mc->state = MC_ERROR;
630 mc_add_arg (struct macro_call *mc, const struct macro_token *mt)
632 const struct macro_param *p = mc->param;
634 const struct token *token = &mt->token;
635 if ((token->type == T_ENDCMD || token->type == T_STOP)
636 && p->arg_type != ARG_CMDEND)
638 msg (SE, _("Unexpected end of command reading argument %s "
639 "to macro %s."), mc->param->name, mc->macro->name);
641 return mc_error (mc);
646 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
648 *argp = xzalloc (sizeof **argp);
649 struct macro_tokens *arg = *argp;
650 if (p->arg_type == ARG_N_TOKENS)
652 macro_tokens_add (arg, mt);
653 if (arg->n >= p->n_tokens)
654 return mc_next_arg (mc);
657 else if (p->arg_type == ARG_CMDEND)
659 if (token->type == T_ENDCMD || token->type == T_STOP)
660 return mc_next_arg (mc);
661 macro_tokens_add (arg, mt);
666 const struct token *end
667 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
668 if (token_equal (token, end))
669 return mc_next_arg (mc);
670 macro_tokens_add (arg, mt);
/* Reports a syntax error saying that token ACTUAL was found where EXPECTED
   was required while reading the current argument (MC->param) of the macro
   being called, then puts MC into the error state via mc_error() and returns
   its result. */
676 mc_expected (struct macro_call *mc, const struct macro_token *actual,
677 const struct token *expected)
/* A token with no syntax text is described as "<end of input>". */
679 const struct substring actual_s = (actual->syntax.length ? actual->syntax
680 : ss_cstr (_("<end of input>")));
681 char *expected_s = token_to_string (expected);
682 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
684 (int) actual_s.length, actual_s.string, expected_s,
685 mc->param->name, mc->macro->name);
688 return mc_error (mc);
692 mc_enclose (struct macro_call *mc, const struct macro_token *mt)
694 const struct token *token = &mt->token;
697 if (token_equal (&mc->param->enclose[0], token))
703 return mc_expected (mc, mt, &mc->param->enclose[0]);
/* Searches the parameters of macro M for one whose name matches NAME.  NAME
   is passed through ss_ltrim() with "!" first, and the comparison uses
   utf8_strncasecmp(). */
706 static const struct macro_param *
707 macro_find_parameter_by_name (const struct macro *m, struct substring name)
712 ss_ltrim (&name, ss_cstr ("!"));
714 for (size_t i = 0; i < m->n_params; i++)
716 const struct macro_param *p = &m->params[i];
/* Each stored parameter name is compared starting from its second character.
   NOTE(review): this presumably skips a leading `!' kept in p->name --
   confirm that every stored name has that prefix. */
717 struct substring p_name = ss_cstr (p->name + 1);
718 if (!utf8_strncasecmp (p_name.string, p_name.length,
719 name.string, name.length))
726 mc_keyword (struct macro_call *mc, const struct macro_token *mt)
728 const struct token *token = &mt->token;
729 if (token->type != T_ID)
730 return mc_finished (mc);
732 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
736 size_t arg_index = p - mc->macro->params;
738 if (mc->args[arg_index])
741 _("Argument %s multiply specified in call to macro %s."),
742 p->name, mc->macro->name);
743 return mc_error (mc);
747 mc->state = MC_EQUALS;
751 return mc_finished (mc);
/* Handles token MT when the parser is waiting for the equals sign of a
   keyword argument.  Any token other than T_EQUALS is reported through
   mc_expected() with a synthesized T_EQUALS token as the expectation. */
755 mc_equals (struct macro_call *mc, const struct macro_token *mt)
757 const struct token *token = &mt->token;
760 if (token->type == T_EQUALS)
766 return mc_expected (mc, mt, &(struct token) { .type = T_EQUALS });
769 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
770 macro expander and initializes *MCP to it. Returns 0 if more tokens are needed
771 and should be added via macro_call_add() or 1 if the caller should next call
772 macro_call_get_expansion().
774 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to
777 macro_call_create (const struct macro_set *macros,
778 const struct token *token,
779 struct macro_call **mcp)
781 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
782 ? macro_set_find (macros, token->string.string)
790 struct macro_call *mc = xmalloc (sizeof *mc);
791 *mc = (struct macro_call) {
795 .state = (!macro->n_params ? MC_FINISHED
796 : !macro->params[0].positional ? MC_KEYWORD
797 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
799 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
800 .param = macro->params,
804 return mc->state == MC_FINISHED ? 1 : 0;
/* Releases the argument token lists held by MC. */
808 macro_call_destroy (struct macro_call *mc)
813 for (size_t i = 0; i < mc->macro->n_params; i++)
815 struct macro_tokens *a = mc->args[i];
/* Skip arguments that were never supplied (null) and arguments that merely
   point at the parameter's default tokens inside the macro definition, as
   assigned by mc_finished(); only other argument lists are uninitialized
   here. */
816 if (a && a != &mc->macro->params[i].def)
818 macro_tokens_uninit (a);
826 /* Adds TOKEN to the collection of tokens in MC that potentially need to be
829 Returns -1 if the tokens added do not actually invoke a macro. The caller
830 should consume the first token without expanding it. (Later tokens might
831 invoke a macro so it's best to feed the second token into a new expander.)
833 Returns 0 if the macro expander needs more tokens, for macro arguments or to
834 decide whether this is actually a macro invocation. The caller should call
835 macro_call_add() again with the next token.
837 Returns a positive number to indicate that the returned number of tokens
838 invoke a macro. The number returned might be less than the number of tokens
839 added because it can take a few tokens of lookahead to determine whether the
840 macro invocation is finished. The caller should call
841 macro_call_get_expansion() to obtain the expansion. */
843 macro_call_add (struct macro_call *mc, const struct macro_token *mt)
851 return mc_add_arg (mc, mt);
854 return mc_enclose (mc, mt);
857 return mc_keyword (mc, mt);
860 return mc_equals (mc, mt);
867 /* Macro expansion. */
869 struct macro_expander
871 /* Always available. */
872 const struct macro_set *macros; /* Macros to expand recursively. */
873 enum segmenter_mode segmenter_mode; /* Mode for tokenization. */
874 int nesting_countdown; /* Remaining nesting levels. */
875 const struct macro_expansion_stack *stack; /* Stack for error reporting. */
876 bool *expand; /* May macro calls be expanded? */
877 struct stringi_map *vars; /* Variables from !DO and !LET. */
879 /* Only nonnull if inside a !DO loop. */
880 bool *break_; /* Set to true to break out of loop. */
882 /* Only nonnull if expanding a macro (and not, say, a macro argument). */
883 const struct macro *macro;
884 struct macro_tokens **args;
888 macro_expand (const struct macro_token *mts, size_t n_mts,
889 const struct macro_expander *, struct macro_tokens *);
892 expand_macro_function (const struct macro_expander *me,
893 const struct macro_token *input, size_t n_input,
894 struct string *output);
896 /* Returns true if the N tokens within MTS start with !*, false otherwise. */
898 is_bang_star (const struct macro_token *mts, size_t n)
901 && mts[0].token.type == T_MACRO_ID
902 && ss_equals (mts[0].token.string, ss_cstr ("!"))
903 && mts[1].token.type == T_ASTERISK);
906 /* Parses one function argument from the N_INPUT tokens in INPUT.
907 Each argument to a macro function is one of:
909 - A quoted string or other single literal token.
911 - An argument to the macro being expanded, e.g. !1 or a named argument.
915 - A function invocation.
917 Each function invocation yields a character sequence to be turned into a
918 sequence of tokens. The case where that character sequence is a single
919 quoted string is an important special case.
922 parse_function_arg (const struct macro_expander *me,
923 const struct macro_token *input, size_t n_input,
926 assert (n_input > 0);
928 const struct token *token = &input[0].token;
929 if (token->type == T_MACRO_ID && me->macro)
931 const struct macro_param *param = macro_find_parameter_by_name (
932 me->macro, token->string);
935 size_t param_idx = param - me->macro->params;
936 macro_tokens_to_syntax (me->args[param_idx], farg, NULL, NULL);
940 if (is_bang_star (input, n_input))
942 for (size_t i = 0; i < me->macro->n_params; i++)
944 if (!me->macro->params[i].positional)
947 ds_put_byte (farg, ' ');
948 macro_tokens_to_syntax (me->args[i], farg, NULL, NULL);
953 const char *var = stringi_map_find__ (me->vars,
954 token->string.string,
955 token->string.length);
958 ds_put_cstr (farg, var);
962 size_t n_function = expand_macro_function (me, input, n_input, farg);
967 ds_put_substring (farg, input[0].syntax);
972 parse_function_args (const struct macro_expander *me,
973 const struct macro_token *mts, size_t n,
974 const char *function,
975 struct string_array *args)
977 if (n < 2 || mts[1].token.type != T_LPAREN)
979 macro_error (me->stack, n > 1 ? &mts[1] : NULL,
980 _("`(' expected following %s."), function);
984 for (size_t i = 2; i < n; )
986 if (mts[i].token.type == T_RPAREN)
989 struct string s = DS_EMPTY_INITIALIZER;
990 i += parse_function_arg (me, mts + i, n - i, &s);
991 string_array_append_nocopy (args, ds_steal_cstr (&s));
995 else if (mts[i].token.type == T_COMMA)
997 else if (mts[i].token.type != T_RPAREN)
999 macro_error (me->stack, &mts[i],
1000 _("`,' or `)' expected in call to macro function %s."),
1006 macro_error (me->stack, NULL, _("Missing `)' in call to macro function %s."),
/* Tests whether S consists of exactly one quoted-string token when lexed in
   SEGMENTER_MODE.  The result is false if the first token cannot be read or
   is not a T_STRING, or if any further token follows it.

   On success, the string's content is appended to CONTENT.  CONTENT may be
   null when the caller only wants the yes/no answer, as the !QUOTE macro
   function does. */
1012 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1013 struct string *content)
1015 struct string_lexer slex;
1016 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1018 struct token token1;
1019 if (!string_lexer_next (&slex, &token1))
1022 if (token1.type != T_STRING)
1024 token_uninit (&token1);
/* A second token means S is more than a single quoted string. */
1028 struct token token2;
1029 if (string_lexer_next (&slex, &token2))
1031 token_uninit (&token1);
1032 token_uninit (&token2);
/* Guard against a null CONTENT so that probe-only callers do not cause a
   null pointer dereference. */
1036 if (content) ds_put_substring (content, token1.string);
1037 token_uninit (&token1);
/* Returns the unquoted form of S if S is a single quoted string (as decided
   by unquote_string()), otherwise S itself.  TMP is initialized to empty here
   and holds the unquoted text when there is any, so the caller must
   ds_destroy() TMP once it is done with the returned pointer. */
1042 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1045 ds_init_empty (tmp);
1046 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
/* Parses S as a base-10 integer with strtol() and stores the value, clamped
   to the range of `int', in *NP.  Returns true only if nothing but characters
   in CC_SPACES follows the number, strtol() did not report ERANGE, and the
   value fit in an `int' without clamping.

   NOTE(review): no visible line checks that any digits were consumed, so an
   empty or all-space S appears to be accepted as 0 -- confirm whether that is
   intended. */
1050 parse_integer (const char *s, int *np)
1055 long int n = strtol (s, &tail, 10);
1056 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
1057 tail += strspn (tail, CC_SPACES);
1058 return *tail == '\0' && errno != ERANGE && n == *np;
/* Tries to expand a macro function call (!BLANKS, !CONCAT, !EVAL, ...) that
   begins at INPUT[0], using at most N_INPUT tokens, and appends the text that
   the function produces to OUTPUT.  ME supplies the tokenization mode, the
   error-reporting stack, and the context for nested expansion (!EVAL).

   The function name is recognized by matching INPUT[0] against the `mfs'
   table with lex_id_match_n(), and the number of parsed arguments is checked
   against the table's minimum and maximum.  Errors are reported through
   macro_error().

   NOTE(review): the return statements are not visible in this listing; the
   callers treat the result as a count of tokens consumed. */
1062 expand_macro_function (const struct macro_expander *me,
1063 const struct macro_token *input, size_t n_input,
1064 struct string *output)
1066 if (!n_input || input[0].token.type != T_MACRO_ID)
1069 struct macro_function
1075 enum macro_function_id
/* Each entry: function name, minimum argument count, maximum argument
   count. */
1090 static const struct macro_function mfs[] = {
1091 [MF_BLANKS] = { "!BLANKS", 1, 1 },
1092 [MF_CONCAT] = { "!CONCAT", 1, INT_MAX },
1093 [MF_EVAL] = { "!EVAL", 1, 1 },
1094 [MF_HEAD] = { "!HEAD", 1, 1 },
1095 [MF_INDEX] = { "!INDEX", 2, 2 },
1096 [MF_LENGTH] = { "!LENGTH", 1, 1 },
1097 [MF_NULL] = { "!NULL", 0, 0 },
1098 [MF_QUOTE] = { "!QUOTE", 1, 1 },
1099 [MF_SUBSTR] = { "!SUBSTR", 2, 3 },
1100 [MF_TAIL] = { "!TAIL", 1, 1 },
1101 [MF_UNQUOTE] = { "!UNQUOTE", 1, 1 },
1102 [MF_UPCASE] = { "!UPCASE", 1, 1 },
1105 /* Is this a macro function? */
1106 const struct macro_function *mf;
1107 for (mf = mfs; ; mf++)
1109 if (mf >= mfs + sizeof mfs / sizeof *mfs)
1111 /* Not a macro function. */
1115 if (lex_id_match_n (ss_cstr (mf->name), input[0].token.string, 4))
/* The table is indexed by enum macro_function_id, so the entry's position is
   the function's id. */
1119 enum macro_function_id id = mf - mfs;
1123 struct string_array args = STRING_ARRAY_INITIALIZER;
1124 size_t n_consumed = parse_function_args (me, input, n_input, mf->name, &args);
/* Wrong number of arguments: pick the message that matches this function's
   allowed range. */
1128 if (args.n < mf->min_args || args.n > mf->max_args)
1130 if (mf->min_args == 1 && mf->max_args == 1)
1131 macro_error (me->stack, NULL,
1132 _("Macro function %s takes one argument (not %zu)."),
1134 else if (mf->min_args == 2 && mf->max_args == 2)
1135 macro_error (me->stack, NULL,
1136 _("Macro function %s takes two arguments (not %zu)."),
1138 else if (mf->min_args == 2 && mf->max_args == 3)
1139 macro_error (me->stack, NULL,
1140 _("Macro function %s takes two or three arguments "
1143 else if (mf->min_args == 1 && mf->max_args == INT_MAX)
1144 macro_error (me->stack, NULL,
1145 _("Macro function %s needs at least one argument."),
1155 ds_put_format (output, "%zu", strlen (args.strings[0]));
/* !BLANKS: a negative count must be rejected, as the error message already
   promises; otherwise it would be converted to a huge unsigned byte count. */
1161 if (!parse_integer (args.strings[0], &n) || n < 0)
1163 macro_error (me->stack, NULL,
1164 _("Argument to !BLANKS must be non-negative integer "
1165 "(not \"%s\")."), args.strings[0]);
1166 string_array_destroy (&args);
1170 ds_put_byte_multiple (output, ' ', n);
/* !CONCAT: append each argument, unquoted when it is a quoted string. */
1175 for (size_t i = 0; i < args.n; i++)
1176 if (!unquote_string (args.strings[i], me->segmenter_mode, output))
1177 ds_put_cstr (output, args.strings[i]);
/* !HEAD: tokenize the (unquoted) argument and emit its first token. */
1183 const char *s = unquote_string_in_place (args.strings[0],
1184 me->segmenter_mode, &tmp);
1186 struct macro_tokens mts = { .n = 0 };
1187 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1190 ds_put_substring (output, mts.mts[0].syntax);
1191 macro_tokens_uninit (&mts);
/* !INDEX: 1-based byte offset of the second argument within the first, or 0
   if it does not occur.  The pointer difference is converted to size_t so the
   argument type matches the %zu conversion. */
1198 const char *haystack = args.strings[0];
1199 const char *needle = strstr (haystack, args.strings[1]);
1200 ds_put_format (output, "%zu", needle ? (size_t) (needle - haystack) + 1 : (size_t) 0);
/* !QUOTE: an argument that is already a quoted string passes through
   unchanged; anything else is wrapped in single quotes. */
1205 if (unquote_string (args.strings[0], me->segmenter_mode, NULL))
1206 ds_put_cstr (output, args.strings[0]);
1209 ds_extend (output, strlen (args.strings[0]) + 2);
1210 ds_put_byte (output, '\'');
1211 for (const char *p = args.strings[0]; *p; p++)
1214 ds_put_byte (output, '\'');
1215 ds_put_byte (output, *p);
1217 ds_put_byte (output, '\'');
/* !SUBSTR: START is 1-based and must be positive; the optional COUNT must be
   non-negative and defaults to "the rest of the string". */
1224 if (!parse_integer (args.strings[1], &start) || start < 1)
1226 macro_error (me->stack, NULL,
1227 _("Second argument of !SUBSTR must be "
1228 "positive integer (not \"%s\")."),
1230 string_array_destroy (&args);
1234 int count = INT_MAX;
1235 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1237 macro_error (me->stack, NULL,
1238 _("Third argument of !SUBSTR must be "
1239 "non-negative integer (not \"%s\")."),
1241 string_array_destroy (&args);
1245 struct substring s = ss_cstr (args.strings[0]);
1246 ds_put_substring (output, ss_substr (s, start - 1, count));
/* !TAIL: tokenize the (unquoted) argument and emit everything after its first
   token.  `tail' borrows the tokens of `mts', so only `mts' is
   uninitialized. */
1253 const char *s = unquote_string_in_place (args.strings[0],
1254 me->segmenter_mode, &tmp);
1256 struct macro_tokens mts = { .n = 0 };
1257 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1261 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1262 macro_tokens_to_syntax (&tail, output, NULL, NULL);
1264 macro_tokens_uninit (&mts);
/* !UNQUOTE: strip the quotes if the argument is a quoted string, otherwise
   pass it through. */
1270 if (!unquote_string (args.strings[0], me->segmenter_mode, output))
1271 ds_put_cstr (output, args.strings[0]);
/* !UPCASE: convert the (unquoted) argument to upper case. */
1277 const char *s = unquote_string_in_place (args.strings[0],
1278 me->segmenter_mode, &tmp);
1279 char *upper = utf8_to_upper (s);
1280 ds_put_cstr (output, upper);
/* !EVAL: tokenize the argument and macro-expand it with a copy of ME that has
   its own stack entry and no enclosing-loop break flag. */
1288 struct macro_tokens mts = { .n = 0 };
1289 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1290 me->segmenter_mode, me->stack);
1291 struct macro_tokens exp = { .n = 0 };
1292 struct macro_expansion_stack stack = {
1296 struct macro_expander subme = *me;
1297 subme.break_ = NULL;
1298 subme.stack = &stack;
1300 macro_expand (mts.mts, mts.n, &subme, &exp);
1301 macro_tokens_to_syntax (&exp, output, NULL, NULL);
1302 macro_tokens_uninit (&exp);
1303 macro_tokens_uninit (&mts);
1311 string_array_destroy (&args);
1315 static char *macro_evaluate_or (const struct macro_expander *me,
1316 const struct macro_token **tokens,
1317 const struct macro_token *end);
1320 macro_evaluate_literal (const struct macro_expander *me,
1321 const struct macro_token **tokens,
1322 const struct macro_token *end)
1324 const struct macro_token *p = *tokens;
1327 if (p->token.type == T_LPAREN)
1330 char *value = macro_evaluate_or (me, &p, end);
1333 if (p >= end || p->token.type != T_RPAREN)
1336 macro_error (me->stack, p < end ? p : NULL,
1337 _("Expecting ')' in macro expression."));
1344 else if (p->token.type == T_RPAREN)
1346 macro_error (me->stack, p, _("Expecting literal or function invocation "
1347 "in macro expression."));
1351 struct string function_output = DS_EMPTY_INITIALIZER;
1352 size_t function_consumed = parse_function_arg (me, p, end - p,
1354 struct string unquoted = DS_EMPTY_INITIALIZER;
1355 if (unquote_string (ds_cstr (&function_output), me->segmenter_mode,
1358 ds_swap (&function_output, &unquoted);
1359 ds_destroy (&unquoted);
1361 *tokens = p + function_consumed;
1362 return ds_steal_cstr (&function_output);
1365 /* Returns true if MT is valid as a macro operator. Only operators written as
1366 symbols (e.g. <>) are usable in macro expressions, not operators written as
1367 letters (e.g. EQ). */
1369 is_macro_operator (const struct macro_token *mt)
1371 return mt->syntax.length > 0 && !c_isalpha (mt->syntax.string[0]);
/* Classifies MT as a relational operator for macro expressions, dispatching
   on its token type.  One branch yields the token's own type when
   is_macro_operator() accepts MT and T_STOP otherwise; another maps the
   macro identifiers !EQ, !NE, !LT, !GT, !LE and !GE (compared with
   ss_equals_case()) to T_EQ, T_NE, T_LT, T_GT, T_LE and T_GE. */
1374 static enum token_type
1375 parse_relational_op (const struct macro_token *mt)
1377 switch (mt->token.type)
1387 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1390 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1391 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1392 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1393 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1394 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1395 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
/* Evaluates the relational level of a macro expression from the tokens
   starting at *TOKENS and ending before END: a literal, optionally followed
   by a relational operator and a second literal.  When a comparison is made,
   the result is a newly allocated "1" or "0". */
1404 macro_evaluate_relational (const struct macro_expander *me,
1405 const struct macro_token **tokens,
1406 const struct macro_token *end)
1408 const struct macro_token *p = *tokens;
1409 char *lhs = macro_evaluate_literal (me, &p, end);
/* Running out of tokens counts as "no operator" (T_STOP). */
1413 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1421 char *rhs = macro_evaluate_literal (me, &p, end);
/* Operands are compared as strings with strcmp(), each first unquoted if it
   is a quoted string.  The temporaries only back the unquoted text. */
1428 struct string lhs_tmp, rhs_tmp;
1429 int cmp = strcmp (unquote_string_in_place (lhs, me->segmenter_mode,
1431 unquote_string_in_place (rhs, me->segmenter_mode,
1433 ds_destroy (&lhs_tmp);
1434 ds_destroy (&rhs_tmp);
1439 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1441 : op == T_LT ? cmp < 0
1442 : op == T_GT ? cmp > 0
1443 : op == T_LE ? cmp <= 0
1444 : /* T_GE */ cmp >= 0);
1447 return xstrdup (b ? "1" : "0");
/* Evaluates the negation level of a macro expression from the tokens starting
   at *TOKENS and ending before END: any number of negation operators (!NOT,
   matched case-insensitively, or ~) followed by a relational expression.

   With no negations, or when the operand fails to evaluate, the operand's
   result is passed through unchanged.  Otherwise the result is a newly
   allocated "1" or "0", where any operand other than "0" counts as true. */
1451 macro_evaluate_not (const struct macro_expander *me,
1452 const struct macro_token **tokens,
1453 const struct macro_token *end)
1455 const struct macro_token *p = *tokens;
1457 unsigned int negations = 0;
1459 && (ss_equals_case (p->syntax, ss_cstr ("!NOT"))
1460 || ss_equals (p->syntax, ss_cstr ("~"))))
1466 char *operand = macro_evaluate_relational (me, &p, end);
1467 if (!operand || !negations)
/* Reduce the strcmp() result to 0 or 1 before XORing with the parity of the
   negation count.  strcmp() only guarantees the sign of its result, so
   XORing the raw value with 1 left any result other than 0 or 1 (e.g. 2 or
   -1) nonzero, and an odd number of negations then failed to turn a true
   operand false. */
1473 bool b = (strcmp (operand, "0") != 0) ^ (negations & 1);
1476 return xstrdup (b ? "1" : "0");
/* Evaluates the conjunction level of a macro expression from the tokens
   starting at *TOKENS and ending before END: negation-level operands joined
   by !AND (matched case-insensitively) or &.  When operands are combined, the
   running result is replaced by a newly allocated "1" or "0"; an operand is
   true unless it is exactly "0". */
1480 macro_evaluate_and (const struct macro_expander *me,
1481 const struct macro_token **tokens,
1482 const struct macro_token *end)
1484 const struct macro_token *p = *tokens;
1485 char *lhs = macro_evaluate_not (me, &p, end);
1490 && (ss_equals_case (p->syntax, ss_cstr ("!AND"))
1491 || ss_equals (p->syntax, ss_cstr ("&"))))
1494 char *rhs = macro_evaluate_not (me, &p, end);
1501 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1504 lhs = xstrdup (b ? "1" : "0");
/* Evaluates the disjunction level of a macro expression from the tokens
   starting at *TOKENS and ending before END: conjunction-level operands
   joined by !OR (matched case-insensitively) or |.  When operands are
   combined, the running result is replaced by a newly allocated "1" or "0";
   an operand is true unless it is exactly "0". */
1511 macro_evaluate_or (const struct macro_expander *me,
1512 const struct macro_token **tokens,
1513 const struct macro_token *end)
1515 const struct macro_token *p = *tokens;
1516 char *lhs = macro_evaluate_and (me, &p, end);
1521 && (ss_equals_case (p->syntax, ss_cstr ("!OR"))
1522 || ss_equals (p->syntax, ss_cstr ("|"))))
1525 char *rhs = macro_evaluate_and (me, &p, end);
1532 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1535 lhs = xstrdup (b ? "1" : "0");
/* Evaluates a macro expression from the N_TOKENS tokens starting at *TOKENS,
   in the context of ME.  This is the entry point of the expression evaluator:
   it hands the range [*TOKENS, *TOKENS + N_TOKENS) to macro_evaluate_or() and
   returns its result. */
1542 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1543 const struct macro_expander *me)
1545 return macro_evaluate_or (me, tokens, *tokens + n_tokens);
/* Evaluates a macro expression from the N_TOKENS tokens starting at *TOKENS
   and requires the result to be a number.  The result string is tokenized
   again; unless that yields exactly one token and that token is a number, an
   error is reported through macro_error().  On success the value is stored
   in *NUMBER. */
1549 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1550 const struct macro_expander *me,
1553 char *s = macro_evaluate_expression (tokens, n_tokens, me);
1557 struct macro_tokens mts = { .n = 0 };
1558 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, me->stack);
1559 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
1561 macro_error (me->stack, mts.n > 0 ? &mts.mts[0] : NULL,
1562 _("Macro expression must evaluate to "
1563 "a number (not \"%s\")."), s);
1565 macro_tokens_uninit (&mts);
1569 *number = token_number (&mts.mts[0].token);
1571 macro_tokens_uninit (&mts);
1575 static const struct macro_token *
1576 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1579 for (; p < end; p++)
1581 if (p->token.type != T_MACRO_ID)
1584 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1586 else if (lex_id_match_n (p->token.string, ss_cstr ("!IFEND"), 4))
1592 else if (lex_id_match_n (p->token.string, ss_cstr ("!ELSE"), 4)
/* Expands an !IF construct that starts at TOKENS[0] (N_TOKENS tokens are
   available), appending the expansion of the selected clause to EXP:

     !IF expression !THEN tokens... [!ELSE tokens...] !IFEND

   The condition is true unless the expression evaluates to the string "0".
   On success the result is the number of tokens from TOKENS through the
   closing !IFEND, inclusive.

   NOTE(review): several short lines (early returns, the `else' branches that
   handle a missing !ELSE and an empty selection, and the guard around the
   final expansion) are omitted from this listing; confirm the failure return
   value against the complete file. */
1600 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1601 const struct macro_expander *me,
1602 struct macro_tokens *exp)
1604 const struct macro_token *p = tokens;
1605 const struct macro_token *end = tokens + n_tokens;
/* Not an !IF construct at all unless the first token is !IF. */
1607 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
1611 char *result = macro_evaluate_expression (&p, end - p, me);
/* Any result other than "0" selects the !THEN clause. */
1614 bool b = strcmp (result, "0");
1618 || p->token.type != T_MACRO_ID
1619 || !lex_id_match_n (p->token.string, ss_cstr ("!THEN"), 4))
1621 macro_error (me->stack, p < end ? p : NULL,
1622 _("!THEN expected in macro !IF construct."));
/* The !THEN clause runs from just past !THEN up to the token located by
   find_ifend_clause(). */
1626 const struct macro_token *start_then = p + 1;
1627 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1630 macro_error (me->stack, NULL,
1631 _("!ELSE or !IFEND expected in macro !IF construct."));
/* If the !THEN clause ended at !ELSE, locate the !IFEND that closes the
   !ELSE clause. */
1635 const struct macro_token *start_else, *end_if;
1636 if (lex_id_match_n (end_then->token.string, ss_cstr ("!ELSE"), 4))
1638 start_else = end_then + 1;
1639 end_if = find_ifend_clause (start_else, end);
1641 || !lex_id_match_n (end_if->token.string, ss_cstr ("!IFEND"), 4))
1643 macro_error (me->stack, end_if ? end_if : NULL,
1644 _("!IFEND expected in macro !IF construct."));
/* Choose the token range [START, START + N) to expand. */
1654 const struct macro_token *start;
1659 n = end_then - start_then;
1661 else if (start_else)
1664 n = end_if - start_else;
/* Expand the chosen clause with a copy of ME whose stack has one extra
   entry, STACK, on top. */
1674 struct macro_expansion_stack stack = {
1678 struct macro_expander subme = *me;
1679 subme.stack = &stack;
1680 macro_expand (start, n, &subme, exp);
/* Count of tokens consumed, including the !IFEND itself. */
1682 return (end_if + 1) - tokens;
/* Parses and carries out a !LET construct that starts at TOKENS[0] (N_TOKENS
   tokens are available):

     !LET !var = expression

   The variable name must be a T_MACRO_ID token that is neither a macro
   keyword nor the name of one of ME->macro's parameters.  The expression's
   value is stored under that name in ME->vars.

   NOTE(review): the return statements and the statements that advance P are
   on lines omitted from this listing.  macro_expand__() stores the result in
   a size_t named n_let, so presumably it is a count of tokens consumed --
   confirm against the complete file. */
1686 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1687 const struct macro_expander *me)
1689 const struct macro_token *p = tokens;
1690 const struct macro_token *end = tokens + n_tokens;
/* Not a !LET construct unless the first token is !LET. */
1692 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
1696 if (p >= end || p->token.type != T_MACRO_ID)
1698 macro_error (me->stack, p < end ? p : NULL,
1699 _("Expected macro variable name following !LET."));
/* Reject names that would collide with a keyword or a macro parameter. */
1702 const struct substring var_name = p->token.string;
1703 if (is_macro_keyword (var_name)
1704 || macro_find_parameter_by_name (me->macro, var_name))
1706 macro_error (me->stack, p < end ? p : NULL,
1707 _("Cannot use argument name or macro keyword "
1708 "\"%.*s\" as !LET variable."),
1709 (int) var_name.length, var_name.string);
1714 if (p >= end || p->token.type != T_EQUALS)
1716 macro_error (me->stack, p < end ? p : NULL,
1717 _("Expected `=' following !LET."));
1722 char *value = macro_evaluate_expression (&p, end - p, me);
/* VALUE is handed to the map as-is (the "nocopy" variant), keyed by a copy
   of the variable name. */
1726 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens in [P, END) for the !DOEND that closes a !DO loop.  The
   visible tests examine only T_MACRO_ID tokens and distinguish !DO from
   !DOEND (the latter through lex_id_match_n() with 4 as its last argument).
   The error "Missing !DOEND." is reported against STACK.

   NOTE(review): what happens in each branch, and the return statements, are
   on lines this listing omits.  macro_expand_do() treats the result as a
   pointer to the !DOEND token and tests it for failure; presumably nested
   !DO loops are skipped -- confirm against the complete file. */
1730 static const struct macro_token *
1731 find_doend (const struct macro_expansion_stack *stack,
1732 const struct macro_token *p, const struct macro_token *end)
1735 for (; p < end; p++)
/* Tokens that are not macro identifiers cannot be !DO or !DOEND. */
1737 if (p->token.type != T_MACRO_ID)
1740 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1742 else if (lex_id_match_n (p->token.string, ss_cstr ("!DOEND"), 4))
1749 macro_error (stack, NULL, _("Missing !DOEND."));
/* Expands a !DO loop that starts at TOKENS[0] (N_TOKENS tokens are
   available), appending one expansion of the loop body to EXP per iteration.
   Two forms are handled:

     !DO !var !IN expression ... !DOEND
       The expression's value is tokenized and !var takes the syntax of each
       resulting token in turn.

     !DO !var = first !TO last [!BY step] ... !DOEND
       !var takes numeric values from FIRST toward LAST in increments of
       STEP.  A zero !BY value is an error.

   The variable name must be a T_MACRO_ID token that is neither a macro
   keyword nor the name of one of ME->macro's parameters.  The body is
   expanded with a copy of ME that has its own break flag, so that !BREAK
   ends this loop, and an extra stack entry.  Both forms report an error that
   cites the SET MITERATE limit.  On success the result is the number of
   tokens from TOKENS through the closing !DOEND, inclusive.

   NOTE(review): short lines (early returns, statements that advance P, some
   declarations, and the iteration-limit tests) are omitted from this
   listing; confirm the failure return value against the complete file. */
1754 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1755 const struct macro_expander *me,
1756 struct macro_tokens *exp)
1758 const struct macro_token *p = tokens;
1759 const struct macro_token *end = tokens + n_tokens;
/* Not a !DO loop unless the first token is !DO. */
1761 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
1765 if (p >= end || p->token.type != T_MACRO_ID)
1767 macro_error (me->stack, p < end ? p : NULL,
1768 _("Expected macro variable name following !DO."));
/* Reject names that would collide with a keyword or a macro parameter. */
1771 const struct substring var_name = p->token.string;
1772 if (is_macro_keyword (var_name)
1773 || macro_find_parameter_by_name (me->macro, var_name))
1775 macro_error (me->stack, p, _("Cannot use argument name or macro "
1776 "keyword as !DO variable."));
/* The loop body is expanded through SUBME: a copy of ME with a break flag
   private to this loop and SUBSTACK as its stack. */
1781 struct macro_expansion_stack substack = {
1785 bool break_ = false;
1786 struct macro_expander subme = *me;
1787 subme.break_ = &break_;
1788 subme.stack = &substack;
1790 int miterate = settings_get_miterate ();
/* Form 1: loop over the tokens of a list. */
1791 if (p < end && p->token.type == T_MACRO_ID
1792 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1795 char *list = macro_evaluate_expression (&p, end - p, &subme);
1799 struct macro_tokens items = { .n = 0 };
1800 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
1804 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1807 macro_tokens_uninit (&items);
/* Stop early once the body has executed !BREAK. */
1811 for (size_t i = 0; i < items.n && !break_; i++)
1815 macro_error (&substack, NULL,
1816 _("!DO loop over list exceeded "
1817 "maximum number of iterations %d. "
1818 "(Use SET MITERATE to change the limit.)"),
/* Bind the loop variable to a copy of this item's syntax. */
1822 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1823 ss_xstrdup (items.mts[i].syntax));
1825 macro_expand (p, do_end - p, &subme, exp);
/* Tokens consumed, including the !DOEND itself. */
1827 return do_end - tokens + 1;
/* Form 2: numeric loop. */
1829 else if (p < end && p->token.type == T_EQUALS)
1833 if (!macro_evaluate_number (&p, end - p, &subme, &first))
1836 if (p >= end || p->token.type != T_MACRO_ID
1837 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1839 macro_error (subme.stack, p < end ? p : NULL,
1840 _("Expected !TO in numerical !DO loop."));
1846 if (!macro_evaluate_number (&p, end - p, &subme, &last))
/* !BY is optional. */
1850 if (p < end && p->token.type == T_MACRO_ID
1851 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1854 if (!macro_evaluate_number (&p, end - p, &subme, &by))
1859 macro_error (subme.stack, NULL, _("!BY value cannot be zero."));
1864 const struct macro_token *do_end = find_doend (subme.stack, p, end);
/* Iterate only when LAST is reachable from FIRST in the direction of BY. */
1867 if ((by > 0 && first <= last) || (by < 0 && first >= last))
/* The direction test is parenthesized because `&&' binds more tightly than
   `?:'.  Without the parentheses the !BREAK flag was consulted only for
   negative steps, so a loop with a positive step kept iterating (rebinding
   the variable and counting toward the iteration limit) after !BREAK. */
1870 for (double index = first;
1871 (by > 0 ? index <= last : index >= last) && !break_;
1876 macro_error (subme.stack, NULL,
1877 _("Numerical !DO loop exceeded "
1878 "maximum number of iterations %d. "
1879 "(Use SET MITERATE to change the limit.)"),
/* Bind the loop variable to INDEX formatted as text by c_dtoastr(). */
1884 char index_s[DBL_BUFSIZE_BOUND];
1885 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1886 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1889 macro_expand (p, do_end - p, &subme, exp);
/* Tokens consumed, including the !DOEND itself. */
1893 return do_end - tokens + 1;
/* Neither `=' nor !IN follows the variable name. */
1897 macro_error (me->stack, p < end ? p : NULL,
1898 _("Expected `=' or !IN in !DO loop."));
/* Appends to EXP the argument that ME holds for parameter number IDX of
   ME->macro.  When expansion is currently enabled (*ME->expand) and the
   parameter has expand_arg set, the argument's tokens are themselves
   macro-expanded, using a sub-expander that has a fresh, empty variable map
   and a stack entry named after the parameter.  The final loop copies the
   argument's tokens to EXP unchanged.

   NOTE(review): several initializer lines, and presumably the `else' that
   separates the two paths, are omitted from this listing; confirm against
   the complete file. */
1904 macro_expand_arg (const struct macro_expander *me, size_t idx,
1905 struct macro_tokens *exp)
1907 const struct macro_param *param = &me->macro->params[idx];
1908 const struct macro_tokens *arg = me->args[idx];
1910 if (*me->expand && param->expand_arg)
/* VARS is local to the argument's expansion and is destroyed afterward. */
1912 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1913 struct macro_expansion_stack stack = {
1914 .name = param->name,
/* The sub-expander shares ME's macro set, segmenter mode, expansion flag,
   and nesting countdown. */
1917 struct macro_expander subme = {
1918 .macros = me->macros,
1921 .segmenter_mode = me->segmenter_mode,
1922 .expand = me->expand,
1925 .nesting_countdown = me->nesting_countdown,
1928 macro_expand (arg->mts, arg->n, &subme, exp);
1929 stringi_map_destroy (&vars);
/* Copy the argument's tokens verbatim. */
1932 for (size_t i = 0; i < arg->n; i++)
1933 macro_tokens_add (exp, &arg->mts[i]);
/* Expands the single construct that begins at MTS[0], with N tokens
   available, appending its output to EXP.  The cases visible in this
   listing, in order, are:

     - a call to another macro, whose body is expanded with a nesting
       countdown one lower than ME's;
     - a token that is not a T_MACRO_ID, which is copied unchanged;
     - the name of one of ME->macro's parameters;
     - the form recognized by is_bang_star(), which expands every parameter;
     - a variable set by !DO or !LET;
     - a macro function;
     - !IF, !LET, and !DO constructs;
     - the !BREAK, !ONEXPAND, and !OFFEXPAND keywords;
     - anything else, which is copied unchanged.

   macro_expand() asserts that the result is greater than zero and no larger
   than the number of tokens it passed in.

   NOTE(review): the guards around most cases and all return statements are
   on lines omitted from this listing; presumably the result is a count of
   tokens consumed -- confirm against the complete file. */
1937 macro_expand__ (const struct macro_token *mts, size_t n,
1938 const struct macro_expander *me,
1939 struct macro_tokens *exp)
1941 const struct token *token = &mts[0].token;
1943 /* Recursive macro calls. */
1946 struct macro_call *submc;
1947 int n_call = macro_call_create (me->macros, token, &submc);
/* Keep feeding tokens to the call while N_CALL stays zero.  Once the
   available tokens run out, a T_ENDCMD token is supplied in their place. */
1948 for (size_t j = 1; !n_call; j++)
1950 const struct macro_token endcmd
1951 = { .token = { .type = T_ENDCMD } };
1952 n_call = macro_call_add (submc, j < n ? &mts[j] : &endcmd);
/* Expand the called macro's body with its own variable map and a stack entry
   that describes the called macro's definition. */
1956 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1957 struct macro_expansion_stack stack = {
1958 .name = submc->macro->name,
1959 .file_name = submc->macro->file_name,
1960 .first_line = submc->macro->first_line,
1961 .last_line = submc->macro->last_line,
1964 struct macro_expander subme = {
1965 .macros = submc->macros,
1966 .macro = submc->macro,
1967 .args = submc->args,
1968 .segmenter_mode = me->segmenter_mode,
1969 .expand = me->expand,
1972 .nesting_countdown = me->nesting_countdown - 1,
1975 const struct macro_tokens *body = &submc->macro->body;
1976 macro_expand (body->mts, body->n, &subme, exp);
1977 macro_call_destroy (submc);
1978 stringi_map_destroy (&vars);
1982 macro_call_destroy (submc);
/* Tokens that are not macro identifiers pass through unchanged. */
1985 if (token->type != T_MACRO_ID)
1987 macro_tokens_add (exp, &mts[0]);
/* Macro parameters. */
1994 const struct macro_param *param = macro_find_parameter_by_name (
1995 me->macro, token->string);
/* The pointer difference is the parameter's index within the macro. */
1998 macro_expand_arg (me, param - me->macro->params, exp);
2001 else if (is_bang_star (mts, n))
2003 for (size_t j = 0; j < me->macro->n_params; j++)
2004 macro_expand_arg (me, j, exp);
2009 /* Variables set by !DO or !LET. */
2010 const char *var = stringi_map_find__ (me->vars, token->string.string,
2011 token->string.length);
/* The variable's value is tokenized and appended to EXP. */
2014 macro_tokens_from_string__ (exp, ss_cstr (var),
2015 me->segmenter_mode, me->stack);
2019 /* Macro functions. */
2020 struct string function_output = DS_EMPTY_INITIALIZER;
2021 size_t n_function = expand_macro_function (me, mts, n, &function_output);
2024 macro_tokens_from_string__ (exp, function_output.ss,
2025 me->segmenter_mode, me->stack);
2026 ds_destroy (&function_output);
/* Control constructs. */
2031 size_t n_if = macro_expand_if (mts, n, me, exp);
2035 size_t n_let = macro_parse_let (mts, n, me);
2039 size_t n_do = macro_expand_do (mts, n, me, exp);
/* Keywords that change expansion state. */
2043 if (lex_id_match_n (token->string, ss_cstr ("!break"), 4))
2048 macro_error (me->stack, &mts[0], _("!BREAK outside !DO."));
2050 else if (lex_id_match_n (token->string, ss_cstr ("!onexpand"), 4))
2052 else if (lex_id_match_n (token->string, ss_cstr ("!offexpand"), 4))
2053 *me->expand = false;
/* Any other macro identifier is copied unchanged. */
2055 macro_tokens_add (exp, &mts[0]);
/* Expands the N tokens in MTS on behalf of ME, appending the result to EXP.
   If ME's nesting countdown has reached zero or below, an error that cites
   the SET MNEST limit is reported and the tokens are copied to EXP without
   expansion.  Otherwise the tokens are processed one construct at a time
   through macro_expand__(), with a check of ME's break flag, when ME has
   one, before each construct.

   NOTE(review): the statements that leave the function after the nesting
   error, that react to the break flag, and that advance I are on lines
   omitted from this listing; confirm against the complete file. */
2060 macro_expand (const struct macro_token *mts, size_t n,
2061 const struct macro_expander *me,
2062 struct macro_tokens *exp)
2064 if (me->nesting_countdown <= 0)
2066 macro_error (me->stack, NULL, _("Maximum nesting level %d exceeded. "
2067 "(Use SET MNEST to change the limit.)"),
2068 settings_get_mnest ());
/* Pass the tokens through unexpanded. */
2069 for (size_t i = 0; i < n; i++)
2070 macro_tokens_add (exp, &mts[i]);
/* The loop has no increment expression of its own. */
2074 for (size_t i = 0; i < n; )
2076 if (me->break_ && *me->break_)
/* Each call must consume at least one token and must not run past the end of
   MTS. */
2079 size_t consumed = macro_expand__ (&mts[i], n - i, me, exp);
2080 assert (consumed > 0 && i + consumed <= n);
/* Expands the body of the macro invoked by MC, appending the result to EXP.
   SEGMENTER_MODE is stored in the expander used for the expansion.  MC must
   be in state MC_FINISHED, which is asserted.  The expansion uses a fresh
   variable map, a stack entry that describes the macro's definition (name,
   file name, first and last line), and a nesting countdown that starts at
   settings_get_mnest().

   NOTE(review): some initializer lines of ME, and the declaration that the
   expansion flag presumably points to, are omitted from this listing;
   confirm against the complete file. */
2086 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2087 struct macro_tokens *exp)
2089 assert (mc->state == MC_FINISHED);
/* VARS lives only for the duration of this expansion. */
2092 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
2093 struct macro_expansion_stack stack = {
2094 .name = mc->macro->name,
2095 .file_name = mc->macro->file_name,
2096 .first_line = mc->macro->first_line,
2097 .last_line = mc->macro->last_line,
2099 struct macro_expander me = {
2100 .macros = mc->macros,
2103 .segmenter_mode = segmenter_mode,
2107 .nesting_countdown = settings_get_mnest (),
2111 const struct macro_tokens *body = &mc->macro->body;
2112 macro_expand (body->mts, body->n, &me, exp);
2114 stringi_map_destroy (&vars);