1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
25 #include "data/settings.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/segment.h"
28 #include "language/lexer/scan.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/str.h"
34 #include "libpspp/string-array.h"
35 #include "libpspp/stringi-map.h"
36 #include "libpspp/stringi-set.h"
38 #include "gl/c-ctype.h"
39 #include "gl/ftoastr.h"
42 #define _(msgid) gettext (msgid)
44 /* An entry in the stack of macros and macro directives being expanded. The
45 stack is maintained as a linked list. Entries are not dynamically allocated
46 but on the program stack. */
47 struct macro_expansion_stack
49 /* Points to an outer stack entry, or NULL if this is the outermost. */
50 const struct macro_expansion_stack *next;
52 /* A macro name or !IF, !DO, etc. */
55 /* Location of the macro definition, if available. */
56 const char *file_name;
61 /* Reports an error during macro expansion. STACK is the stack for reporting
62 the location of the error, MT is the optional token at which the error was
63 detected, and FORMAT along with the varargs is the message to report. */
64 static void PRINTF_FORMAT (3, 4)
65 macro_error (const struct macro_expansion_stack *stack,
66 const struct macro_token *mt,
67 const char *format, ...)
69 struct msg_stack **ms = NULL;
70 size_t allocated_ms = 0;
73 for (const struct macro_expansion_stack *p = stack; p; p = p->next)
75 if (n_ms >= allocated_ms)
76 ms = x2nrealloc (ms, &allocated_ms, sizeof *ms);
78 /* TRANSLATORS: These strings are used for explaining the context of an
79 error. The "While expanding" message appears first, followed by zero
80 or more of the "inside expansion" messages. `innermost',
81 `next_inner', etc., are names of macros, and `foobar' is a piece of
84 foo.sps:12: At `foobar' in the expansion of 'innermost',
85 foo.sps:23: inside the expansion of 'next_inner',
86 foo.sps:34: inside the expansion of 'next_inner2',
87 foo.sps:45: inside the expansion of 'outermost',
88 foo.sps:76: This is the actual error message. */
92 if (mt && mt->representation.length)
95 str_ellipsize (mt->representation, syntax, sizeof syntax);
96 description = xasprintf (_("At `%s' in the expansion of `%s',"),
100 description = xasprintf (_("In the expansion of `%s',"), p->name);
103 description = xasprintf (_("inside the expansion of `%s',"), p->name);
105 ms[n_ms] = xmalloc (sizeof *ms[n_ms]);
106 *ms[n_ms] = (struct msg_stack) {
108 .file_name = xstrdup_if_nonnull (p->file_name),
109 .first_line = p->first_line,
110 .last_line = p->last_line,
112 .description = description,
118 va_start (args, format);
119 char *s = xvasprintf (format, args);
122 struct msg *m = xmalloc (sizeof *m);
124 .category = MSG_C_SYNTAX,
125 .severity = MSG_S_ERROR,
134 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
136 token_copy (&dst->token, &src->token);
137 ss_alloc_substring (&dst->representation, src->representation);
141 macro_token_uninit (struct macro_token *mt)
143 token_uninit (&mt->token);
144 ss_dealloc (&mt->representation);
148 macro_token_to_representation (struct macro_token *mt, struct string *s)
150 ds_put_substring (s, mt->representation);
153 is_macro_keyword (struct substring s)
155 static struct stringi_set keywords = STRINGI_SET_INITIALIZER (keywords);
156 if (stringi_set_is_empty (&keywords))
158 static const char *kws[] = {
179 for (size_t i = 0; i < sizeof kws / sizeof *kws; i++)
180 stringi_set_insert (&keywords, kws[i]);
183 ss_ltrim (&s, ss_cstr ("!"));
184 return stringi_set_contains_len (&keywords, s.string, s.length);
188 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
190 *dst = (struct macro_tokens) {
191 .mts = xmalloc (src->n * sizeof *dst->mts),
195 for (size_t i = 0; i < src->n; i++)
196 macro_token_copy (&dst->mts[i], &src->mts[i]);
200 macro_tokens_uninit (struct macro_tokens *mts)
202 for (size_t i = 0; i < mts->n; i++)
203 macro_token_uninit (&mts->mts[i]);
208 macro_tokens_add_uninit (struct macro_tokens *mts)
210 if (mts->n >= mts->allocated)
211 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
212 return &mts->mts[mts->n++];
216 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
218 macro_token_copy (macro_tokens_add_uninit (mts), mt);
221 /* Tokenizes SRC according to MODE and appends the tokens to MTS. Uses STACK,
222 if nonnull, for error reporting. */
224 macro_tokens_from_string__ (struct macro_tokens *mts, const struct substring src,
225 enum segmenter_mode mode,
226 const struct macro_expansion_stack *stack)
230 struct segmenter segmenter;
231 struct substring body;
234 struct state state = {
235 .segmenter = segmenter_init (mode, true),
238 struct state saved = state;
240 while (state.body.length > 0)
242 struct macro_token mt = {
243 .token = { .type = T_STOP },
244 .representation = { .string = state.body.string },
246 struct token *token = &mt.token;
248 struct scanner scanner;
249 scanner_init (&scanner, token);
253 enum segment_type type;
254 int seg_len = segmenter_push (&state.segmenter, state.body.string,
255 state.body.length, true, &type);
256 assert (seg_len >= 0);
258 struct substring segment = ss_head (state.body, seg_len);
259 ss_advance (&state.body, seg_len);
261 enum scan_result result = scanner_push (&scanner, type, segment, token);
262 if (result == SCAN_SAVE)
264 else if (result == SCAN_BACK)
269 else if (result == SCAN_DONE)
273 /* We have a token in 'token'. */
274 mt.representation.length = state.body.string - mt.representation.string;
275 if (is_scan_type (token->type))
277 if (token->type != SCAN_SKIP)
279 char *s = scan_token_to_error (token);
282 mt.token.type = T_STRING;
283 macro_error (stack, &mt, "%s", s);
291 macro_tokens_add (mts, &mt);
292 token_uninit (token);
296 /* Tokenizes SRC according to MODE and appends the tokens to MTS. */
298 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
299 enum segmenter_mode mode)
301 macro_tokens_from_string__ (mts, src, mode, NULL);
305 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
307 for (size_t i = 0; i < mts->n; i++)
308 token_print (&mts->mts[i].token, stream);
313 TC_ENDCMD, /* No space before or after (new-line after). */
314 TC_BINOP, /* Space on both sides. */
315 TC_COMMA, /* Space afterward. */
316 TC_ID, /* Don't need spaces except sequentially. */
317 TC_PUNCT, /* Don't need spaces except sequentially. */
321 needs_space (enum token_class prev, enum token_class next)
323 /* Don't need a space before or after the end of a command.
324 (A new-line is needed afterward as a special case.) */
325 if (prev == TC_ENDCMD || next == TC_ENDCMD)
328 /* Binary operators always have a space on both sides. */
329 if (prev == TC_BINOP || next == TC_BINOP)
332 /* A comma always has a space afterward. */
333 if (prev == TC_COMMA)
336 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
337 two of them in a row. */
341 static enum token_class
342 classify_token (enum token_type type)
394 /* Appends a syntax representation of the tokens in MTS to S. If OFS and LEN
395 are nonnull, sets OFS[i] to the offset within S of the start of token 'i' in
396 MTS and LEN[i] to its length. OFS[i] + LEN[i] is not necessarily OFS[i + 1]
397 because some tokens are separated by white space. */
399 macro_tokens_to_representation (struct macro_tokens *mts, struct string *s,
400 size_t *ofs, size_t *len)
402 assert ((ofs != NULL) == (len != NULL));
407 for (size_t i = 0; i < mts->n; i++)
411 enum token_type prev = mts->mts[i - 1].token.type;
412 enum token_type next = mts->mts[i].token.type;
414 if (prev == T_ENDCMD)
415 ds_put_byte (s, '\n');
418 enum token_class pc = classify_token (prev);
419 enum token_class nc = classify_token (next);
420 if (needs_space (pc, nc))
421 ds_put_byte (s, ' ');
426 ofs[i] = s->ss.length;
427 macro_token_to_representation (&mts->mts[i], s);
429 len[i] = s->ss.length - ofs[i];
434 macro_destroy (struct macro *m)
441 for (size_t i = 0; i < m->n_params; i++)
443 struct macro_param *p = &m->params[i];
446 macro_tokens_uninit (&p->def);
454 token_uninit (&p->charend);
458 token_uninit (&p->enclose[0]);
459 token_uninit (&p->enclose[1]);
467 macro_tokens_uninit (&m->body);
472 macro_set_create (void)
474 struct macro_set *set = xmalloc (sizeof *set);
475 *set = (struct macro_set) {
476 .macros = HMAP_INITIALIZER (set->macros),
482 macro_set_destroy (struct macro_set *set)
487 struct macro *macro, *next;
488 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
490 hmap_delete (&set->macros, ¯o->hmap_node);
491 macro_destroy (macro);
493 hmap_destroy (&set->macros);
498 hash_macro_name (const char *name)
500 return utf8_hash_case_string (name, 0);
503 static struct macro *
504 macro_set_find__ (struct macro_set *set, const char *name)
506 if (macro_set_is_empty (set))
510 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
511 hash_macro_name (name), &set->macros)
512 if (!utf8_strcasecmp (macro->name, name))
519 macro_set_find (const struct macro_set *set, const char *name)
521 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
524 /* Adds M to SET. M replaces any existing macro with the same name. Takes
527 macro_set_add (struct macro_set *set, struct macro *m)
529 struct macro *victim = macro_set_find__ (set, m->name);
532 hmap_delete (&set->macros, &victim->hmap_node);
533 macro_destroy (victim);
536 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
539 /* Macro call parsing. */
546 /* Accumulating tokens in mc->params toward the end of any type of
550 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
553 /* Expecting a keyword for a keyword argument. */
556 /* Expecting an equal sign for a keyword argument. */
559 /* Macro fully parsed and ready for expansion. */
563 /* Parsing macro calls. This is a FSM driven by macro_call_create() and
564 macro_call_add() to identify the macro being called and obtain its
565 arguments. 'state' identifies the FSM state. */
568 const struct macro_set *macros;
569 const struct macro *macro;
570 struct macro_tokens **args;
574 const struct macro_param *param; /* Parameter currently being parsed. */
577 /* Completes macro expansion by initializing arguments that weren't supplied to
580 mc_finished (struct macro_call *mc)
582 mc->state = MC_FINISHED;
583 for (size_t i = 0; i < mc->macro->n_params; i++)
585 mc->args[i] = &mc->macro->params[i].def;
590 mc_next_arg (struct macro_call *mc)
594 assert (!mc->macro->n_params);
595 return mc_finished (mc);
597 else if (mc->param->positional)
600 if (mc->param >= &mc->macro->params[mc->macro->n_params])
601 return mc_finished (mc);
604 mc->state = (!mc->param->positional ? MC_KEYWORD
605 : mc->param->arg_type == ARG_ENCLOSE ? MC_ENCLOSE
612 for (size_t i = 0; i < mc->macro->n_params; i++)
615 mc->state = MC_KEYWORD;
618 return mc_finished (mc);
623 mc_error (struct macro_call *mc)
625 mc->state = MC_ERROR;
630 mc_add_arg (struct macro_call *mc, const struct macro_token *mt)
632 const struct macro_param *p = mc->param;
634 const struct token *token = &mt->token;
635 if ((token->type == T_ENDCMD || token->type == T_STOP)
636 && p->arg_type != ARG_CMDEND)
638 msg (SE, _("Unexpected end of command reading argument %s "
639 "to macro %s."), mc->param->name, mc->macro->name);
641 return mc_error (mc);
646 struct macro_tokens **argp = &mc->args[p - mc->macro->params];
648 *argp = xzalloc (sizeof **argp);
649 struct macro_tokens *arg = *argp;
650 if (p->arg_type == ARG_N_TOKENS)
652 macro_tokens_add (arg, mt);
653 if (arg->n >= p->n_tokens)
654 return mc_next_arg (mc);
657 else if (p->arg_type == ARG_CMDEND)
659 if (token->type == T_ENDCMD || token->type == T_STOP)
660 return mc_next_arg (mc);
661 macro_tokens_add (arg, mt);
666 const struct token *end
667 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
668 if (token_equal (token, end))
669 return mc_next_arg (mc);
670 macro_tokens_add (arg, mt);
676 mc_expected (struct macro_call *mc, const struct macro_token *actual,
677 const struct token *expected)
679 const struct substring actual_s
680 = (actual->representation.length ? actual->representation
681 : ss_cstr (_("<end of input>")));
682 char *expected_s = token_to_string (expected);
683 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
685 (int) actual_s.length, actual_s.string, expected_s,
686 mc->param->name, mc->macro->name);
689 return mc_error (mc);
693 mc_enclose (struct macro_call *mc, const struct macro_token *mt)
695 const struct token *token = &mt->token;
698 if (token_equal (&mc->param->enclose[0], token))
704 return mc_expected (mc, mt, &mc->param->enclose[0]);
707 static const struct macro_param *
708 macro_find_parameter_by_name (const struct macro *m, struct substring name)
710 ss_ltrim (&name, ss_cstr ("!"));
712 for (size_t i = 0; i < m->n_params; i++)
714 const struct macro_param *p = &m->params[i];
715 struct substring p_name = ss_cstr (p->name + 1);
716 if (!utf8_strncasecmp (p_name.string, p_name.length,
717 name.string, name.length))
724 mc_keyword (struct macro_call *mc, const struct macro_token *mt)
726 const struct token *token = &mt->token;
727 if (token->type != T_ID)
728 return mc_finished (mc);
730 const struct macro_param *p = macro_find_parameter_by_name (mc->macro,
734 size_t arg_index = p - mc->macro->params;
736 if (mc->args[arg_index])
739 _("Argument %s multiply specified in call to macro %s."),
740 p->name, mc->macro->name);
741 return mc_error (mc);
745 mc->state = MC_EQUALS;
749 return mc_finished (mc);
753 mc_equals (struct macro_call *mc, const struct macro_token *mt)
755 const struct token *token = &mt->token;
758 if (token->type == T_EQUALS)
764 return mc_expected (mc, mt, &(struct token) { .type = T_EQUALS });
767 /* If TOKEN is the first token of a call to a macro in MACROS, creates a new
768 macro expander and initializes *MCP to it. Returns 0 if more tokens are needed
769 and should be added via macro_call_add() or 1 if the caller should next call
770 macro_call_get_expansion().
772 If TOKEN is not the first token of a macro call, returns -1 and sets *MCP to
775 macro_call_create (const struct macro_set *macros,
776 const struct token *token,
777 struct macro_call **mcp)
779 const struct macro *macro = (token->type == T_ID || token->type == T_MACRO_ID
780 ? macro_set_find (macros, token->string.string)
788 struct macro_call *mc = xmalloc (sizeof *mc);
789 *mc = (struct macro_call) {
793 .state = (!macro->n_params ? MC_FINISHED
794 : !macro->params[0].positional ? MC_KEYWORD
795 : macro->params[0].arg_type == ARG_ENCLOSE ? MC_ENCLOSE
797 .args = macro->n_params ? xcalloc (macro->n_params, sizeof *mc->args) : NULL,
798 .param = macro->params,
802 return mc->state == MC_FINISHED ? 1 : 0;
806 macro_call_destroy (struct macro_call *mc)
811 for (size_t i = 0; i < mc->macro->n_params; i++)
813 struct macro_tokens *a = mc->args[i];
814 if (a && a != &mc->macro->params[i].def)
816 macro_tokens_uninit (a);
824 /* Adds TOKEN to the collection of tokens in MC that potentially need to be
827 Returns -1 if the tokens added do not actually invoke a macro. The caller
828 should consume the first token without expanding it. (Later tokens might
829 invoke a macro so it's best to feed the second token into a new expander.)
831 Returns 0 if the macro expander needs more tokens, for macro arguments or to
832 decide whether this is actually a macro invocation. The caller should call
833 macro_call_add() again with the next token.
835 Returns a positive number to indicate that the returned number of tokens
836 invoke a macro. The number returned might be less than the number of tokens
837 added because it can take a few tokens of lookahead to determine whether the
838 macro invocation is finished. The caller should call
839 macro_call_get_expansion() to obtain the expansion. */
841 macro_call_add (struct macro_call *mc, const struct macro_token *mt)
849 return mc_add_arg (mc, mt);
852 return mc_enclose (mc, mt);
855 return mc_keyword (mc, mt);
858 return mc_equals (mc, mt);
865 /* Macro expansion. */
867 struct macro_expander
869 /* Always available. */
870 const struct macro_set *macros; /* Macros to expand recursively. */
871 enum segmenter_mode segmenter_mode; /* Mode for tokenization. */
872 int nesting_countdown; /* Remaining nesting levels. */
873 const struct macro_expansion_stack *stack; /* Stack for error reporting. */
874 bool *expand; /* May macro calls be expanded? */
875 struct stringi_map *vars; /* Variables from !DO and !LET. */
877 /* Only nonnull if inside a !DO loop. */
878 bool *break_; /* Set to true to break out of loop. */
880 /* Only nonnull if expanding a macro (and not, say, a macro argument). */
881 const struct macro *macro;
882 struct macro_tokens **args;
886 macro_expand (const struct macro_tokens *, const struct macro_expander *,
887 struct macro_tokens *);
890 expand_macro_function (const struct macro_expander *me,
891 const struct macro_token *input, size_t n_input,
892 struct string *output, size_t *input_consumed);
894 /* Returns true if the N tokens within MTS start with !*, false otherwise. */
896 is_bang_star (const struct macro_token *mts, size_t n)
899 && mts[0].token.type == T_MACRO_ID
900 && ss_equals (mts[0].token.string, ss_cstr ("!"))
901 && mts[1].token.type == T_ASTERISK);
904 /* Parses one function argument from the N_INPUT tokens in INPUT.
905 Each argument to a macro function is one of:
907 - A quoted string or other single literal token.
909 - An argument to the macro being expanded, e.g. !1 or a named argument.
913 - A function invocation.
915 Each function invocation yields a character sequence to be turned into a
916 sequence of tokens. The case where that character sequence is a single
917 quoted string is an important special case.
920 parse_function_arg (const struct macro_expander *me,
921 const struct macro_token *input, size_t n_input,
924 assert (n_input > 0);
926 const struct token *token = &input[0].token;
927 if (token->type == T_MACRO_ID && me->macro)
929 const struct macro_param *param = macro_find_parameter_by_name (
930 me->macro, token->string);
933 size_t param_idx = param - me->macro->params;
934 const struct macro_tokens *marg = me->args[param_idx];
935 for (size_t i = 0; i < marg->n; i++)
938 ds_put_byte (farg, ' ');
939 ds_put_substring (farg, marg->mts[i].representation);
944 if (is_bang_star (input, n_input))
946 for (size_t i = 0; i < me->macro->n_params; i++)
948 if (!me->macro->params[i].positional)
951 const struct macro_tokens *marg = me->args[i];
952 for (size_t j = 0; j < marg->n; j++)
955 ds_put_byte (farg, ' ');
956 ds_put_substring (farg, marg->mts[j].representation);
962 const char *value = stringi_map_find__ (me->vars,
963 token->string.string,
964 token->string.length);
967 ds_put_cstr (farg, value);
971 size_t subinput_consumed;
972 if (expand_macro_function (me, input, n_input,
973 farg, &subinput_consumed))
974 return subinput_consumed;
977 ds_put_substring (farg, input[0].representation);
982 parse_function_args (const struct macro_expander *me,
983 const struct macro_token *mts, size_t n,
984 const char *function,
985 struct string_array *args)
987 if (n < 2 || mts[1].token.type != T_LPAREN)
989 macro_error (me->stack, n > 1 ? &mts[1] : NULL,
990 _("`(' expected following %s."), function);
994 for (size_t i = 2; i < n; )
996 if (mts[i].token.type == T_RPAREN)
999 struct string s = DS_EMPTY_INITIALIZER;
1000 i += parse_function_arg (me, mts + i, n - i, &s);
1001 string_array_append_nocopy (args, ds_steal_cstr (&s));
1005 else if (mts[i].token.type == T_COMMA)
1007 else if (mts[i].token.type != T_RPAREN)
1009 macro_error (me->stack, &mts[i],
1010 _("`,' or `)' expected in call to macro function %s."),
1016 macro_error (me->stack, NULL, _("Missing `)' in call to macro function %s."),
1022 unquote_string (const char *s, enum segmenter_mode segmenter_mode,
1023 struct string *content)
1025 struct string_lexer slex;
1026 string_lexer_init (&slex, s, strlen (s), segmenter_mode, true);
1028 struct token token1;
1029 if (!string_lexer_next (&slex, &token1))
1032 if (token1.type != T_STRING)
1034 token_uninit (&token1);
1038 struct token token2;
1039 if (string_lexer_next (&slex, &token2))
1041 token_uninit (&token1);
1042 token_uninit (&token2);
1046 ds_put_substring (content, token1.string);
1047 token_uninit (&token1);
1052 unquote_string_in_place (const char *s, enum segmenter_mode segmenter_mode,
1055 ds_init_empty (tmp);
1056 return unquote_string (s, segmenter_mode, tmp) ? ds_cstr (tmp) : s;
1060 parse_integer (const char *s, int *np)
1065 long int n = strtol (s, &tail, 10);
1066 *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
1067 tail += strspn (tail, CC_SPACES);
1068 return *tail == '\0' && errno != ERANGE && n == *np;
1072 expand_macro_function (const struct macro_expander *me,
1073 const struct macro_token *input, size_t n_input,
1074 struct string *output, size_t *input_consumed)
1076 if (!n_input || input[0].token.type != T_MACRO_ID)
1079 struct macro_function
1085 enum macro_function_id
1100 static const struct macro_function mfs[] = {
1101 [MF_BLANKS] = { "!BLANKS", 1, 1 },
1102 [MF_CONCAT] = { "!CONCAT", 1, INT_MAX },
1103 [MF_EVAL] = { "!EVAL", 1, 1 },
1104 [MF_HEAD] = { "!HEAD", 1, 1 },
1105 [MF_INDEX] = { "!INDEX", 2, 2 },
1106 [MF_LENGTH] = { "!LENGTH", 1, 1 },
1107 [MF_NULL] = { "!NULL", 0, 0 },
1108 [MF_QUOTE] = { "!QUOTE", 1, 1 },
1109 [MF_SUBSTR] = { "!SUBSTR", 2, 3 },
1110 [MF_TAIL] = { "!TAIL", 1, 1 },
1111 [MF_UNQUOTE] = { "!UNQUOTE", 1, 1 },
1112 [MF_UPCASE] = { "!UPCASE", 1, 1 },
1115 /* Is this a macro function? */
1116 const struct macro_function *mf;
1117 for (mf = mfs; ; mf++)
1119 if (mf >= mfs + sizeof mfs / sizeof *mfs)
1121 /* Not a macro function. */
1125 if (lex_id_match_n (ss_cstr (mf->name), input[0].token.string, 4))
1129 enum macro_function_id id = mf - mfs;
1132 *input_consumed = 1;
1136 struct string_array args = STRING_ARRAY_INITIALIZER;
1137 *input_consumed = parse_function_args (me, input, n_input, mf->name, &args);
1138 if (!*input_consumed)
1141 if (args.n < mf->min_args || args.n > mf->max_args)
1143 if (mf->min_args == 1 && mf->max_args == 1)
1144 macro_error (me->stack, NULL,
1145 _("Macro function %s takes one argument (not %zu)."),
1147 else if (mf->min_args == 2 && mf->max_args == 2)
1148 macro_error (me->stack, NULL,
1149 _("Macro function %s takes two arguments (not %zu)."),
1151 else if (mf->min_args == 2 && mf->max_args == 3)
1152 macro_error (me->stack, NULL,
1153 _("Macro function %s takes two or three arguments "
1156 else if (mf->min_args == 1 && mf->max_args == INT_MAX)
1157 macro_error (me->stack, NULL,
1158 _("Macro function %s needs at least one argument."),
1168 ds_put_format (output, "%zu", strlen (args.strings[0]));
1174 if (!parse_integer (args.strings[0], &n))
1176 macro_error (me->stack, NULL,
1177 _("Argument to !BLANKS must be non-negative integer "
1178 "(not \"%s\")."), args.strings[0]);
1179 string_array_destroy (&args);
1183 ds_put_byte_multiple (output, ' ', n);
1188 for (size_t i = 0; i < args.n; i++)
1189 if (!unquote_string (args.strings[i], me->segmenter_mode, output))
1190 ds_put_cstr (output, args.strings[i]);
1196 const char *s = unquote_string_in_place (args.strings[0],
1197 me->segmenter_mode, &tmp);
1199 struct macro_tokens mts = { .n = 0 };
1200 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1203 ds_put_substring (output, mts.mts[0].representation);
1204 macro_tokens_uninit (&mts);
1211 const char *haystack = args.strings[0];
1212 const char *needle = strstr (haystack, args.strings[1]);
1213 ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0);
1218 if (unquote_string (args.strings[0], me->segmenter_mode, NULL))
1219 ds_put_cstr (output, args.strings[0]);
1222 ds_extend (output, strlen (args.strings[0]) + 2);
1223 ds_put_byte (output, '\'');
1224 for (const char *p = args.strings[0]; *p; p++)
1227 ds_put_byte (output, '\'');
1228 ds_put_byte (output, *p);
1230 ds_put_byte (output, '\'');
1237 if (!parse_integer (args.strings[1], &start) || start < 1)
1239 macro_error (me->stack, NULL,
1240 _("Second argument of !SUBSTR must be "
1241 "positive integer (not \"%s\")."),
1243 string_array_destroy (&args);
1247 int count = INT_MAX;
1248 if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
1250 macro_error (me->stack, NULL,
1251 _("Third argument of !SUBSTR must be "
1252 "non-negative integer (not \"%s\")."),
1254 string_array_destroy (&args);
1258 struct substring s = ss_cstr (args.strings[0]);
1259 ds_put_substring (output, ss_substr (s, start - 1, count));
1266 const char *s = unquote_string_in_place (args.strings[0],
1267 me->segmenter_mode, &tmp);
1269 struct macro_tokens mts = { .n = 0 };
1270 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode,
1274 struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
1275 macro_tokens_to_representation (&tail, output, NULL, NULL);
1277 macro_tokens_uninit (&mts);
1283 if (!unquote_string (args.strings[0], me->segmenter_mode, output))
1284 ds_put_cstr (output, args.strings[0]);
1290 const char *s = unquote_string_in_place (args.strings[0],
1291 me->segmenter_mode, &tmp);
1292 char *upper = utf8_to_upper (s);
1293 ds_put_cstr (output, upper);
1301 struct macro_tokens mts = { .n = 0 };
1302 macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]),
1303 me->segmenter_mode, me->stack);
1304 struct macro_tokens exp = { .n = 0 };
1305 struct macro_expansion_stack stack = {
1309 struct macro_expander subme = *me;
1310 subme.break_ = NULL;
1311 subme.stack = &stack;
1313 macro_expand (&mts, &subme, &exp);
1314 macro_tokens_to_representation (&exp, output, NULL, NULL);
1315 macro_tokens_uninit (&exp);
1316 macro_tokens_uninit (&mts);
1324 string_array_destroy (&args);
1328 static char *macro_evaluate_or (const struct macro_expander *me,
1329 const struct macro_token **tokens,
1330 const struct macro_token *end);
1333 macro_evaluate_literal (const struct macro_expander *me,
1334 const struct macro_token **tokens,
1335 const struct macro_token *end)
1337 const struct macro_token *p = *tokens;
1340 if (p->token.type == T_LPAREN)
1343 char *value = macro_evaluate_or (me, &p, end);
1346 if (p >= end || p->token.type != T_RPAREN)
1349 macro_error (me->stack, p < end ? p : NULL,
1350 _("Expecting ')' in macro expression."));
1357 else if (p->token.type == T_RPAREN)
1359 macro_error (me->stack, p, _("Expecting literal or function invocation "
1360 "in macro expression."));
1364 struct string function_output = DS_EMPTY_INITIALIZER;
1365 size_t function_consumed = parse_function_arg (me, p, end - p,
1367 struct string unquoted = DS_EMPTY_INITIALIZER;
1368 if (unquote_string (ds_cstr (&function_output), me->segmenter_mode,
1371 ds_swap (&function_output, &unquoted);
1372 ds_destroy (&unquoted);
1374 *tokens = p + function_consumed;
1375 return ds_steal_cstr (&function_output);
1378 /* Returns true if MT is valid as a macro operator. Only operators written as
1379 symbols (e.g. <>) are usable in macro expressions, not operators written as
1380 letters (e.g. EQ). */
1382 is_macro_operator (const struct macro_token *mt)
1384 return (mt->representation.length > 0
1385 && !c_isalpha (mt->representation.string[0]));
1388 static enum token_type
1389 parse_relational_op (const struct macro_token *mt)
1391 switch (mt->token.type)
1401 return is_macro_operator (mt) ? mt->token.type : T_STOP;
1404 return (ss_equals_case (mt->token.string, ss_cstr ("!EQ")) ? T_EQ
1405 : ss_equals_case (mt->token.string, ss_cstr ("!NE")) ? T_NE
1406 : ss_equals_case (mt->token.string, ss_cstr ("!LT")) ? T_LT
1407 : ss_equals_case (mt->token.string, ss_cstr ("!GT")) ? T_GT
1408 : ss_equals_case (mt->token.string, ss_cstr ("!LE")) ? T_LE
1409 : ss_equals_case (mt->token.string, ss_cstr ("!GE")) ? T_GE
1418 macro_evaluate_relational (const struct macro_expander *me,
1419 const struct macro_token **tokens,
1420 const struct macro_token *end)
1422 const struct macro_token *p = *tokens;
1423 char *lhs = macro_evaluate_literal (me, &p, end);
1427 enum token_type op = p >= end ? T_STOP : parse_relational_op (p);
1435 char *rhs = macro_evaluate_literal (me, &p, end);
1442 struct string lhs_tmp, rhs_tmp;
1443 int cmp = strcmp (unquote_string_in_place (lhs, me->segmenter_mode,
1445 unquote_string_in_place (rhs, me->segmenter_mode,
1447 ds_destroy (&lhs_tmp);
1448 ds_destroy (&rhs_tmp);
1453 bool b = (op == T_EQUALS || op == T_EQ ? !cmp
1455 : op == T_LT ? cmp < 0
1456 : op == T_GT ? cmp > 0
1457 : op == T_LE ? cmp <= 0
1458 : /* T_GE */ cmp >= 0);
1461 return xstrdup (b ? "1" : "0");
1465 macro_evaluate_not (const struct macro_expander *me,
1466 const struct macro_token **tokens,
1467 const struct macro_token *end)
1469 const struct macro_token *p = *tokens;
1471 unsigned int negations = 0;
1473 && (ss_equals_case (p->representation, ss_cstr ("!NOT"))
1474 || ss_equals (p->representation, ss_cstr ("~"))))
1480 char *operand = macro_evaluate_relational (me, &p, end);
1481 if (!operand || !negations)
1487 bool b = strcmp (operand, "0") ^ (negations & 1);
1490 return xstrdup (b ? "1" : "0");
1494 macro_evaluate_and (const struct macro_expander *me,
1495 const struct macro_token **tokens,
1496 const struct macro_token *end)
1498 const struct macro_token *p = *tokens;
1499 char *lhs = macro_evaluate_not (me, &p, end);
1504 && (ss_equals_case (p->representation, ss_cstr ("!AND"))
1505 || ss_equals (p->representation, ss_cstr ("&"))))
1508 char *rhs = macro_evaluate_not (me, &p, end);
1515 bool b = strcmp (lhs, "0") && strcmp (rhs, "0");
1518 lhs = xstrdup (b ? "1" : "0");
1525 macro_evaluate_or (const struct macro_expander *me,
1526 const struct macro_token **tokens,
1527 const struct macro_token *end)
1529 const struct macro_token *p = *tokens;
1530 char *lhs = macro_evaluate_and (me, &p, end);
1535 && (ss_equals_case (p->representation, ss_cstr ("!OR"))
1536 || ss_equals (p->representation, ss_cstr ("|"))))
1539 char *rhs = macro_evaluate_and (me, &p, end);
1546 bool b = strcmp (lhs, "0") || strcmp (rhs, "0");
1549 lhs = xstrdup (b ? "1" : "0");
1556 macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
1557 const struct macro_expander *me)
1559 return macro_evaluate_or (me, tokens, *tokens + n_tokens);
1563 macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens,
1564 const struct macro_expander *me,
1567 char *s = macro_evaluate_expression (tokens, n_tokens, me);
1571 struct macro_tokens mts = { .n = 0 };
1572 macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, me->stack);
1573 if (mts.n != 1 || !token_is_number (&mts.mts[0].token))
1575 macro_error (me->stack, mts.n > 0 ? &mts.mts[0] : NULL,
1576 _("Macro expression must evaluate to "
1577 "a number (not \"%s\")."), s);
1579 macro_tokens_uninit (&mts);
1583 *number = token_number (&mts.mts[0].token);
1585 macro_tokens_uninit (&mts);
/* Scans the tokens from P up to (but not including) END for the token that
   terminates the current clause of an !IF construct.

   Only T_MACRO_ID tokens are examined.  !IF, !IFEND and !ELSE are recognized;
   !ELSE is considered only while NESTING is zero, so an !ELSE that belongs to
   an inner !IF is not treated as the end of the clause.

   NOTE(review): NESTING presumably counts currently open inner !IF
   constructs, and the result is presumably NULL when no terminator is found
   (macro_expand_if() tests the result for null) -- confirm. */
1589 static const struct macro_token *
1590 find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
1593 for (; p < end; p++)
/* Tokens that are not macro identifiers cannot be !IF, !ELSE or !IFEND. */
1595 if (p->token.type != T_MACRO_ID)
1598 if (ss_equals_case (p->token.string, ss_cstr ("!IF")))
1600 else if (ss_equals_case (p->token.string, ss_cstr ("!IFEND")))
1606 else if (ss_equals_case (p->token.string, ss_cstr ("!ELSE")) && !nesting)
/* Expands the !IF construct that begins at TOKENS[0], among the N_TOKENS
   tokens at TOKENS, in the context of ME, appending the expansion of the
   selected clause to EXP.

   The construct has the form

     !IF condition !THEN ... [!ELSE ...] !IFEND

   The condition is evaluated with macro_evaluate_expression(); it is false if
   the result is the string "0" and true otherwise.  The clause boundaries are
   located with find_ifend_clause().  The selected clause is expanded with a
   copy of ME whose stack has an additional entry for this construct.

   On success, returns the number of tokens from TOKENS through the token at
   END_IF inclusive.  Syntax problems are reported through macro_error().

   NOTE(review): the early exits are presumably `return 0' -- confirm. */
1613 macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
1614 const struct macro_expander *me,
1615 struct macro_tokens *exp)
1617 const struct macro_token *p = tokens;
1618 const struct macro_token *end = tokens + n_tokens;
/* Nothing to do unless the first token is !IF. */
1620 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
/* Evaluate the condition; P is advanced by the evaluator. */
1624 char *result = macro_evaluate_expression (&p, end - p, me);
1627 bool b = strcmp (result, "0");
1631 || p->token.type != T_MACRO_ID
1632 || !ss_equals_case (p->token.string, ss_cstr ("!THEN")))
1634 macro_error (me->stack, p < end ? p : NULL,
1635 _("!THEN expected in macro !IF construct."));
/* The !THEN clause runs from the token after !THEN to its terminator. */
1639 const struct macro_token *start_then = p + 1;
1640 const struct macro_token *end_then = find_ifend_clause (start_then, end);
1643 macro_error (me->stack, NULL,
1644 _("!ELSE or !IFEND expected in macro !IF construct."));
1648 const struct macro_token *start_else, *end_if;
/* If the !THEN clause ended at !ELSE, an !ELSE clause follows and must in
   turn be terminated by !IFEND. */
1649 if (ss_equals_case (end_then->token.string, ss_cstr ("!ELSE")))
1651 start_else = end_then + 1;
1652 end_if = find_ifend_clause (start_else, end);
1654 || !ss_equals_case (end_if->token.string, ss_cstr ("!IFEND")))
1656 macro_error (me->stack, end_if ? end_if : NULL,
1657 _("!IFEND expected in macro !IF construct."));
/* Choose which clause, if any, to expand: START is its first token and N its
   length in tokens. */
1667 const struct macro_token *start;
1672 n = end_then - start_then;
1674 else if (start_else)
1677 n = end_if - start_else;
/* Wrap the selected clause (without copying it) in a macro_tokens so that it
   can be handed to macro_expand(). */
1687 struct macro_tokens mts = {
1688 .mts = CONST_CAST (struct macro_token *, start),
1691 struct macro_expansion_stack stack = {
1695 struct macro_expander subme = *me;
1696 subme.stack = &stack;
1697 macro_expand (&mts, &subme, exp);
1699 return (end_if + 1) - tokens;
/* Parses and executes the !LET construct that begins at TOKENS[0], among the
   N_TOKENS tokens at TOKENS, in the context of ME.

   The construct has the form

     !LET !var = expression

   The variable name must be a T_MACRO_ID token that is neither a macro
   keyword nor the name of a parameter of the macro being expanded (when there
   is one).  The expression is evaluated with macro_evaluate_expression() and
   its string result is stored under the variable's name in ME->vars,
   replacing any previous value.  Syntax problems are reported through
   macro_error().

   NOTE(review): the caller stores the result in a size_t, so this presumably
   returns the number of tokens consumed, or 0 on failure -- confirm. */
1703 macro_parse_let (const struct macro_token *tokens, size_t n_tokens,
1704 const struct macro_expander *me)
1706 const struct macro_token *p = tokens;
1707 const struct macro_token *end = tokens + n_tokens;
/* Nothing to do unless the first token is !LET. */
1709 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!LET")))
1713 if (p >= end || p->token.type != T_MACRO_ID)
1715 macro_error (me->stack, p < end ? p : NULL,
1716 _("Expected macro variable name following !LET."));
1719 const struct substring var_name = p->token.string;
/* A variable may not shadow a keyword or one of the macro's parameters. */
1720 if (is_macro_keyword (var_name)
1721 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1723 macro_error (me->stack, p < end ? p : NULL,
1724 _("Cannot use argument name or macro keyword "
1725 "\"%.*s\" as !LET variable."),
1726 (int) var_name.length, var_name.string);
1731 if (p >= end || p->token.type != T_EQUALS)
1733 macro_error (me->stack, p < end ? p : NULL,
1734 _("Expected `=' following !LET."));
1739 char *value = macro_evaluate_expression (&p, end - p, me);
/* The `nocopy' variant is given a fresh copy of the name and the evaluated
   VALUE itself; presumably the map takes ownership of both -- confirm. */
1743 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name), value);
/* Scans the tokens from P up to (but not including) END for the !DOEND that
   closes a !DO loop whose body starts at P.

   Only T_MACRO_ID tokens are examined; both !DO and !DOEND are recognized.
   If the scan runs out of tokens, "Missing !DOEND." is reported against
   STACK.

   NOTE(review): inner !DO tokens are presumably counted so that the matching
   !DOEND of a nested loop is skipped, and the result is presumably NULL when
   no match is found (callers compute DO_END - P as the body length) --
   confirm. */
1747 static const struct macro_token *
1748 find_doend (const struct macro_expansion_stack *stack,
1749 const struct macro_token *p, const struct macro_token *end)
1752 for (; p < end; p++)
/* Tokens that are not macro identifiers cannot be !DO or !DOEND. */
1754 if (p->token.type != T_MACRO_ID)
1757 if (ss_equals_case (p->token.string, ss_cstr ("!DO")))
1759 else if (ss_equals_case (p->token.string, ss_cstr ("!DOEND")))
1766 macro_error (stack, NULL, _("Missing !DOEND."));
1771 macro_expand_do (const struct macro_token *tokens, size_t n_tokens,
1772 const struct macro_expander *me,
1773 struct macro_tokens *exp)
1775 const struct macro_token *p = tokens;
1776 const struct macro_token *end = tokens + n_tokens;
1778 if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO")))
1782 if (p >= end || p->token.type != T_MACRO_ID)
1784 macro_error (me->stack, p < end ? p : NULL,
1785 _("Expected macro variable name following !DO."));
1788 const struct substring var_name = p->token.string;
1789 if (is_macro_keyword (var_name)
1790 || (me->macro && macro_find_parameter_by_name (me->macro, var_name)))
1792 macro_error (me->stack, p, _("Cannot use argument name or macro "
1793 "keyword as !DO variable."));
1798 struct macro_expansion_stack substack = {
1802 bool break_ = false;
1803 struct macro_expander subme = *me;
1804 subme.break_ = &break_;
1805 subme.stack = &substack;
1807 int miterate = settings_get_miterate ();
1808 if (p < end && p->token.type == T_MACRO_ID
1809 && ss_equals_case (p->token.string, ss_cstr ("!IN")))
1812 char *list = macro_evaluate_expression (&p, end - p, &subme);
1816 struct macro_tokens items = { .n = 0 };
1817 macro_tokens_from_string__ (&items, ss_cstr (list), me->segmenter_mode,
1821 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1824 macro_tokens_uninit (&items);
1828 const struct macro_tokens inner = {
1829 .mts = CONST_CAST (struct macro_token *, p),
1833 for (size_t i = 0; i < items.n && !break_; i++)
1837 macro_error (&substack, NULL,
1838 _("!DO loop over list exceeded "
1839 "maximum number of iterations %d. "
1840 "(Use SET MITERATE to change the limit.)"),
1844 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1845 ss_xstrdup (items.mts[i].representation));
1847 macro_expand (&inner, &subme, exp);
1849 return do_end - tokens + 1;
1851 else if (p < end && p->token.type == T_EQUALS)
1855 if (!macro_evaluate_number (&p, end - p, &subme, &first))
1858 if (p >= end || p->token.type != T_MACRO_ID
1859 || !ss_equals_case (p->token.string, ss_cstr ("!TO")))
1861 macro_error (subme.stack, p < end ? p : NULL,
1862 _("Expected !TO in numerical !DO loop."));
1868 if (!macro_evaluate_number (&p, end - p, &subme, &last))
1872 if (p < end && p->token.type == T_MACRO_ID
1873 && ss_equals_case (p->token.string, ss_cstr ("!BY")))
1876 if (!macro_evaluate_number (&p, end - p, &subme, &by))
1881 macro_error (subme.stack, NULL, _("!BY value cannot be zero."));
1886 const struct macro_token *do_end = find_doend (subme.stack, p, end);
1889 const struct macro_tokens inner = {
1890 .mts = CONST_CAST (struct macro_token *, p),
1894 if ((by > 0 && first <= last) || (by < 0 && first >= last))
1897 for (double index = first;
1898 by > 0 ? (index <= last) : (index >= last) && !break_;
1903 macro_error (subme.stack, NULL,
1904 _("Numerical !DO loop exceeded "
1905 "maximum number of iterations %d. "
1906 "(Use SET MITERATE to change the limit.)"),
1911 char index_s[DBL_BUFSIZE_BOUND];
1912 c_dtoastr (index_s, sizeof index_s, 0, 0, index);
1913 stringi_map_replace_nocopy (me->vars, ss_xstrdup (var_name),
1916 macro_expand (&inner, &subme, exp);
1920 return do_end - tokens + 1;
1924 macro_error (me->stack, p < end ? p : NULL,
1925 _("Expected `=' or !IN in !DO loop."));
/* Expands the tokens in MTS in the context of ME, appending the result to
   EXP.

   Each token is handled by the first case below that applies to it:

     - a reference to a parameter of the macro being expanded, or !*,
       is replaced by the corresponding argument(s);
     - !IF constructs are handed to macro_expand_if();
     - a T_MACRO_ID token with an entry in ME->vars is replaced by the
       tokenized value of that variable;
     - a call to another macro is expanded recursively, with one less level
       of remaining nesting;
     - tokens that are not T_MACRO_ID are copied unchanged;
     - !BREAK, macro functions, !LET, !DO, !ONEXPAND and !OFFEXPAND are
       processed;
     - anything else is copied unchanged.

   Expansion stops early once the break flag in ME, if any, is set.  If the
   remaining nesting allowance is exhausted, an error is reported and MTS is
   copied to EXP without expansion. */
1931 macro_expand (const struct macro_tokens *mts,
1932 const struct macro_expander *me,
1933 struct macro_tokens *exp)
/* Nesting limit reached: report it and pass the tokens through as-is. */
1935 if (me->nesting_countdown <= 0)
1937 macro_error (me->stack, NULL, _("Maximum nesting level %d exceeded. "
1938 "(Use SET MNEST to change the limit.)"),
1939 settings_get_mnest ());
1940 for (size_t i = 0; i < mts->n; i++)
1941 macro_tokens_add (exp, &mts->mts[i]);
/* Main loop: continues while tokens remain and either there is no break flag
   or the break flag is clear. */
1945 for (size_t i = 0; i < mts->n && (!me->break_ || !*me->break_); i++)
1947 const struct macro_token *mt = &mts->mts[i];
1948 const struct token *token = &mt->token;
/* Parameter references and !* are meaningful only inside a macro body. */
1949 if (token->type == T_MACRO_ID && me->macro)
1951 const struct macro_param *param = macro_find_parameter_by_name (
1952 me->macro, token->string);
/* The parameter's position in the params array indexes the argument array. */
1955 const struct macro_tokens *arg
1956 = me->args[param - me->macro->params];
/* The argument itself is expanded only if expansion is currently enabled and
   the parameter asks for it; this expansion gets a fresh variable map and no
   current macro, and the nesting allowance is not reduced. */
1957 if (*me->expand && param->expand_arg)
1959 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1960 struct macro_expansion_stack stack = {
1961 .name = param->name,
1964 struct macro_expander subme = {
1965 .macros = me->macros,
1968 .segmenter_mode = me->segmenter_mode,
1969 .expand = me->expand,
1972 .nesting_countdown = me->nesting_countdown,
1975 macro_expand (arg, &subme, exp);
1976 stringi_map_destroy (&vars);
/* Otherwise the argument's tokens are copied verbatim. */
1979 for (size_t i = 0; i < arg->n; i++)
1980 macro_tokens_add (exp, &arg->mts[i]);
/* !*: emit the arguments of the macro's positional parameters, one parameter
   at a time, using the same expand-or-copy rule as above. */
1984 if (is_bang_star (mts->mts + i, mts->n - i))
1986 for (size_t j = 0; j < me->macro->n_params; j++)
1988 const struct macro_param *param = &me->macro->params[j];
1989 if (!param->positional)
1992 const struct macro_tokens *arg = me->args[j];
1993 if (*me->expand && param->expand_arg)
1995 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
1996 struct macro_expansion_stack stack = {
2000 struct macro_expander subme = {
2001 .macros = me->macros,
2004 .segmenter_mode = me->segmenter_mode,
2005 .expand = me->expand,
2008 .nesting_countdown = me->nesting_countdown,
2011 macro_expand (arg, &subme, exp);
2012 stringi_map_destroy (&vars);
2015 for (size_t k = 0; k < arg->n; k++)
2016 macro_tokens_add (exp, &arg->mts[k]);
/* !IF construct. */
2022 size_t n = macro_expand_if (&mts->mts[i], mts->n - i, me, exp);
/* Macro variables (set by !LET or !DO): look the token up in ME->vars and, if
   it has a value, tokenize that value into EXP. */
2030 if (token->type == T_MACRO_ID)
2032 const char *value = stringi_map_find__ (me->vars,
2033 token->string.string,
2034 token->string.length);
2037 macro_tokens_from_string__ (exp, ss_cstr (value),
2038 me->segmenter_mode, me->stack);
/* Nested macro call: try to start a call at this token and keep feeding it
   the following tokens, with a synthetic T_ENDCMD token once the input runs
   out, until macro_call_add() returns nonzero. */
2045 struct macro_call *submc;
2046 int retval = macro_call_create (me->macros, token, &submc);
2047 for (size_t j = 1; !retval; j++)
2049 const struct macro_token endcmd
2050 = { .token = { .type = T_ENDCMD } };
2051 retval = macro_call_add (
2052 submc, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
/* Expand the called macro's body with its own variable map, a stack entry
   describing the macro's definition, and one less level of nesting. */
2057 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
2058 struct macro_expansion_stack stack = {
2059 .name = submc->macro->name,
2060 .file_name = submc->macro->file_name,
2061 .first_line = submc->macro->first_line,
2062 .last_line = submc->macro->last_line,
2065 struct macro_expander subme = {
2066 .macros = submc->macros,
2067 .macro = submc->macro,
2068 .args = submc->args,
2069 .segmenter_mode = me->segmenter_mode,
2070 .expand = me->expand,
2073 .nesting_countdown = me->nesting_countdown - 1,
2076 macro_expand (&submc->macro->body, &subme, exp);
2077 macro_call_destroy (submc);
2078 stringi_map_destroy (&vars);
2082 macro_call_destroy (submc);
/* Ordinary tokens are copied unchanged. */
2085 if (token->type != T_MACRO_ID)
2087 macro_tokens_add (exp, mt);
/* !BREAK is an error when no enclosing !DO supplied a break flag. */
2091 if (ss_equals_case (token->string, ss_cstr ("!break")))
2094 macro_error (me->stack, mt, _("!BREAK outside !DO."));
/* Macro functions: on success the function's output is tokenized into EXP and
   the tokens it consumed are skipped (the loop's own i++ accounts for one of
   them). */
2102 struct string function_output = DS_EMPTY_INITIALIZER;
2103 size_t function_consumed;
2104 if (expand_macro_function (me, &mts->mts[i], mts->n - i,
2105 &function_output, &function_consumed))
2107 i += function_consumed - 1;
2109 macro_tokens_from_string__ (exp, function_output.ss,
2110 me->segmenter_mode, me->stack);
2111 ds_destroy (&function_output);
/* !LET and !DO constructs. */
2116 size_t n = macro_parse_let (&mts->mts[i], mts->n - i, me);
2123 n = macro_expand_do (&mts->mts[i], mts->n - i, me, exp);
/* !ONEXPAND and !OFFEXPAND act on the expansion flag shared through
   ME->expand. */
2130 if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
2132 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
2133 *me->expand = false;
/* Any other macro identifier is copied unchanged. */
2135 macro_tokens_add (exp, mt);
/* Expands the completed macro call MC, appending the expansion of the called
   macro's body to EXP.  SEGMENTER_MODE is recorded in the expander and used
   wherever expansion has to tokenize strings.

   MC must be in state MC_FINISHED (asserted).  The expansion starts with an
   empty variable map, a stack entry that records the macro's name and the
   file and line range of its definition, and a nesting allowance taken from
   the MNEST setting. */
2140 macro_call_expand (struct macro_call *mc, enum segmenter_mode segmenter_mode,
2141 struct macro_tokens *exp)
2143 assert (mc->state == MC_FINISHED);
/* Per-expansion variable map; destroyed once the body has been expanded. */
2146 struct stringi_map vars = STRINGI_MAP_INITIALIZER (vars);
2147 struct macro_expansion_stack stack = {
2148 .name = mc->macro->name,
2149 .file_name = mc->macro->file_name,
2150 .first_line = mc->macro->first_line,
2151 .last_line = mc->macro->last_line,
2153 struct macro_expander me = {
2154 .macros = mc->macros,
2157 .segmenter_mode = segmenter_mode,
2161 .nesting_countdown = settings_get_mnest (),
2165 macro_expand (&mc->macro->body, &me, exp);
2167 stringi_map_destroy (&vars);