1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
24 #include "data/settings.h"
25 #include "language/lexer/segment.h"
26 #include "language/lexer/scan.h"
27 #include "libpspp/assertion.h"
28 #include "libpspp/i18n.h"
29 #include "libpspp/message.h"
30 #include "libpspp/str.h"
/* Shorthand used to pass user-visible message strings through gettext(). */
33 #define _(msgid) gettext (msgid)
/* Initializes DST as a copy of SRC: the token is copied with token_copy()
   and the representation is copied with ss_alloc_substring(). */
36 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
38 token_copy (&dst->token, &src->token);
39 ss_alloc_substring (&dst->representation, src->representation);
/* Releases what MT holds: uninitializes its token and deallocates its
   representation substring. */
43 macro_token_uninit (struct macro_token *mt)
45 token_uninit (&mt->token);
46 ss_dealloc (&mt->representation);
/* Initializes DST as a copy of SRC.  A new array with room for SRC->n
   elements is allocated, then each element is copied with
   macro_token_copy(). */
50 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
52 *dst = (struct macro_tokens) {
53 .mts = xmalloc (src->n * sizeof *dst->mts),
57 for (size_t i = 0; i < src->n; i++)
58 macro_token_copy (&dst->mts[i], &src->mts[i]);
/* Uninitializes each of the MTS->n tokens stored in MTS. */
62 macro_tokens_uninit (struct macro_tokens *mts)
64 for (size_t i = 0; i < mts->n; i++)
65 macro_token_uninit (&mts->mts[i]);
/* Appends one element to MTS and returns a pointer to it.  The array is
   grown with x2nrealloc() when it is full.  The new element is not
   initialized here, so the caller must fill it in before it is used. */
70 macro_tokens_add_uninit (struct macro_tokens *mts)
72 if (mts->n >= mts->allocated)
73 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
74 return &mts->mts[mts->n++];
/* Appends a copy of MT to MTS. */
78 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
80 macro_token_copy (macro_tokens_add_uninit (mts), mt);
/* Prints the token of each element of MTS to STREAM with token_print(). */
84 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
86 for (size_t i = 0; i < mts->n; i++)
87 token_print (&mts->mts[i].token, stream);
/* Releases the data held by macro M.  For every parameter this
   uninitializes its default tokens (def), its charend token and both of its
   enclose tokens; afterward the macro's body tokens are uninitialized. */
91 macro_destroy (struct macro *m)
97 for (size_t i = 0; i < m->n_params; i++)
99 struct macro_param *p = &m->params[i];
102 macro_tokens_uninit (&p->def);
110 token_uninit (&p->charend);
114 token_uninit (&p->enclose[0]);
115 token_uninit (&p->enclose[1]);
123 macro_tokens_uninit (&m->body);
/* Allocates a new macro set and initializes its hash map of macros. */
128 macro_set_create (void)
130 struct macro_set *set = xmalloc (sizeof *set);
131 *set = (struct macro_set) {
132 .macros = HMAP_INITIALIZER (set->macros),
/* Destroys SET.  Every macro is removed from the set's hash map and then
   destroyed with macro_destroy(); finally the hash map itself is destroyed.
   The _SAFE iteration form is used because the current node is deleted
   while the map is being walked. */
138 macro_set_destroy (struct macro_set *set)
143 struct macro *macro, *next;
144 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
/* The node to unlink is the one embedded in the macro itself. */
146 hmap_delete (&set->macros, &macro->hmap_node);
147 macro_destroy (macro);
149 hmap_destroy (&set->macros);
/* Returns the hash value used to file a macro called NAME in a macro set,
   computed by utf8_hash_case_string() with a basis of 0. */
154 hash_macro_name (const char *name)
156 return utf8_hash_case_string (name, 0);
/* Searches SET for a macro called NAME.  Only entries whose hash equals
   hash_macro_name (NAME) are visited, and a candidate matches when
   utf8_strcasecmp() reports its name equal to NAME. */
159 static struct macro *
160 macro_set_find__ (struct macro_set *set, const char *name)
163 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
164 hash_macro_name (name), &set->macros)
165 if (!utf8_strcasecmp (macro->name, name))
/* Looks up NAME in SET.  This is a wrapper that casts away SET's const
   qualifier so that macro_set_find__() can be reused. */
172 macro_set_find (const struct macro_set *set, const char *name)
174 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
177 /* Adds M to SET. M replaces any existing macro with the same name. Takes ownership of M. */
180 macro_set_add (struct macro_set *set, struct macro *m)
/* Look for a macro already registered under the same name. */
182 struct macro *victim = macro_set_find__ (set, m->name);
/* The previous definition is unlinked from the map and destroyed. */
185 hmap_delete (&set->macros, &victim->hmap_node);
186 macro_destroy (victim);
/* M's own embedded node is linked into the map, filed under the hash of its
   name. */
189 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
197 /* Accumulating tokens in me->params toward the end of any type of argument. */
201 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
204 /* Expecting a keyword for a keyword argument. */
207 /* Expecting an equal sign for a keyword argument. */
/* State kept while the tokens of one potential macro call are collected. */
212 struct macro_expander
/* Macro set handed to macro_expand() when the expansion is produced. */
214 const struct macro_set *macros;
/* The macro being called. */
219 const struct macro *macro;
/* One entry per parameter of the macro, indexed like macro->params.  A
   nonnull entry holds the tokens collected for that argument. */
220 struct macro_tokens **args;
/* The parameter whose argument is currently being collected. */
221 const struct macro_param *param;
/* Completes argument collection for ME by giving arguments their default
   values: an argument is allocated and filled with a copy of the
   parameter's default tokens.
   NOTE(review): presumably this applies only to arguments that were not
   supplied in the call, and the return value is the count of tokens
   consumed -- confirm against the complete function. */
225 me_finished (struct macro_expander *me)
227 for (size_t i = 0; i < me->macro->n_params; i++)
230 me->args[i] = xmalloc (sizeof *me->args[i]);
231 macro_tokens_copy (me->args[i], &me->macro->params[i].def);
/* Called once an argument is complete (see me_add_arg()).  Decides what ME
   should expect next, or finishes the call through me_finished() when no
   further argument is expected.
   NOTE(review): the conditions that select between the branches below are
   only partly visible here; confirm the details against the complete
   function. */
237 me_next_arg (struct macro_expander *me)
/* In this branch the macro is asserted to have no parameters at all, so the
   call is complete. */
241 assert (!me->macro->n_params);
242 return me_finished (me);
244 else if (me->param->positional)
/* A parameter pointer at or past the end of the parameter array means every
   parameter has been handled. */
247 if (me->param >= &me->macro->params[me->macro->n_params])
248 return me_finished (me);
/* Otherwise the kind of the parameter now current selects the next state. */
251 me->state = me->param->positional ? ME_ARG : ME_KEYWORD;
/* Keyword case: the parameters are scanned, and the expander may go back to
   waiting for another keyword. */
257 for (size_t i = 0; i < me->macro->n_params; i++)
260 me->state = ME_KEYWORD;
263 return me_finished (me);
/* Puts ME into the ME_ERROR state. */
268 me_error (struct macro_expander *me)
270 me->state = ME_ERROR;
/* Handles token MT while ME is collecting the tokens of the argument for
   me->param.  Depending on the parameter's arg_type, MT is either appended
   to the argument or recognized as the end of the argument, in which case
   me_next_arg() decides what follows.  End of input (T_STOP) in the middle
   of an argument is reported as an error. */
275 me_add_arg (struct macro_expander *me, const struct macro_token *mt)
277 const struct token *token = &mt->token;
278 if (token->type == T_STOP)
280 msg (SE, _("Unexpected end of file reading argument %s "
281 "to macro %s."), me->param->name, me->macro->name);
283 return me_error (me);
/* The argument slot has the same index in me->args as the parameter has in
   the macro's parameter array. */
288 const struct macro_param *p = me->param;
289 struct macro_tokens **argp = &me->args[p - me->macro->params];
291 *argp = xzalloc (sizeof **argp);
292 struct macro_tokens *arg = *argp;
293 if (p->arg_type == ARG_N_TOKENS)
/* Fixed-length argument: complete once n_tokens tokens are collected. */
295 macro_tokens_add (arg, mt);
296 if (arg->n >= p->n_tokens)
297 return me_next_arg (me);
300 else if (p->arg_type == ARG_CMDEND)
/* Argument that runs to the end of the command; the terminating token is
   not added to the argument. */
302 if (token->type == T_ENDCMD || token->type == T_STOP)
303 return me_next_arg (me);
304 macro_tokens_add (arg, mt);
/* Remaining argument types end at a specific token: the closing delimiter
   for an ARG_ENCLOSE argument, the charend token otherwise.  ARG_CMDEND was
   handled above, so it must not be the type tested here. */
309 const struct token *end
310 = p->arg_type == ARG_ENCLOSE ? &p->enclose[1] : &p->charend;
311 if (token_equal (token, end))
312 return me_next_arg (me);
313 macro_tokens_add (arg, mt);
/* Reports that ACTUAL was found where EXPECTED was required while reading
   the argument for me->param, then puts ME into the error state and returns
   the result of me_error(). */
319 me_expected (struct macro_expander *me, const struct macro_token *actual,
320 const struct token *expected)
/* A token with an empty representation is described as the end of input. */
322 const struct substring actual_s
323 = (actual->representation.length ? actual->representation
324 : ss_cstr (_("<end of input>")));
325 char *expected_s = token_to_string (expected);
326 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
328 (int) actual_s.length, actual_s.string, expected_s,
329 me->param->name, me->macro->name);
332 return me_error (me);
/* Handles token MT while ME expects the opening delimiter of an enclosed
   argument.  MT is compared with the parameter's opening delimiter,
   enclose[0]; a token that does not match is reported through
   me_expected(). */
336 me_enclose (struct macro_expander *me, const struct macro_token *mt)
338 const struct token *token = &mt->token;
341 if (token_equal (&me->param->enclose[0], token))
347 return me_expected (me, mt, &me->param->enclose[0]);
/* Searches the parameters of macro M for one called NAME.  Names are
   compared with utf8_strncasecmp() over the full length of both strings. */
350 static const struct macro_param *
351 macro_find_parameter_by_name (const struct macro *m, struct substring name)
353 for (size_t i = 0; i < m->n_params; i++)
355 const struct macro_param *p = &m->params[i];
356 struct substring p_name = ss_cstr (p->name);
357 if (!utf8_strncasecmp (p_name.string, p_name.length,
358 name.string, name.length))
/* Handles token MT while ME expects the name of a keyword argument. */
365 me_keyword (struct macro_expander *me, const struct macro_token *mt)
367 const struct token *token = &mt->token;
/* Anything other than an identifier cannot name a keyword argument, so the
   macro call ends here. */
368 if (token->type != T_ID)
369 return me_finished (me);
371 const struct macro_param *p = macro_find_parameter_by_name (me->macro,
375 size_t arg_index = p - me->macro->params;
/* A nonnull entry means this argument was already given in this call. */
377 if (me->args[arg_index])
380 _("Argument %s multiply specified in call to macro %s."),
381 p->name, me->macro->name);
382 return me_error (me);
/* Next an equals sign is expected after the keyword. */
386 me->state = ME_EQUALS;
390 return me_finished (me);
/* Handles token MT while ME expects the equals sign of a keyword argument.
   Any other token is reported through me_expected(), using a temporary
   T_EQUALS token to describe what was expected. */
394 me_equals (struct macro_expander *me, const struct macro_token *mt)
396 const struct token *token = &mt->token;
399 if (token->type == T_EQUALS)
405 return me_expected (me, mt, &(struct token) { .type = T_EQUALS });
/* Checks whether TOKEN may begin a call to a macro in MACROS and, if so,
   allocates an expander for that call.
   NOTE(review): the caller in macro_expand() treats a return value of 0 as
   "feed more tokens with macro_expander_add()"; the values returned on the
   early exits, and the storing of the new expander in *MEP, should be
   confirmed against the complete function. */
409 macro_expander_create (const struct macro_set *macros,
410 const struct token *token,
411 struct macro_expander **mep)
/* Early checks: an empty macro set, or a token that is neither an
   identifier nor a macro identifier. */
414 if (macro_set_is_empty (macros))
416 if (token->type != T_ID && token->type != T_MACRO_ID)
/* The token's string is the candidate macro name. */
419 const struct macro *macro = macro_set_find (macros, token->string.string);
423 struct macro_expander *me = xmalloc (sizeof *me);
424 *me = (struct macro_expander) {
431 if (!macro->n_params)
/* With parameters: the first parameter's kind chooses the initial state,
   the argument array starts out zero-filled, and collection begins at the
   first parameter. */
435 me->state = macro->params[0].positional ? ME_ARG : ME_KEYWORD;
436 me->args = xcalloc (macro->n_params, sizeof *me->args);
437 me->param = macro->params;
/* Releases ME's per-parameter arguments by uninitializing the tokens of
   each one. */
443 macro_expander_destroy (struct macro_expander *me)
448 for (size_t i = 0; i < me->macro->n_params; i++)
451 macro_tokens_uninit (me->args[i]);
458 /* Adds TOKEN to the collection of tokens in ME that potentially need to be
macro expanded.
461 Returns -1 if the tokens added do not actually invoke a macro. The caller
462 should consume the first token without expanding it.
464 Returns 0 if the macro expander needs more tokens, for macro arguments or to
465 decide whether this is actually a macro invocation. The caller should call
466 macro_expander_add() again with the next token.
468 Returns a positive number to indicate that the returned number of tokens
469 invoke a macro. The number returned might be less than the number of tokens
470 added because it can take a few tokens of lookahead to determine whether the
471 macro invocation is finished. The caller should call
472 macro_expander_get_expansion() to obtain the expansion. */
474 macro_expander_add (struct macro_expander *me, const struct macro_token *mt)
/* MT is handed to one of four handlers: argument tokens, opening delimiter,
   keyword name, or equals sign (presumably chosen by ME's current state --
   confirm). */
482 return me_add_arg (me, mt);
485 return me_enclose (me, mt);
488 return me_keyword (me, mt);
491 return me_equals (me, mt);
498 /* Each argument to a macro function is one of:
500 - A quoted string or other single literal token.
502 - An argument to the macro being expanded, e.g. !1 or a named argument.
506 - A function invocation.
508 Each function invocation yields a character sequence to be turned into a
509 sequence of tokens. The case where that character sequence is a single
510 quoted string is an important special case. */
/* Context shared by the functions that parse and expand macro functions. */
512 struct parse_macro_function_ctx
/* Tokens to parse; the function name, if any, is at index 0. */
514 struct macro_token *input;
/* Nesting budget, copied unchanged into nested contexts. */
516 int nesting_countdown;
/* Macro set in effect for the expansion. */
517 const struct macro_set *macros;
/* Expander for the macro being expanded; its macro's parameters and its
   collected arguments are consulted when a token names a parameter. */
518 const struct macro_expander *me;
/* Forward declaration: macro_expand() is defined after its first use. */
523 macro_expand (const struct macro_tokens *,
524 int nesting_countdown, const struct macro_set *,
525 const struct macro_expander *, bool *expand, struct macro_tokens *exp);
/* Forward declaration: parse_function_arg() and expand_macro_function()
   call each other. */
528 expand_macro_function (struct parse_macro_function_ctx *ctx,
529 struct macro_token *output,
530 size_t *input_consumed);
/* Parses one argument of a macro function, starting at token I of
   CTX->input, and stores the result in *FARG.  The caller adds the return
   value to its token index, so it is the number of input tokens consumed.
   Three cases are tried in order: a macro identifier naming a parameter of
   the macro being expanded, a nested macro function call, and finally the
   input token taken literally. */
533 parse_function_arg (struct parse_macro_function_ctx *ctx,
534 size_t i, struct macro_token *farg)
536 struct macro_token *tokens = ctx->input;
537 const struct token *token = &tokens[i].token;
538 if (token->type == T_MACRO_ID)
540 const struct macro_param *param = macro_find_parameter_by_name (
541 ctx->me->macro, token->string);
/* The parameter's position selects the argument collected for it. */
544 size_t param_idx = param - ctx->me->macro->params;
545 const struct macro_tokens *marg = ctx->me->args[param_idx];
/* The argument's first token can be copied as is (presumably when the
   argument consists of exactly one token -- confirm the guard). */
547 macro_token_copy (farg, &marg->mts[0]);
/* Otherwise the representations of the argument's tokens are joined, with
   spaces between them, into one string. */
550 struct string s = DS_EMPTY_INITIALIZER;
551 for (size_t i = 0; i < marg->n; i++)
554 ds_put_byte (&s, ' ');
555 ds_put_substring (&s, marg->mts[i].representation);
/* The token takes S's buffer and the representation gets its own copy, so
   the two can be released independently. */
558 struct substring s_copy;
559 ss_alloc_substring (&s_copy, s.ss);
561 *farg = (struct macro_token) {
562 .token = { .type = T_MACRO_ID, .string = s.ss },
563 .representation = s_copy,
/* Try a nested macro function call on the input that starts at token I. */
569 struct parse_macro_function_ctx subctx = {
570 .input = &ctx->input[i],
571 .n_input = ctx->n_input - i,
572 .nesting_countdown = ctx->nesting_countdown,
573 .macros = ctx->macros,
575 .expand = ctx->expand,
577 size_t subinput_consumed;
578 if (expand_macro_function (&subctx, farg, &subinput_consumed))
579 return subinput_consumed;
/* Fallback: the argument is the input token itself. */
582 macro_token_copy (farg, &tokens[i]);
/* Tries to parse a call to the macro function named FUNCTION at the start
   of CTX->input.  The expected form is the function name, a left
   parenthesis, comma-separated arguments and a right parenthesis.  The
   parsed arguments are collected in ARGS, and their number must lie between
   MIN_ARGS and MAX_ARGS.  On a complete call, *INPUT_CONSUMED is set to the
   number of tokens up to and including the closing parenthesis.  Problems
   are reported with printf(), and the failure path uninitializes ARGS. */
587 parse_macro_function (struct parse_macro_function_ctx *ctx,
588 struct macro_tokens *args,
589 struct substring function,
590 int min_args, int max_args,
591 size_t *input_consumed)
593 struct macro_token *tokens = ctx->input;
594 size_t n_tokens = ctx->n_input;
/* The first token must be a macro identifier equal to FUNCTION, ignoring
   case. */
597 || tokens[0].token.type != T_MACRO_ID
598 || !ss_equals_case (tokens[0].token.string, function))
601 if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
603 printf ("`(' expected following %s'\n", function.string);
607 *args = (struct macro_tokens) { .n = 0 };
/* Arguments start at token 2, right after the opening parenthesis. */
609 for (size_t i = 2;; )
613 if (tokens[i].token.type == T_RPAREN)
615 *input_consumed = i + 1;
616 if (args->n < min_args || args->n > max_args)
618 printf ("Wrong number of arguments to %s.\n", function.string);
/* Parse one argument straight into a freshly appended slot of ARGS. */
624 i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args));
628 if (tokens[i].token.type == T_COMMA)
630 else if (tokens[i].token.type != T_RPAREN)
632 printf ("Expecting `,' or `)' in %s invocation.", function.string);
638 printf ("Missing closing parenthesis in arguments to %s.\n",
642 macro_tokens_uninit (args);
/* Tries to expand a macro function call at the start of CTX->input.  The
   functions recognized here are !length, !blanks and !concat.  On success
   the single resulting token is stored in *OUTPUT, and *INPUT_CONSUMED
   receives the number of input tokens used, as reported by
   parse_macro_function().  *OUTPUT is only written on success; callers pass
   uninitialized storage, so it must never be released on a failure path. */
647 expand_macro_function (struct parse_macro_function_ctx *ctx,
648 struct macro_token *output,
649 size_t *input_consumed)
651 struct macro_tokens args;
653 if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1,
/* !length yields the length of the argument's representation as a number. */
656 size_t length = args.mts[0].representation.length;
657 *output = (struct macro_token) {
658 .token = { .type = T_POS_NUM, .number = length },
659 .representation = ss_cstr (xasprintf ("%zu", length)),
662 else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
665 /* XXX this isn't right, it might be a character string containing a
666 positive integer, e.g. via !CONCAT. */
667 if (args.mts[0].token.type != T_POS_NUM)
669 printf ("argument to !BLANKS must be positive integer\n");
/* Nothing has been stored in *OUTPUT yet, so only the parsed arguments are
   released on this failure path. */
670 macro_tokens_uninit (&args);
674 struct string s = DS_EMPTY_INITIALIZER;
675 ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
/* The token takes S's buffer.  The representation needs storage of its own
   because macro_token_uninit() releases the token string and the
   representation separately. */
struct substring s_copy;
ss_alloc_substring (&s_copy, s.ss);
677 *output = (struct macro_token) {
678 .token = { .type = T_ID, .string = s.ss },
679 .representation = s_copy,
682 else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX,
/* !concat joins its arguments: string tokens contribute their string value,
   other tokens their representation. */
686 bool all_strings = true;
687 for (size_t i = 0; i < args.n; i++)
689 if (args.mts[i].token.type == T_STRING)
690 ds_put_substring (&s, args.mts[i].token.string);
694 ds_put_substring (&s, args.mts[i].representation);
/* String result: the representation is the token rendered back to text. */
700 *output = (struct macro_token) {
701 .token = { .type = T_STRING, .string = s.ss },
703 output->representation = ss_cstr (token_to_string (&output->token));
/* Non-string result: the representation is a separate copy of the text. */
707 *output = (struct macro_token) {
708 .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss },
710 ss_alloc_substring (&output->representation, s.ss);
716 macro_tokens_uninit (&args);
/* Expands the tokens in MTS, appending the result to EXP.

   MACROS is the set of macros that may be called.  ME, when nonnull, is the
   expander of the macro whose body is being expanded; its parameters and
   arguments are used to substitute macro identifiers.  NESTING_COUNTDOWN
   limits how deeply macro calls may nest.  *EXPAND controls whether
   arguments marked expand_arg are themselves expanded; the !onexpand and
   !offexpand tokens are recognized below (presumably toggling *EXPAND --
   confirm). */
721 macro_expand (const struct macro_tokens *mts,
722 int nesting_countdown, const struct macro_set *macros,
723 const struct macro_expander *me, bool *expand,
724 struct macro_tokens *exp)
/* Once the nesting budget is used up, the tokens are copied through without
   expansion. */
726 if (nesting_countdown <= 0)
728 printf ("maximum nesting level exceeded\n");
729 for (size_t i = 0; i < mts->n; i++)
730 macro_tokens_add (exp, &mts->mts[i]);
734 for (size_t i = 0; i < mts->n; i++)
736 const struct macro_token *mt = &mts->mts[i];
737 const struct token *token = &mt->token;
/* Case 1: a macro identifier that names a parameter of the macro being
   expanded is replaced by the corresponding argument. */
738 if (token->type == T_MACRO_ID && me)
740 const struct macro_param *param = macro_find_parameter_by_name (
741 me->macro, token->string);
744 const struct macro_tokens *arg = me->args[param - me->macro->params];
745 //macro_tokens_print (arg, stdout);
/* The argument is expanded recursively (with no enclosing expander) or
   copied verbatim. */
746 if (*expand && param->expand_arg)
747 macro_expand (arg, nesting_countdown, macros, NULL, expand, exp);
749 for (size_t i = 0; i < arg->n; i++)
750 macro_tokens_add (exp, &arg->mts[i]);
/* Case 2: the token may begin a call to another macro.  Following tokens
   are fed to a sub-expander until it returns nonzero; a T_STOP token stands
   in once MTS is exhausted. */
757 struct macro_expander *subme;
758 int retval = macro_expander_create (macros, token, &subme);
759 for (size_t j = 1; !retval; j++)
761 const struct macro_token stop = { .token = { .type = T_STOP } };
762 retval = macro_expander_add (
763 subme, i + j < mts->n ? &mts->mts[i + j] : &stop);
/* The called macro's body is expanded with one less level of nesting
   allowed. */
768 macro_expand (&subme->macro->body, nesting_countdown - 1, macros,
770 macro_expander_destroy (subme);
774 macro_expander_destroy (subme);
/* Case 3: tokens other than macro identifiers are copied to the output. */
777 if (token->type != T_MACRO_ID)
779 macro_tokens_add (exp, mt);
783 /* Maybe each arg should just be a string, either a quoted string or a
784 non-quoted string containing tokens. */
/* Case 4: try to treat the token as the start of a macro function call. */
785 struct parse_macro_function_ctx ctx = {
786 .input = &mts->mts[i],
787 .n_input = mts->n - i,
788 .nesting_countdown = nesting_countdown,
793 struct macro_token function_output;
794 size_t function_consumed;
795 if (expand_macro_function (&ctx, &function_output, &function_consumed))
/* Skip the tokens used by the function call; the loop increment accounts
   for one of them. */
797 i += function_consumed - 1;
804 if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
806 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
/* Any other macro identifier is copied to the output. */
809 macro_tokens_add (exp, mt);
/* Expands the body of ME's macro, using the arguments collected in ME, and
   appends the resulting tokens to EXP.  The nesting limit comes from
   settings_get_mnest().
   NOTE(review): the printf()/macro_tokens_print() calls write each argument
   and the expansion to stdout; they look like debugging output and may be
   conditionally compiled -- confirm against the complete function. */
814 macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp)
817 for (size_t i = 0; i < me->macro->n_params; i++)
819 printf ("%s:\n", me->macro->params[i].name);
820 macro_tokens_print (me->args[i], stdout);
825 macro_expand (&me->macro->body, settings_get_mnest (),
826 me->macros, me, &expand, exp);
829 printf ("expansion:\n");
830 macro_tokens_print (exp, stdout);