1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
23 #include "data/settings.h"
24 #include "language/lexer/segment.h"
25 #include "language/lexer/scan.h"
26 #include "libpspp/assertion.h"
27 #include "libpspp/i18n.h"
28 #include "libpspp/message.h"
29 #include "libpspp/str.h"
32 #define _(msgid) gettext (msgid)
/* Initializes DST from SRC: the token is copied with token_copy() and DST's
   representation is filled in from SRC's with ss_alloc_substring(). */
35 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
37 token_copy (&dst->token, &src->token);
38 ss_alloc_substring (&dst->representation, src->representation);
/* Releases the contents of MT: its token goes to token_uninit() and its
   representation to ss_dealloc().  MT itself is not freed here. */
42 macro_token_uninit (struct macro_token *mt)
44 token_uninit (&mt->token);
45 ss_dealloc (&mt->representation);
/* Initializes DST with copies of the SRC->n tokens in SRC.  DST's array is
   allocated with room for exactly SRC->n elements. */
49 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
51 *dst = (struct macro_tokens) {
52 .mts = xmalloc (src->n * sizeof *dst->mts),
/* Copy element by element with macro_token_copy(). */
56 for (size_t i = 0; i < src->n; i++)
57 macro_token_copy (&dst->mts[i], &src->mts[i]);
/* Calls macro_token_uninit() on each of the MTS->n tokens held in MTS. */
61 macro_tokens_uninit (struct macro_tokens *mts)
63 for (size_t i = 0; i < mts->n; i++)
64 macro_token_uninit (&mts->mts[i]);
/* Appends a copy of MT to MTS.  MT itself is left untouched, so the caller
   still owns it. */
69 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
/* The array is full: let x2nrealloc() enlarge it and update 'allocated'. */
71 if (mts->n >= mts->allocated)
72 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
73 macro_token_copy (&mts->mts[mts->n++], mt);
/* Passes the token of each element of MTS, in order, to token_print() for
   output on STREAM.  The elements' representations are not used. */
77 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
79 for (size_t i = 0; i < mts->n; i++)
80 token_print (&mts->mts[i].token, stream);
/* Releases what macro M holds: for each of its n_params parameters, the
   default tokens ('def'), the 'charend' token and both 'enclose' tokens, and
   finally the macro body.

   NOTE(review): any conditions selecting which per-parameter tokens get
   uninitialized are not visible in this copy of the file -- confirm. */
84 macro_destroy (struct macro *m)
90 for (size_t i = 0; i < m->n_params; i++)
92 struct macro_param *p = &m->params[i];
95 macro_tokens_uninit (&p->def);
103 token_uninit (&p->charend);
107 token_uninit (&p->enclose[0]);
108 token_uninit (&p->enclose[1]);
116 macro_tokens_uninit (&m->body);
/* Allocates a new macro set and initializes its 'macros' hash map with
   HMAP_INITIALIZER. */
121 macro_set_create (void)
123 struct macro_set *set = xmalloc (sizeof *set);
124 *set = (struct macro_set) {
125 .macros = HMAP_INITIALIZER (set->macros),
/* Destroys SET: each macro is removed from SET's hash map and destroyed with
   macro_destroy(), then the hash map itself is destroyed. */
131 macro_set_destroy (struct macro_set *set)
136 struct macro *macro, *next;
/* Nodes are deleted during the walk, which is presumably why the _SAFE
   iteration macro (with its extra 'next' cursor) is used. */
137 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
/* NOTE(review): the second argument below looks like a mis-encoded
   "&macro->hmap_node" (an "&macr" entity substitution); the loop variable is
   'macro' -- confirm and repair. */
139 hmap_delete (&set->macros, ¯o->hmap_node);
140 macro_destroy (macro);
142 hmap_destroy (&set->macros);
/* Returns the hash value under which a macro named NAME is stored in a macro
   set's hash map.  Lookups compare names with utf8_strcasecmp(), so this is
   presumably a case-insensitive hash. */
147 hash_macro_name (const char *name)
149 return utf8_hash_case_string (name, 0);
/* Searches SET for a macro named NAME.  Only the entries whose hash equals
   hash_macro_name (NAME) are visited, and each candidate's name is compared
   with utf8_strcasecmp(). */
152 static struct macro *
153 macro_set_find__ (struct macro_set *set, const char *name)
156 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
157 hash_macro_name (name), &set->macros)
/* A zero result from utf8_strcasecmp() means the names match. */
158 if (!utf8_strcasecmp (macro->name, name))
/* Looks up NAME in SET and returns what macro_set_find__() returns for it.
   SET's constness is cast away with CONST_CAST only to reuse that helper. */
165 macro_set_find (const struct macro_set *set, const char *name)
167 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
170 /* Adds M to SET.  M replaces any existing macro with the same name: the
   existing macro is removed from SET's hash map and destroyed before M is
   inserted.  M's hmap_node is linked into SET, and macro_set_destroy() later
   destroys every macro in SET, so SET takes ownership of M.

   NOTE(review): the test that 'victim' is non-null is not visible in this
   copy of the file -- confirm it is present. */
173 macro_set_add (struct macro_set *set, struct macro *m)
175 struct macro *victim = macro_set_find__ (set, m->name);
178 hmap_delete (&set->macros, &victim->hmap_node);
179 macro_destroy (victim);
182 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
190 /* Accumulating tokens in me->args toward the end of any type of argument. */
194 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
197 /* Expecting a keyword for a keyword argument. */
200 /* Expecting an equal sign for a keyword argument. */
/* State for recognizing one macro invocation and collecting its arguments.

   NOTE(review): members used elsewhere in this file (for example 'state')
   are not visible in this copy of the declaration. */
205 struct macro_expander
/* Macro set; handed to macro_expand() when the expansion is produced. */
207 const struct macro_set *macros;
/* The macro being invoked. */
212 const struct macro *macro;
/* One entry per macro parameter, indexed like macro->params.  An entry is
   allocated when tokens are added for its parameter or when defaults are
   filled in by me_finished(). */
213 struct macro_tokens **args;
/* The parameter whose argument is currently being read. */
214 const struct macro_param *param;
/* Completes argument collection for ME: for the macro's parameters,
   allocates me->args[i] and fills it with a copy of the parameter's default
   tokens ('def').

   NOTE(review): presumably this is done only for arguments that were not
   supplied; that condition is not visible in this copy -- confirm. */
218 me_finished (struct macro_expander *me)
220 for (size_t i = 0; i < me->macro->n_params; i++)
223 me->args[i] = xmalloc (sizeof *me->args[i]);
224 macro_tokens_copy (me->args[i], &me->macro->params[i].def);
/* Called when the argument for me->param is complete.  Either selects the
   state for reading the next argument or, when nothing further is to be
   read, returns the result of me_finished().

   NOTE(review): several lines of this function (the first condition, the
   advance of me->param, and the test inside the final loop) are not visible
   in this copy; the comments below describe only what can be seen. */
230 me_next_arg (struct macro_expander *me)
/* First branch: a macro without parameters is finished at once. */
234 assert (!me->macro->n_params);
235 return me_finished (me);
/* The parameter just handled was positional. */
237 else if (me->param->positional)
/* me->param at or past the end of the parameter array: nothing left. */
240 if (me->param >= &me->macro->params[me->macro->n_params])
241 return me_finished (me);
/* Otherwise pick the state that suits the parameter me->param points to. */
244 me->state = me->param->positional ? ME_ARG : ME_KEYWORD;
/* The parameter just handled was a keyword parameter: scan the parameters
   and either go back to expecting a keyword or finish. */
250 for (size_t i = 0; i < me->macro->n_params; i++)
253 me->state = ME_KEYWORD;
256 return me_finished (me);
/* Puts ME into the ME_ERROR state.  Callers return this function's result
   after reporting a problem with msg(). */
261 me_error (struct macro_expander *me)
263 me->state = ME_ERROR;
/* Handles MT while ME is accumulating tokens for the argument of me->param.
   Depending on the parameter's arg_type, MT is either appended to the
   argument or recognized as the end of the argument, in which case
   me_next_arg() decides what happens next.  T_STOP (end of input) inside an
   argument is reported as an error. */
268 me_add_arg (struct macro_expander *me, const struct macro_token *mt)
270 const struct token *token = &mt->token;
271 if (token->type == T_STOP)
273 msg (SE, _("Unexpected end of file reading argument %s "
274 "to macro %s."), me->param->name, me->macro->name);
276 return me_error (me);
/* Find the slot for this parameter's argument: the parameter's index in
   macro->params is also its index in me->args. */
281 const struct macro_param *p = me->param;
282 struct macro_tokens **argp = &me->args[p - me->macro->params];
/* NOTE(review): presumably guarded so that the slot is allocated only when
   it is still null; the guard is not visible in this copy -- confirm. */
284 *argp = xzalloc (sizeof **argp);
285 struct macro_tokens *arg = *argp;
/* Fixed-count argument: done once n_tokens tokens have been collected. */
286 if (p->arg_type == ARG_N_TOKENS)
288 macro_tokens_add (arg, mt);
289 if (arg->n >= p->n_tokens)
290 return me_next_arg (me);
/* Argument that runs to the end of the command; the terminator itself is
   not added.  T_STOP was already rejected above, so in practice only
   T_ENDCMD can match here. */
293 else if (p->arg_type == ARG_CMDEND)
295 if (token->type == T_ENDCMD || token->type == T_STOP)
296 return me_next_arg (me);
297 macro_tokens_add (arg, mt);
/* Remaining argument types end at a specific terminator token.

   NOTE(review): this appears to be the final branch of the chain, reached
   only when arg_type is neither ARG_N_TOKENS nor ARG_CMDEND.  If so, the
   ARG_CMDEND test below can never be true, 'end' is always &p->enclose[1],
   and p->charend is never used as a terminator.  The test was probably
   meant to check for the charend argument type -- confirm and fix. */
302 const struct token *end
303 = p->arg_type == ARG_CMDEND ? &p->charend : &p->enclose[1];
304 if (token_equal (token, end))
305 return me_next_arg (me);
306 macro_tokens_add (arg, mt);
/* Reports with msg() that ACTUAL was found where EXPECTED was required while
   reading the argument for me->param, then returns the result of
   me_error(). */
312 me_expected (struct macro_expander *me, const struct macro_token *actual,
313 const struct token *expected)
/* A token with an empty representation is described as end of input. */
315 const struct substring actual_s
316 = (actual->representation.length ? actual->representation
317 : ss_cstr (_("<end of input>")));
/* NOTE(review): the release of expected_s is not visible in this copy --
   confirm that the string returned by token_to_string() is freed. */
318 char *expected_s = token_to_string (expected);
/* actual_s is a substring, hence the "%.*s" with an explicit length. */
319 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
321 (int) actual_s.length, actual_s.string, expected_s,
322 me->param->name, me->macro->name);
325 return me_error (me);
/* Handles MT while ME expects the opening delimiter of an enclosed argument.
   The token is compared with the current parameter's enclose[0]; when the
   comparison is not accepted, the mismatch is reported through
   me_expected().  (What happens on a match is not visible in this copy.) */
329 me_enclose (struct macro_expander *me, const struct macro_token *mt)
331 const struct token *token = &mt->token;
334 if (token_equal (&me->param->enclose[0], token))
340 return me_expected (me, mt, &me->param->enclose[0]);
/* Searches the parameters of M for one whose name matches NAME.  NAME is a
   substring, so the comparison is the length-bounded utf8_strncasecmp(),
   where a zero result means a match. */
343 static const struct macro_param *
344 macro_find_parameter_by_name (const struct macro *m, struct substring name)
346 for (size_t i = 0; i < m->n_params; i++)
348 const struct macro_param *p = &m->params[i];
/* Wrap the parameter's C string so that both sides carry a length. */
349 struct substring p_name = ss_cstr (p->name);
350 if (!utf8_strncasecmp (p_name.string, p_name.length,
351 name.string, name.length))
/* Handles MT while ME expects the keyword that introduces a keyword
   argument.  A token that is not an identifier ends the invocation.  An
   identifier is looked up among the macro's parameters; naming a parameter
   whose argument already exists is an error, and otherwise ME moves on to
   expect the equals sign. */
358 me_keyword (struct macro_expander *me, const struct macro_token *mt)
360 const struct token *token = &mt->token;
361 if (token->type != T_ID)
362 return me_finished (me);
364 const struct macro_param *p = macro_find_parameter_by_name (me->macro,
/* The parameter's position in macro->params is also its slot in me->args. */
368 size_t arg_index = p - me->macro->params;
/* A non-null slot means this argument was already given. */
370 if (me->args[arg_index])
373 _("Argument %s multiply specified in call to macro %s."),
374 p->name, me->macro->name);
375 return me_error (me);
379 me->state = ME_EQUALS;
/* Final return; presumably reached when no parameter has this name (the
   branching around it is not visible in this copy). */
383 return me_finished (me);
/* Handles MT while ME expects the equals sign of a keyword argument.  When
   the T_EQUALS check is not accepted, the mismatch is reported through
   me_expected(), using a temporary T_EQUALS token as the expected token.
   (What happens on a match is not visible in this copy.) */
387 me_equals (struct macro_expander *me, const struct macro_token *mt)
389 const struct token *token = &mt->token;
392 if (token->type == T_EQUALS)
398 return me_expected (me, mt, &(struct token) { .type = T_EQUALS });
/* Starts recognizing a possible macro invocation that begins with TOKEN,
   looking the macro up in MACROS.  The visible code allocates an expander
   when TOKEN names a macro; presumably it is handed back through *MEP.

   The visible checks are, in order: whether MACROS is empty, whether TOKEN
   is a T_ID or T_MACRO_ID, and whether a macro named token->string.string
   exists.  The values returned on each path are not visible in this copy. */
402 macro_expander_create (const struct macro_set *macros,
403 const struct token *token,
404 struct macro_expander **mep)
407 if (macro_set_is_empty (macros))
409 if (token->type != T_ID && token->type != T_MACRO_ID)
412 const struct macro *macro = macro_set_find (macros, token->string.string);
416 struct macro_expander *me = xmalloc (sizeof *me);
417 *me = (struct macro_expander) {
/* A macro without parameters needs no argument parsing. */
424 if (!macro->n_params)
/* Otherwise start with the first parameter: its kind selects the initial
   state, and the argument slots start out zeroed (xcalloc). */
428 me->state = macro->params[0].positional ? ME_ARG : ME_KEYWORD;
429 me->args = xcalloc (macro->n_params, sizeof *me->args);
430 me->param = macro->params;
/* Releases ME's collected arguments: the tokens of each per-parameter
   argument are uninitialized with macro_tokens_uninit().

   NOTE(review): me->args entries start out null (see
   macro_expander_create()); the null checks and the frees of the containers
   are not visible in this copy -- confirm they are present. */
436 macro_expander_destroy (struct macro_expander *me)
441 for (size_t i = 0; i < me->macro->n_params; i++)
444 macro_tokens_uninit (me->args[i]);
451 /* Adds MT to the collection of tokens in ME that potentially need to be
   expanded.  The token is passed to the handler for ME's current state:
   me_add_arg(), me_enclose(), me_keyword(), or me_equals().  (The dispatch
   on the state itself is not visible in this copy of the file.)

   Returns -1 if the tokens added do not actually invoke a macro.  The caller
   should consume the first token without expanding it.

   Returns 0 if the macro expander needs more tokens, for macro arguments or to
   decide whether this is actually a macro invocation.  The caller should call
   macro_expander_add() again with the next token.

   Returns a positive number to indicate that the returned number of tokens
   invoke a macro.  The number returned might be less than the number of tokens
   added because it can take a few tokens of lookahead to determine whether the
   macro invocation is finished.  The caller should call
   macro_expander_get_expansion() to obtain the expansion. */
467 macro_expander_add (struct macro_expander *me, const struct macro_token *mt)
475 return me_add_arg (me, mt);
478 return me_enclose (me, mt);
481 return me_keyword (me, mt);
484 return me_equals (me, mt);
/* Bundle of arguments for parse_macro_function().

   NOTE(review): parse_macro_function() also reads ctx->idx and ctx->expand,
   whose declarations are not visible in this copy of the file. */
491 struct parse_macro_function_ctx
/* The token sequence being scanned for a macro function call. */
493 const struct macro_tokens *mts;
/* Receives the parsed arguments; reset and filled by
   parse_macro_function(). */
495 struct macro_tokens *args;
/* The next three members are forwarded to macro_expand() when each argument
   is expanded. */
496 int nesting_countdown;
497 const struct macro_set *macros;
498 const struct macro_expander *me;
/* Forward declaration: parse_macro_function() calls macro_expand(), which is
   defined after it. */
503 macro_expand (const struct macro_tokens *,
504 int nesting_countdown, const struct macro_set *,
505 const struct macro_expander *, bool *expand, struct macro_tokens *exp);
/* Tries to parse a call to the macro function named FUNCTION, of the form
   FUNCTION(arg, arg, ...), from the tokens in CTX->mts.  Each argument is
   macro-expanded and must expand to exactly one token, which is appended to
   CTX->args.  The number of arguments must lie between MIN_ARGS and MAX_ARGS
   inclusive.  Problems are reported with printf() on stdout.

   NOTE(review): the messages print function.string with "%s", which assumes
   it is null-terminated; the callers visible in this file pass
   ss_cstr ("...") literals.  The return values are not visible in this copy
   of the file. */
508 parse_macro_function (struct parse_macro_function_ctx *ctx,
509 struct substring function,
510 int min_args, int max_args)
512 struct macro_token *tokens = ctx->mts->mts;
513 size_t n_tokens = ctx->mts->n;
/* NOTE(review): the name is compared against tokens[0], while the
   parenthesis is looked for relative to *ctx->idx.  These agree only when
   *ctx->idx is 0 -- confirm which index is intended. */
515 if (!ss_equals_case (tokens[0].token.string, function))
518 size_t lparen_idx = *ctx->idx + 1;
519 if (lparen_idx >= n_tokens || tokens[lparen_idx].token.type != T_LPAREN)
/* NOTE(review): this message has a stray "'" after "%s". */
521 printf ("`(' expected following %s'\n", function.string);
/* Start from an empty argument list. */
525 *ctx->args = (struct macro_tokens) { .n = 0 };
/* I marks the first token of the current argument and J scans forward for
   the "," or ")" that ends it. */
527 size_t i = lparen_idx + 1;
528 for (size_t j = i; ; j++)
532 printf ("Missing closing parenthesis in arguments to %s.\n",
537 int type = tokens[j].token.type;
/* A nested parenthesized group belongs to the current argument: skip ahead
   until the nesting level returns to zero. */
538 if (type == T_LPAREN)
540 int paren_nesting_level = 1;
546 printf ("Missing closing parenthesis in argument %zu to %s.\n",
547 ctx->args->n + 1, function.string);
550 if (tokens[j].token.type == T_LPAREN)
551 paren_nesting_level++;
552 else if (tokens[j].token.type == T_RPAREN)
553 paren_nesting_level--;
555 while (paren_nesting_level != 0);
/* End of an argument: expand tokens I up to (not including) J. */
557 else if (type == T_RPAREN || type == T_COMMA)
559 struct macro_tokens expanded_arg = { .n = 0 };
560 macro_expand (&(const struct macro_tokens) { .mts = &tokens[i], .n = j - i },
561 ctx->nesting_countdown, ctx->macros,
562 ctx->me, ctx->expand, &expanded_arg);
564 if (expanded_arg.n != 1)
566 printf ("argument %zu to %s must be a single token "
567 "(not %zu tokens)\n", ctx->args->n + 1, function.string,
569 macro_tokens_uninit (&expanded_arg);
/* macro_tokens_add() stores a copy, so the temporary expansion can be
   released right away. */
573 macro_tokens_add (ctx->args, &expanded_arg.mts[0]);
574 macro_tokens_uninit (&expanded_arg);
/* ")" ends the argument list, so the argument count is checked now. */
577 if (type == T_RPAREN)
/* NOTE(review): this compares a size_t count with int bounds. */
582 if (ctx->args->n < min_args || ctx->args->n > max_args)
584 printf ("Wrong number of argument to %s.\n", function.string);
591 macro_tokens_uninit (ctx->args);
/* Expands the tokens in MTS and appends the result to EXP.

   NESTING_COUNTDOWN limits recursion: at zero or below, a message is printed
   and MTS is copied to EXP unexpanded.  MACROS is the set in which nested
   macro calls are looked up.  ME, if nonnull, supplies the macro whose
   parameter names are recognized among the T_MACRO_ID tokens and the
   arguments that replace them.  *EXPAND is consulted before expanding an
   argument's own tokens.

   Tokens that are not substituted, expanded, or handled as macro functions
   are appended to EXP as they are.

   NOTE(review): the printf() and macro_tokens_print (..., stdout) calls in
   this function look like debugging output -- confirm whether they are
   intended to stay. */
596 macro_expand (const struct macro_tokens *mts,
597 int nesting_countdown, const struct macro_set *macros,
598 const struct macro_expander *me, bool *expand,
599 struct macro_tokens *exp)
601 if (nesting_countdown <= 0)
603 printf ("maximum nesting level exceeded\n");
604 for (size_t i = 0; i < mts->n; i++)
605 macro_tokens_add (exp, &mts->mts[i]);
609 for (size_t i = 0; i < mts->n; i++)
611 const struct macro_token *mt = &mts->mts[i];
612 const struct token *token = &mt->token;
/* Parameter substitution: only possible inside a macro body (ME nonnull). */
613 if (token->type == T_MACRO_ID && me)
615 const struct macro_param *param = macro_find_parameter_by_name (
616 me->macro, token->string);
619 printf ("expand %s to:\n", param->name);
620 const struct macro_tokens *arg = me->args[param - me->macro->params];
621 macro_tokens_print (arg, stdout);
/* Arguments marked expand_arg are expanded again, with no enclosing macro
   (NULL), so that parameter names are not substituted a second time;
   presumably the copy loop below is the alternative for other arguments. */
622 if (*expand && param->expand_arg)
623 macro_expand (arg, nesting_countdown, macros, NULL, expand, exp);
/* Note that this 'i' shadows the outer loop index. */
625 for (size_t i = 0; i < arg->n; i++)
626 macro_tokens_add (exp, &arg->mts[i]);
/* Nested macro call: feed the following tokens to a sub-expander until it
   reports a result, supplying a T_STOP token once MTS runs out. */
633 struct macro_expander *subme;
634 int retval = macro_expander_create (macros, token, &subme);
635 for (size_t j = 1; !retval; j++)
637 const struct macro_token stop = { .token = { .type = T_STOP } };
638 retval = macro_expander_add (
639 subme, i + j < mts->n ? &mts->mts[i + j] : &stop);
/* Expanding the nested macro's body uses up one level of nesting. */
644 macro_expand (&subme->macro->body, nesting_countdown - 1, macros,
646 macro_expander_destroy (subme);
650 macro_expander_destroy (subme);
/* Anything that is not a T_MACRO_ID is copied through. */
653 if (token->type != T_MACRO_ID)
655 macro_tokens_add (exp, mt);
/* Table of macro functions; each visible entry is a name followed by two
   integers, presumably the minimum and maximum argument counts. */
660 struct macro_function
666 static const struct macro_function functions[] = {
668 { "!concat", 1, INT_MAX },
674 { "!unquote", 1, 1 },
680 /* Maybe each arg should just be a string, either a quoted string or a
681 non-quoted string containing tokens. */
682 struct macro_tokens args;
683 struct parse_macro_function_ctx ctx = {
687 .nesting_countdown = nesting_countdown,
/* !length: yields the length of the argument's representation as a number
   token. */
692 if (parse_macro_function (&ctx, ss_cstr ("!length"), 1, 1))
694 size_t length = args.mts[0].representation.length;
/* The representation comes from xasprintf(); macro_tokens_add() stores a
   copy, so 'mt' is released again right after being added. */
695 struct macro_token mt = {
696 .token = { .type = T_POS_NUM, .number = length },
697 .representation = ss_cstr (xasprintf ("%zu", length)),
699 macro_tokens_add (exp, &mt);
700 macro_token_uninit (&mt);
702 macro_tokens_uninit (&args);
/* !blanks: yields a token consisting of the requested number of spaces. */
704 else if (parse_macro_function (&ctx, ss_cstr ("!blanks"), 1, 1))
706 /* XXX this isn't right, it might be a character string containing a
707 positive integer, e.g. via !CONCAT. */
708 if (args.mts[0].token.type != T_POS_NUM)
709 printf ("argument to !BLANKS must be positive integer\n");
712 struct string s = DS_EMPTY_INITIALIZER;
713 ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
/* Both members alias the bytes of 's'; macro_tokens_add() stores a copy. */
715 struct macro_token mt = {
716 .token = { .type = T_ID, .string = s.ss },
717 .representation = s.ss
719 macro_tokens_add (exp, &mt);
723 macro_tokens_uninit (&args);
/* The bodies for !onexpand and !offexpand are not visible in this copy;
   presumably they switch *EXPAND on and off. */
725 else if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
727 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
730 macro_tokens_add (exp, mt);
/* Produces the expansion of the macro invocation collected in ME, appending
   it to EXP.  The macro body is expanded with ME supplying the arguments and
   settings_get_mnest() as the nesting limit.

   The arguments and the resulting expansion are also printed to stdout.
   NOTE(review): that looks like debugging output -- confirm.  The
   declaration of 'expand' is not visible in this copy of the file. */
735 macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp)
/* Print each parameter's name followed by its argument tokens. */
737 for (size_t i = 0; i < me->macro->n_params; i++)
739 printf ("%s:\n", me->macro->params[i].name);
740 macro_tokens_print (me->args[i], stdout);
744 macro_expand (&me->macro->body, settings_get_mnest (),
745 me->macros, me, &expand, exp);
747 printf ("expansion:\n");
748 macro_tokens_print (exp, stdout);