1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/macro.h"
24 #include "data/settings.h"
25 #include "language/lexer/segment.h"
26 #include "language/lexer/scan.h"
27 #include "libpspp/assertion.h"
28 #include "libpspp/i18n.h"
29 #include "libpspp/message.h"
30 #include "libpspp/str.h"
33 #define _(msgid) gettext (msgid)
/* Initializes DST as a copy of SRC: the token is copied with token_copy()
   and DST receives its own representation substring, allocated from SRC's
   with ss_alloc_substring(). */
36 macro_token_copy (struct macro_token *dst, const struct macro_token *src)
38 token_copy (&dst->token, &src->token);
39 ss_alloc_substring (&dst->representation, src->representation);
/* Releases what MT holds: uninitializes its token and deallocates its
   representation substring. */
43 macro_token_uninit (struct macro_token *mt)
45 token_uninit (&mt->token);
46 ss_dealloc (&mt->representation);
/* Appends MT's representation text to S. */
50 macro_token_to_representation (struct macro_token *mt, struct string *s)
52 ds_put_substring (s, mt->representation);
/* Initializes DST as a copy of SRC.  The token array is allocated with room
   for exactly SRC->n elements and each element is then copied with
   macro_token_copy(). */
56 macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
58 *dst = (struct macro_tokens) {
59 .mts = xmalloc (src->n * sizeof *dst->mts),
/* NOTE(review): the remaining initializers (presumably the element count and
   the capacity) are not visible in this excerpt -- confirm. */
63 for (size_t i = 0; i < src->n; i++)
64 macro_token_copy (&dst->mts[i], &src->mts[i]);
/* Uninitializes each of the MTS->n tokens in MTS with macro_token_uninit().
   NOTE(review): whether the token array itself is freed afterward is not
   visible in this excerpt. */
68 macro_tokens_uninit (struct macro_tokens *mts)
70 for (size_t i = 0; i < mts->n; i++)
71 macro_token_uninit (&mts->mts[i]);
/* Reserves one more slot at the end of MTS and returns a pointer to it,
   incrementing MTS->n.  The slot's contents are not initialized here; the
   caller is expected to fill it in. */
76 macro_tokens_add_uninit (struct macro_tokens *mts)
/* Grow the array through x2nrealloc() once every allocated slot is in use. */
78 if (mts->n >= mts->allocated)
79 mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
80 return &mts->mts[mts->n++];
/* Appends a copy of MT to MTS: a new slot is reserved with
   macro_tokens_add_uninit() and filled in by macro_token_copy(). */
84 macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
86 macro_token_copy (macro_tokens_add_uninit (mts), mt);
/* Splits the text in SRC into tokens, using a segmenter initialized for MODE,
   and appends the tokens to MTS.  Each appended token's representation is the
   slice of the input text that the token was scanned from.
   NOTE(review): several short lines of this function (braces, the inner loop
   header, and the bodies of the SCAN_SAVE / SCAN_BACK / SCAN_DONE branches)
   are not visible in this excerpt, so the comments below describe only what
   the visible lines do. */
90 macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
91 enum segmenter_mode mode)
/* Scanning position: the segmenter together with the text not yet consumed. */
95 struct segmenter segmenter;
96 struct substring body;
99 struct state state = {
100 .segmenter = SEGMENTER_INIT (mode),
/* Snapshot of the scanning position.  Presumably recorded on SCAN_SAVE and
   restored on SCAN_BACK -- confirm against the branch bodies. */
103 struct state saved = state;
/* Keep producing tokens while unconsumed text remains. */
105 while (state.body.length > 0)
/* The representation begins where the unconsumed text currently begins; its
   length is filled in after the token has been scanned. */
107 struct macro_token mt = {
108 .token = { .type = T_STOP },
109 .representation = { .string = state.body.string },
111 struct token *token = &mt.token;
113 struct scanner scanner;
114 scanner_init (&scanner, token);
/* Take the next segment from the remaining text.  The `true' argument is
   presumably the end-of-input flag -- TODO confirm against segmenter_push(). */
118 enum segment_type type;
119 int seg_len = segmenter_push (&state.segmenter, state.body.string,
120 state.body.length, true, &type);
121 assert (seg_len >= 0);
/* Split the segment off the front of the remaining text and hand it to the
   scanner. */
123 struct substring segment = ss_head (state.body, seg_len);
124 ss_advance (&state.body, seg_len);
126 enum scan_result result = scanner_push (&scanner, type, segment, token);
127 if (result == SCAN_SAVE)
129 else if (result == SCAN_BACK)
134 else if (result == SCAN_DONE)
138 /* We have a token in 'token'. */
/* Scan-only pseudo-types are not added to MTS; any of them other than
   SCAN_SKIP reaches the unimplemented error path below. */
139 if (is_scan_type (token->type))
141 if (token->type != SCAN_SKIP)
143 /* XXX report error */
/* Representation length is the number of bytes consumed since the token
   started.  macro_tokens_add() stores a copy, so the local token can be
   uninitialized afterward. */
148 mt.representation.length = state.body.string - mt.representation.string;
149 macro_tokens_add (mts, &mt);
151 token_uninit (token);
/* Writes each token in MTS to STREAM, in order, using token_print(). */
156 macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
158 for (size_t i = 0; i < mts->n; i++)
159 token_print (&mts->mts[i].token, stream);
/* Token classes consulted by needs_space() to decide whether two adjacent
   tokens must be separated by white space when they are written out as
   text. */
164 TC_ENDCMD, /* No space before or after (new-line after). */
165 TC_BINOP, /* Space on both sides. */
166 TC_COMMA, /* Space afterward. */
167 TC_ID, /* Don't need spaces except sequentially. */
168 TC_PUNCT, /* Don't need spaces except sequentially. */
/* Decides whether a space belongs between a token of class PREV and the token
   of class NEXT that follows it.  The rules are checked in the order given
   below.
   NOTE(review): the return statement attached to each rule is not visible in
   this excerpt; the results described by the comments are the apparent
   intent. */
172 needs_space (enum token_class prev, enum token_class next)
174 /* Don't need a space before or after the end of a command.
175 (A new-line is needed afterward as a special case.) */
176 if (prev == TC_ENDCMD || next == TC_ENDCMD)
179 /* Binary operators always have a space on both sides. */
180 if (prev == TC_BINOP || next == TC_BINOP)
183 /* A comma always has a space afterward. */
184 if (prev == TC_COMMA)
187 /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are
188 two of them in a row. */
/* Returns the spacing class for a token of the given TYPE.
   NOTE(review): the body of this function is not visible in this excerpt, so
   the mapping from token types to classes cannot be documented here. */
192 static enum token_class
193 classify_token (enum token_type type)
/* Appends the text of all the tokens in MTS to S, inserting separators
   between neighbors where required.
   NOTE(review): lines between the signature and the first statement are not
   visible; an early exit for an empty MTS presumably lives there, since
   element 0 is used unconditionally below -- confirm. */
246 macro_tokens_to_representation (struct macro_tokens *mts, struct string *s)
251 macro_token_to_representation (&mts->mts[0], s);
252 for (size_t i = 1; i < mts->n; i++)
254 enum token_type prev = mts->mts[i - 1].token.type;
255 enum token_type next = mts->mts[i].token.type;
/* A new-line follows the end of a command. */
257 if (prev == T_ENDCMD)
258 ds_put_byte (s, '\n');
/* Otherwise (the `else' is presumably on a line missing from this excerpt) a
   space is inserted when the two tokens' classes call for one. */
261 enum token_class pc = classify_token (prev);
262 enum token_class nc = classify_token (next);
263 if (needs_space (pc, nc))
264 ds_put_byte (s, ' ');
267 macro_token_to_representation (&mts->mts[i], s);
/* Releases the resources held by macro M: per-parameter data first, then the
   macro body.
   NOTE(review): the conditions selecting which per-parameter members are
   released, and any freeing of names, the parameter array, or M itself, are
   not visible in this excerpt. */
272 macro_destroy (struct macro *m)
278 for (size_t i = 0; i < m->n_params; i++)
280 struct macro_param *p = &m->params[i];
/* Default value tokens for the parameter. */
283 macro_tokens_uninit (&p->def);
/* Terminating token; me_add_arg() reads it for ARG_CHAREND parameters. */
291 token_uninit (&p->charend);
/* Opening and closing delimiter tokens; me_enclose() and me_add_arg() read
   them for ARG_ENCLOSE parameters. */
295 token_uninit (&p->enclose[0]);
296 token_uninit (&p->enclose[1]);
304 macro_tokens_uninit (&m->body);
/* Allocates a macro set and initializes its hash map of macros to empty.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the new set is returned. */
309 macro_set_create (void)
311 struct macro_set *set = xmalloc (sizeof *set);
312 *set = (struct macro_set) {
313 .macros = HMAP_INITIALIZER (set->macros),
/* Destroys every macro in SET and then the hash map that held them.  Each
   macro is unlinked from the map with hmap_delete() before it is passed to
   macro_destroy(); the iteration macro is given a separate NEXT cursor, which
   is what allows the current node to be removed during the traversal.
   NOTE(review): any null check on SET and any freeing of SET itself are not
   visible in this excerpt. */
319 macro_set_destroy (struct macro_set *set)
324 struct macro *macro, *next;
325 HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
327 hmap_delete (&set->macros, &macro->hmap_node);
328 macro_destroy (macro);
330 hmap_destroy (&set->macros);
/* Returns the hash value under which a macro named NAME is stored, computed
   with utf8_hash_case_string() and a basis of 0.  Lookups compare names with
   utf8_strcasecmp(), so the hash is presumably case-insensitive as well. */
335 hash_macro_name (const char *name)
337 return utf8_hash_case_string (name, 0);
/* Searches SET for a macro named NAME.  Only the entries whose hash equals
   hash_macro_name (NAME) are visited, and each is compared against NAME with
   utf8_strcasecmp().
   NOTE(review): the return statements are not visible in this excerpt;
   presumably the matching macro is returned, or a null pointer if none. */
340 static struct macro *
341 macro_set_find__ (struct macro_set *set, const char *name)
344 HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
345 hash_macro_name (name), &set->macros)
346 if (!utf8_strcasecmp (macro->name, name))
/* Const-qualified front end to macro_set_find__(): looks up NAME in SET and
   returns whatever that function returns.  The cast only removes `const' so
   that the shared helper can be called. */
353 macro_set_find (const struct macro_set *set, const char *name)
355 return macro_set_find__ (CONST_CAST (struct macro_set *, set), name);
358 /* Adds M to SET. M replaces any existing macro with the same name. Takes ownership of M. */
361 macro_set_add (struct macro_set *set, struct macro *m)
/* Look for an existing macro with the same name as M. */
363 struct macro *victim = macro_set_find__ (set, m->name);
/* Remove and destroy the old macro.  NOTE(review): the test that a victim was
   actually found is presumably on a line missing from this excerpt. */
366 hmap_delete (&set->macros, &victim->hmap_node);
367 macro_destroy (victim);
/* Insert M under the hash of its own name. */
370 hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name));
378 /* Accumulating tokens in me->params toward the end of any type of argument. */
382 /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
385 /* Expecting a keyword for a keyword argument. */
388 /* Expecting an equal sign for a keyword argument. */
/* State for collecting the arguments of one macro call and expanding it.
   NOTE(review): some members (for example the `state' and `n_tokens' members
   used by the functions below) are on lines missing from this excerpt. */
393 struct macro_expander
/* Set of macros in which the call was looked up. */
395 const struct macro_set *macros;
/* The macro being called. */
400 const struct macro *macro;
/* One entry per parameter of `macro'; an entry stays null until an argument
   has been collected or a default has been filled in for it. */
401 struct macro_tokens **args;
/* Parameter whose argument is currently being collected. */
402 const struct macro_param *param;
/* Completes argument collection for ME by giving parameters a copy of their
   default value tokens.
   NOTE(review): the condition guarding the allocation (presumably "no
   argument was supplied for parameter I") and the return statement are not
   visible in this excerpt. */
406 me_finished (struct macro_expander *me)
408 for (size_t i = 0; i < me->macro->n_params; i++)
411 me->args[i] = xmalloc (sizeof *me->args[i]);
412 macro_tokens_copy (me->args[i], &me->macro->params[i].def);
/* Moves ME on after an argument has been completed: either selects the state
   for collecting the next argument or finishes the call with me_finished().
   NOTE(review): the conditions, braces, and several statements of this
   function are on lines missing from this excerpt; the comments below cover
   only what the visible lines establish. */
418 me_next_arg (struct macro_expander *me)
/* Case with no parameters at all (asserted): the call is complete. */
422 assert (!me->macro->n_params);
423 return me_finished (me);
/* Case where the parameter just completed was positional. */
425 else if (me->param->positional)
/* Past the last parameter: the call is complete. */
428 if (me->param >= &me->macro->params[me->macro->n_params])
429 return me_finished (me);
/* Choose the state from the kind of parameter now current. */
432 me->state = (!me->param->positional ? ME_KEYWORD
433 : me->param->arg_type == ARG_ENCLOSE ? ME_ENCLOSE
/* Remaining case: scan the parameters; presumably, if any still lacks an
   argument, go back to expecting a keyword, otherwise finish -- confirm. */
440 for (size_t i = 0; i < me->macro->n_params; i++)
443 me->state = ME_KEYWORD;
446 return me_finished (me);
/* Puts ME into the ME_ERROR state.
   NOTE(review): the return statement is not visible in this excerpt; callers
   return this function's result directly. */
451 me_error (struct macro_expander *me)
453 me->state = ME_ERROR;
/* Handles token MT while ME is accumulating the argument for its current
   parameter.  How the argument ends depends on the parameter's arg_type: a
   fixed token count (ARG_N_TOKENS), the end of the command (ARG_CMDEND), or a
   specific terminating token (the remaining types).
   NOTE(review): braces and some return statements are on lines missing from
   this excerpt. */
458 me_add_arg (struct macro_expander *me, const struct macro_token *mt)
460 const struct macro_param *p = me->param;
462 const struct token *token = &mt->token;
/* Only an ARG_CMDEND argument may run into the end of the command or of the
   input; for any other type that is an error. */
463 if ((token->type == T_ENDCMD || token->type == T_STOP)
464 && p->arg_type != ARG_CMDEND)
466 msg (SE, _("Unexpected end of command reading argument %s "
467 "to macro %s."), me->param->name, me->macro->name);
469 return me_error (me);
/* Locate the argument slot for P by its index in the parameter array.
   NOTE(review): the zeroed token list is presumably allocated only when the
   slot is still empty; that test is not visible here. */
474 struct macro_tokens **argp = &me->args[p - me->macro->params];
476 *argp = xzalloc (sizeof **argp);
477 struct macro_tokens *arg = *argp;
/* Fixed token count: store the token; done once the count is reached. */
478 if (p->arg_type == ARG_N_TOKENS)
480 macro_tokens_add (arg, mt);
481 if (arg->n >= p->n_tokens)
482 return me_next_arg (me);
/* To end of command: the terminator itself is not stored. */
485 else if (p->arg_type == ARG_CMDEND)
487 if (token->type == T_ENDCMD || token->type == T_STOP)
488 return me_next_arg (me);
489 macro_tokens_add (arg, mt);
/* Otherwise the argument ends at the parameter's terminating token (charend
   for ARG_CHAREND, else the closing enclose token), which is not stored. */
494 const struct token *end
495 = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
496 if (token_equal (token, end))
497 return me_next_arg (me);
498 macro_tokens_add (arg, mt);
/* Reports that token ACTUAL was found where token EXPECTED was required while
   reading the current argument, then puts ME into the error state through
   me_error() and returns its result. */
504 me_expected (struct macro_expander *me, const struct macro_token *actual,
505 const struct token *expected)
/* An empty representation is shown as a translated "<end of input>". */
507 const struct substring actual_s
508 = (actual->representation.length ? actual->representation
509 : ss_cstr (_("<end of input>")));
/* NOTE(review): token_to_string() appears to return allocated storage;
   its release is presumably on a line missing from this excerpt. */
510 char *expected_s = token_to_string (expected);
511 msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
513 (int) actual_s.length, actual_s.string, expected_s,
514 me->param->name, me->macro->name);
517 return me_error (me);
/* Handles token MT while ME expects the opening delimiter of an ARG_ENCLOSE
   argument.  A token equal to the parameter's enclose[0] is accepted; any
   other token is reported through me_expected().
   NOTE(review): what happens on acceptance is on lines missing from this
   excerpt. */
521 me_enclose (struct macro_expander *me, const struct macro_token *mt)
523 const struct token *token = &mt->token;
526 if (token_equal (&me->param->enclose[0], token))
532 return me_expected (me, mt, &me->param->enclose[0]);
/* Searches the parameters of macro M for one called NAME, comparing with
   utf8_strncasecmp().  A leading `!' on NAME is ignored, and the first byte
   of each parameter's stored name is skipped (presumably also a `!' --
   confirm).
   NOTE(review): the return statements are not visible in this excerpt;
   callers treat the result as a pointer into M->params. */
535 static const struct macro_param *
536 macro_find_parameter_by_name (const struct macro *m, struct substring name)
538 if (ss_first (name) == '!')
539 ss_advance (&name, 1);
541 for (size_t i = 0; i < m->n_params; i++)
543 const struct macro_param *p = &m->params[i];
544 struct substring p_name = ss_cstr (p->name + 1);
545 if (!utf8_strncasecmp (p_name.string, p_name.length,
546 name.string, name.length))
/* Handles token MT while ME expects the keyword that names a keyword
   argument.
   NOTE(review): several short lines (the test that a parameter was found,
   the start of the msg() call, assignments, braces, returns) are missing from
   this excerpt. */
553 me_keyword (struct macro_expander *me, const struct macro_token *mt)
555 const struct token *token = &mt->token;
/* Anything other than an identifier ends the argument list. */
556 if (token->type != T_ID)
557 return me_finished (me);
559 const struct macro_param *p = macro_find_parameter_by_name (me->macro,
563 size_t arg_index = p - me->macro->params;
/* The same keyword argument may not be given twice. */
565 if (me->args[arg_index])
568 _("Argument %s multiply specified in call to macro %s."),
569 p->name, me->macro->name);
570 return me_error (me);
/* Next, expect the equals sign that follows the keyword. */
574 me->state = ME_EQUALS;
/* Fallback path (presumably taken when the identifier names no parameter):
   the argument list is complete. */
578 return me_finished (me);
/* Handles token MT while ME expects the equals sign of a keyword argument.
   Any token other than T_EQUALS is reported through me_expected(), using a
   temporary T_EQUALS token as the expected one.
   NOTE(review): what happens when the equals sign is found is on lines
   missing from this excerpt. */
582 me_equals (struct macro_expander *me, const struct macro_token *mt)
584 const struct token *token = &mt->token;
587 if (token->type == T_EQUALS)
593 return me_expected (me, mt, &(struct token) { .type = T_EQUALS });
/* Checks whether TOKEN starts a call to one of the macros in MACROS and, if
   it does, creates a macro expander for the call.
   NOTE(review): the return statements, the test of the lookup result, the
   initializers of the new expander, and the store through MEP are on lines
   missing from this excerpt.  macro_expand() treats a zero result as "feed
   more tokens". */
597 macro_expander_create (const struct macro_set *macros,
598 const struct token *token,
599 struct macro_expander **mep)
/* No macros defined: nothing can be a macro call. */
602 if (macro_set_is_empty (macros))
/* Only T_ID and T_MACRO_ID tokens can name a macro. */
604 if (token->type != T_ID && token->type != T_MACRO_ID)
607 const struct macro *macro = macro_set_find (macros, token->string.string);
611 struct macro_expander *me = xmalloc (sizeof *me);
612 *me = (struct macro_expander) {
/* A macro without parameters needs no argument collection. */
619 if (!macro->n_params)
/* Otherwise start collecting at the first parameter, in the state that
   matches its kind, with one null argument slot per parameter. */
623 me->state = (!macro->params[0].positional ? ME_KEYWORD
624 : macro->params[0].arg_type == ARG_ENCLOSE ? ME_ENCLOSE
626 me->args = xcalloc (macro->n_params, sizeof *me->args);
627 me->param = macro->params;
/* Releases the argument token lists collected in ME.
   NOTE(review): any null checks, and the freeing of the individual lists, of
   the args array, and of ME itself, are not visible in this excerpt. */
633 macro_expander_destroy (struct macro_expander *me)
638 for (size_t i = 0; i < me->macro->n_params; i++)
641 macro_tokens_uninit (me->args[i]);
648 /* Adds TOKEN to the collection of tokens in ME that potentially need to be macro expanded.
651 Returns -1 if the tokens added do not actually invoke a macro. The caller
652 should consume the first token without expanding it.
654 Returns 0 if the macro expander needs more tokens, for macro arguments or to
655 decide whether this is actually a macro invocation. The caller should call
656 macro_expander_add() again with the next token.
658 Returns a positive number to indicate that the returned number of tokens
659 invoke a macro. The number returned might be less than the number of tokens
660 added because it can take a few tokens of lookahead to determine whether the
661 macro invocation is finished. The caller should call
662 macro_expander_get_expansion() to obtain the expansion. */
664 macro_expander_add (struct macro_expander *me, const struct macro_token *mt)
/* Hand MT to the handler for ME's current state.  NOTE(review): the selection
   logic (presumably a switch on me->state) is on lines missing from this
   excerpt. */
672 return me_add_arg (me, mt);
675 return me_enclose (me, mt);
678 return me_keyword (me, mt);
681 return me_equals (me, mt);
688 /* Each argument to a macro function is one of:
690 - A quoted string or other single literal token.
692 - An argument to the macro being expanded, e.g. !1 or a named argument.
696 - A function invocation.
698 Each function invocation yields a character sequence to be turned into a
699 sequence of tokens. The case where that character sequence is a single
700 quoted string is an important special case. */
/* Context handed to the macro function parsing helpers.
   NOTE(review): further members (the `n_input' and `expand' members used
   below) are on lines missing from this excerpt. */
702 struct parse_macro_function_ctx
/* Tokens to parse, starting with the candidate function name. */
704 struct macro_token *input;
/* Nesting budget carried along from macro_expand(). */
706 int nesting_countdown;
/* Set of defined macros. */
707 const struct macro_set *macros;
/* Expander of the macro being expanded; used to resolve macro arguments. */
708 const struct macro_expander *me;
/* Forward declaration: macro_expand() is defined further down but is needed
   by the macro function helpers. */
713 macro_expand (const struct macro_tokens *,
714 int nesting_countdown, const struct macro_set *,
715 const struct macro_expander *, bool *expand, struct macro_tokens *exp);
/* Forward declaration: parse_function_arg() calls expand_macro_function()
   for nested function calls, and that function is defined after it. */
718 expand_macro_function (struct parse_macro_function_ctx *ctx,
719 struct macro_token *output,
720 size_t *input_consumed);
/* Parses one argument of a macro function call, starting at token index I of
   CTX's input, and stores the result in FARG.  The value returned on the
   nested-call path is the number of input tokens the argument spanned.
   NOTE(review): conditions, braces, and the remaining return statements are
   on lines missing from this excerpt. */
723 parse_function_arg (struct parse_macro_function_ctx *ctx,
724 size_t i, struct macro_token *farg)
726 struct macro_token *tokens = ctx->input;
727 const struct token *token = &tokens[i].token;
728 if (token->type == T_MACRO_ID)
/* First possibility: the token names an argument of the macro being
   expanded. */
730 const struct macro_param *param = macro_find_parameter_by_name (
731 ctx->me->macro, token->string);
734 size_t param_idx = param - ctx->me->macro->params;
735 const struct macro_tokens *marg = ctx->me->args[param_idx];
/* Copy the argument's first token directly (presumably when the argument
   consists of exactly one token -- confirm). */
737 macro_token_copy (farg, &marg->mts[0]);
/* Otherwise build one T_MACRO_ID token from the representations of all of
   the argument's tokens, with spaces between them.  Note that this loop
   index has the same name as parameter I and hides it within the loop. */
740 struct string s = DS_EMPTY_INITIALIZER;
741 for (size_t i = 0; i < marg->n; i++)
744 ds_put_byte (&s, ' ');
745 ds_put_substring (&s, marg->mts[i].representation);
/* The token's string takes over S's buffer; the representation gets its own
   separately allocated copy. */
748 struct substring s_copy;
749 ss_alloc_substring (&s_copy, s.ss);
751 *farg = (struct macro_token) {
752 .token = { .type = T_MACRO_ID, .string = s.ss },
753 .representation = s_copy,
/* Second possibility: a nested macro function call beginning at token I. */
759 struct parse_macro_function_ctx subctx = {
760 .input = &ctx->input[i],
761 .n_input = ctx->n_input - i,
762 .nesting_countdown = ctx->nesting_countdown,
763 .macros = ctx->macros,
765 .expand = ctx->expand,
767 size_t subinput_consumed;
768 if (expand_macro_function (&subctx, farg, &subinput_consumed))
769 return subinput_consumed;
/* Fallback: the argument is the single token itself. */
772 macro_token_copy (farg, &tokens[i]);
/* Tries to parse a call to the macro function named FUNCTION at the start of
   CTX's input: the function name, `(', arguments separated by commas, and
   `)'.  The arguments are collected into ARGS and their count is checked
   against MIN_ARGS and MAX_ARGS.  When the closing parenthesis is reached,
   the number of tokens spanned by the call is stored in *INPUT_CONSUMED.
   Every diagnostic now ends with a new-line so that consecutive messages do
   not run together, and the stray quote after the function name in the first
   message is gone.
   NOTE(review): the return statements and the error labels are on lines
   missing from this excerpt. */
777 parse_macro_function (struct parse_macro_function_ctx *ctx,
778 struct macro_tokens *args,
779 struct substring function,
780 int min_args, int max_args,
781 size_t *input_consumed)
783 struct macro_token *tokens = ctx->input;
784 size_t n_tokens = ctx->n_input;
/* The first token must be a T_MACRO_ID that matches FUNCTION under
   ss_equals_case(). */
787 || tokens[0].token.type != T_MACRO_ID
788 || !ss_equals_case (tokens[0].token.string, function))
/* The function name must be followed by a left parenthesis. */
791 if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
793 printf ("`(' expected following %s.\n", function.string);
797 *args = (struct macro_tokens) { .n = 0 };
/* Token 0 is the name and token 1 the parenthesis, so arguments start at
   index 2. */
799 for (size_t i = 2;; )
803 if (tokens[i].token.type == T_RPAREN)
/* The closing parenthesis counts as consumed. */
805 *input_consumed = i + 1;
806 if (args->n < min_args || args->n > max_args)
808 printf ("Wrong number of arguments to %s.\n", function.string);
/* Parse one argument into a fresh slot of ARGS and skip the tokens it
   used. */
814 i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args));
/* An argument is followed by a comma or by the closing parenthesis. */
818 if (tokens[i].token.type == T_COMMA)
820 else if (tokens[i].token.type != T_RPAREN)
822 printf ("Expecting `,' or `)' in %s invocation.\n", function.string);
828 printf ("Missing closing parenthesis in arguments to %s.\n",
/* Error exit: discard the arguments collected so far. */
832 macro_tokens_uninit (args);
/* Tries each supported macro function in turn against the start of CTX's
   input.  For the one that matches, the result token is stored in OUTPUT.
   The functions visible here are !length, !blanks, !concat, !quote, !unquote
   and !null; names are matched with ss_equals_case().
   NOTE(review): the tail of each parse_macro_function() call, braces, `else'
   lines, and the return statements are on lines missing from this excerpt. */
837 expand_macro_function (struct parse_macro_function_ctx *ctx,
838 struct macro_token *output,
839 size_t *input_consumed)
841 struct macro_tokens args;
/* !length: a T_POS_NUM token holding the byte length of the argument's
   representation, with the number's decimal text as its representation. */
843 if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1,
846 size_t length = args.mts[0].representation.length;
847 *output = (struct macro_token) {
848 .token = { .type = T_POS_NUM, .number = length },
849 .representation = ss_cstr (xasprintf ("%zu", length)),
/* !blanks: a run of spaces whose count is the argument's number. */
852 else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
855 /* XXX this isn't right, it might be a character string containing a
856 positive integer, e.g. via !CONCAT. */
857 if (args.mts[0].token.type != T_POS_NUM)
859 printf ("argument to !BLANKS must be positive integer\n");
860 macro_tokens_uninit (&args);
864 struct string s = DS_EMPTY_INITIALIZER;
865 ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
/* The token's string takes over S's buffer; the representation gets its own
   separately allocated copy. */
867 struct substring s_copy;
868 ss_alloc_substring (&s_copy, s.ss);
870 *output = (struct macro_token) {
871 .token = { .type = T_ID, .string = s.ss },
872 .representation = s_copy,
/* !concat: joins the arguments' text.  T_STRING arguments contribute their
   token string; all others contribute their representation. */
875 else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX,
878 struct string s = DS_EMPTY_INITIALIZER;
879 for (size_t i = 0; i < args.n; i++)
881 if (args.mts[i].token.type == T_STRING)
882 ds_put_substring (&s, args.mts[i].token.string);
884 ds_put_substring (&s, args.mts[i].representation);
887 *output = (struct macro_token) {
888 .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss },
890 ss_alloc_substring (&output->representation, s.ss);
/* !quote: a T_STRING argument is copied as is; anything else becomes a
   T_STRING token whose value is the argument's representation. */
892 else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
895 if (args.mts[0].token.type == T_STRING)
896 macro_token_copy (output, &args.mts[0]);
899 *output = (struct macro_token) { .token = { .type = T_STRING } };
900 ss_alloc_substring (&output->token.string, args.mts[0].representation);
901 output->representation = ss_cstr (token_to_string (&output->token));
/* !unquote: a T_STRING argument becomes a T_MACRO_ID token carrying the
   string's value; anything else is copied as is. */
904 else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
907 if (args.mts[0].token.type == T_STRING)
909 *output = (struct macro_token) { .token = { .type = T_MACRO_ID } };
910 ss_alloc_substring (&output->token.string, args.mts[0].token.string);
911 output->representation = ss_cstr (token_to_string (&output->token));
914 macro_token_copy (output, &args.mts[0]);
/* !null is recognized directly, without parse_macro_function(), and yields a
   T_MACRO_ID token with an empty string. */
916 else if (ctx->n_input > 0
917 && ctx->input[0].token.type == T_MACRO_ID
918 && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!null")))
921 *output = (struct macro_token) {
922 .token = { .type = T_MACRO_ID /* XXX*/ },
924 ss_alloc_substring (&output->token.string, ss_cstr (""));
/* The parsed arguments are no longer needed once OUTPUT has been built. */
930 macro_tokens_uninit (&args);
/* Expands the tokens in MTS and appends the result to EXP.  MACROS is the set
   of defined macros.  ME, if not null, is the expander of the macro whose
   body is being expanded and supplies the values of its arguments.  *EXPAND
   controls whether arguments are themselves expanded.  NESTING_COUNTDOWN is
   the remaining nesting budget; once it is used up the tokens are copied
   without expansion.
   The inner copy loop uses its own index name so that it no longer hides the
   outer token index, and the rule list below is properly opened as a
   comment.
   NOTE(review): braces, `else' and `continue' lines, short conditions, and
   the bodies of the !onexpand/!offexpand branches are on lines missing from
   this excerpt. */
935 macro_expand (const struct macro_tokens *mts,
936 int nesting_countdown, const struct macro_set *macros,
937 const struct macro_expander *me, bool *expand,
938 struct macro_tokens *exp)
/* Expansion rules:
942 - Macro names in macro bodies are not expanded by default. !EVAL()
   (the end of this sentence is missing from this excerpt; presumably
   !EVAL() overrides the default -- TODO confirm)
945 - Macro names in arguments to macro invocations (outside of macro bodies)
946 are expanded by default, unless !NOEXPAND. */
/* Nesting budget exhausted: report it and pass the tokens through as is. */
947 if (nesting_countdown <= 0)
949 printf ("maximum nesting level exceeded\n");
950 for (size_t i = 0; i < mts->n; i++)
951 macro_tokens_add (exp, &mts->mts[i]);
955 for (size_t i = 0; i < mts->n; i++)
957 const struct macro_token *mt = &mts->mts[i];
958 const struct token *token = &mt->token;
/* Case 1: a reference to an argument of the macro being expanded. */
959 if (token->type == T_MACRO_ID && me)
961 const struct macro_param *param = macro_find_parameter_by_name (
962 me->macro, token->string);
965 const struct macro_tokens *arg = me->args[param - me->macro->params];
966 //macro_tokens_print (arg, stdout);
/* Expand the argument's tokens (with no enclosing expander) when expansion
   is enabled for it; otherwise copy them unchanged. */
967 if (*expand && param->expand_arg)
968 macro_expand (arg, nesting_countdown, macros, NULL, expand, exp);
970 for (size_t j = 0; j < arg->n; j++)
971 macro_tokens_add (exp, &arg->mts[j]);
/* Case 2: a call to another macro.  Feed the tokens that follow to a new
   expander until it gives a nonzero answer; a synthetic T_ENDCMD token stands
   in once the input runs out. */
978 struct macro_expander *subme;
979 int retval = macro_expander_create (macros, token, &subme);
980 for (size_t j = 1; !retval; j++)
982 const struct macro_token endcmd = { .token = { .type = T_ENDCMD } };
983 retval = macro_expander_add (
984 subme, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
/* On a successful call the called macro's body is expanded with one less
   level of nesting budget. */
989 macro_expand (&subme->macro->body, nesting_countdown - 1, macros,
991 macro_expander_destroy (subme);
995 macro_expander_destroy (subme);
/* Case 3: any token that is not a T_MACRO_ID is copied to the output. */
998 if (token->type != T_MACRO_ID)
1000 macro_tokens_add (exp, mt);
/* Case 4: a macro function call starting at this token. */
1004 /* Maybe each arg should just be a string, either a quoted string or a
1005 non-quoted string containing tokens. */
1006 struct parse_macro_function_ctx ctx = {
1007 .input = &mts->mts[i],
1008 .n_input = mts->n - i,
1009 .nesting_countdown = nesting_countdown,
1014 struct macro_token function_output;
1015 size_t function_consumed;
1016 if (expand_macro_function (&ctx, &function_output, &function_consumed))
/* Skip the tokens the call used; the loop's own increment supplies the
   final step. */
1018 i += function_consumed - 1;
/* A T_MACRO_ID result carries text that is tokenized again; any other result
   is added as a single token. */
1020 if (function_output.token.type == T_MACRO_ID)
1021 macro_tokens_from_string (exp, function_output.token.string,
1022 SEG_MODE_INTERACTIVE /* XXX */);
1024 macro_tokens_add (exp, &function_output);
1025 macro_token_uninit (&function_output);
/* Case 5: the !onexpand and !offexpand directives; any other T_MACRO_ID token
   is copied to the output. */
1030 if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
1032 else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
1035 macro_tokens_add (exp, mt);
/* Expands the body of the macro call collected in ME and appends the result
   to EXP.  The nesting budget starts at settings_get_mnest().  The argument
   values and the finished expansion are also printed to stdout.
   NOTE(review): the declaration and initial value of `expand', and any
   conditions around the printing, are on lines missing from this excerpt. */
1040 macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp)
/* Dump each parameter's name followed by the tokens collected for it. */
1043 for (size_t i = 0; i < me->macro->n_params; i++)
1045 printf ("%s:\n", me->macro->params[i].name);
1046 macro_tokens_print (me->args[i], stdout);
1051 macro_expand (&me->macro->body, settings_get_mnest (),
1052 me->macros, me, &expand, exp);
/* Dump the expansion. */
1055 printf ("expansion:\n");
1056 macro_tokens_print (exp, stdout);