From f38e4f61c183eef493b8d2717bf0a9702b22769b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 9 May 2021 20:41:06 -0700 Subject: [PATCH] work on macro functions --- src/language/lexer/macro.c | 325 +++++++++++++++++++++++-------------- src/language/lexer/macro.h | 1 + 2 files changed, 205 insertions(+), 121 deletions(-) diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index e0b964fd69..05e3b45b83 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -18,6 +18,7 @@ #include "language/lexer/macro.h" +#include #include #include "data/settings.h" @@ -65,12 +66,18 @@ macro_tokens_uninit (struct macro_tokens *mts) free (mts->mts); } -void -macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt) +struct macro_token * +macro_tokens_add_uninit (struct macro_tokens *mts) { if (mts->n >= mts->allocated) mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts); - macro_token_copy (&mts->mts[mts->n++], mt); + return &mts->mts[mts->n++]; +} + +void +macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt) +{ + macro_token_copy (macro_tokens_add_uninit (mts), mt); } void @@ -488,11 +495,24 @@ macro_expander_add (struct macro_expander *me, const struct macro_token *mt) } } +/* Each argument to a macro function is one of: + + - A quoted string or other single literal token. + + - An argument to the macro being expanded, e.g. !1 or a named argument. + + - !*. + + - A function invocation. + + Each function invocation yields a character sequence to be turned into a + sequence of tokens. The case where that character sequence is a single + quoted string is an important special case. +*/ struct parse_macro_function_ctx { - const struct macro_tokens *mts; - size_t *idx; - struct macro_tokens *args; + struct macro_token *input; + size_t n_input; int nesting_countdown; const struct macro_set *macros; const struct macro_expander *me; @@ -504,92 +524,197 @@ macro_expand (const struct macro_tokens *, int nesting_countdown, const struct macro_set *, const struct macro_expander *, bool *expand, struct macro_tokens *exp); +static bool +expand_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_token *output, + size_t *input_consumed); + +static size_t +parse_function_arg (struct parse_macro_function_ctx *ctx, + size_t i, struct macro_token *farg) +{ + struct macro_token *tokens = ctx->input; + const struct token *token = &tokens[i].token; + if (token->type == T_MACRO_ID) + { + const struct macro_param *param = macro_find_parameter_by_name ( + ctx->me->macro, token->string); + if (param) + { + size_t param_idx = param - ctx->me->macro->params; + const struct macro_tokens *marg = ctx->me->args[param_idx]; + if (marg->n == 1) + macro_token_copy (farg, &marg->mts[0]); + else + { + struct string s = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < marg->n; i++) + { + if (i) + ds_put_byte (&s, ' '); + ds_put_substring (&s, marg->mts[i].representation); + } + + struct substring s_copy; + ss_alloc_substring (&s_copy, s.ss); + + *farg = (struct macro_token) { + .token = { .type = T_MACRO_ID, .string = s.ss }, + .representation = s_copy, + }; + } + return 1; + } + + struct parse_macro_function_ctx subctx = { + .input = &ctx->input[i], + .n_input = ctx->n_input - i, + .nesting_countdown = ctx->nesting_countdown, + .macros = ctx->macros, + .me = ctx->me, + .expand = ctx->expand, + }; + size_t subinput_consumed; + if (expand_macro_function (&subctx, farg, &subinput_consumed)) + return subinput_consumed; + } + + macro_token_copy (farg, &tokens[i]); + return 1; +} + static bool parse_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_tokens *args, struct substring function, - int min_args, int max_args) + int min_args, int max_args, + size_t *input_consumed) { - struct macro_token *tokens = ctx->mts->mts; - size_t n_tokens = ctx->mts->n; + struct macro_token *tokens = ctx->input; + size_t n_tokens = ctx->n_input; - if (!ss_equals_case (tokens[0].token.string, function)) + if (!n_tokens + || tokens[0].token.type != T_MACRO_ID + || !ss_equals_case (tokens[0].token.string, function)) return false; - size_t lparen_idx = *ctx->idx + 1; - if (lparen_idx >= n_tokens || tokens[lparen_idx].token.type != T_LPAREN) + if (n_tokens < 2 || tokens[1].token.type != T_LPAREN) { printf ("`(' expected following %s'\n", function.string); return false; } - *ctx->args = (struct macro_tokens) { .n = 0 }; + *args = (struct macro_tokens) { .n = 0 }; - size_t i = lparen_idx + 1; - for (size_t j = i; ; j++) + for (size_t i = 2;; ) { - if (j >= n_tokens) + if (i >= n_tokens) + goto unexpected_end; + if (tokens[i].token.type == T_RPAREN) { - printf ("Missing closing parenthesis in arguments to %s.\n", - function.string); - goto error; + *input_consumed = i + 1; + if (args->n < min_args || args->n > max_args) + { + printf ("Wrong number of arguments to %s.\n", function.string); + goto error; + } + return true; } - int type = tokens[j].token.type; - if (type == T_LPAREN) + i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args)); + if (i >= n_tokens) + goto unexpected_end; + + if (tokens[i].token.type == T_COMMA) + i++; + else if (tokens[i].token.type != T_RPAREN) { - int paren_nesting_level = 1; - do - { - j++; - if (j >= n_tokens) - { - printf ("Missing closing parenthesis in argument %zu to %s.\n", - ctx->args->n + 1, function.string); - goto error; - } - if (tokens[j].token.type == T_LPAREN) - paren_nesting_level++; - else if (tokens[j].token.type == T_RPAREN) - paren_nesting_level--; - } - while (paren_nesting_level != 0); + printf ("Expecting `,' or `)' in %s invocation.", function.string); + goto error; } - else if (type == T_RPAREN || type == T_COMMA) + } + +unexpected_end: + printf ("Missing closing parenthesis in arguments to %s.\n", + function.string); + /* Fall through. */ +error: + macro_tokens_uninit (args); + return false; +} + +static bool +expand_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_token *output, + size_t *input_consumed) +{ + struct macro_tokens args; + + if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1, + input_consumed)) + { + size_t length = args.mts[0].representation.length; + *output = (struct macro_token) { + .token = { .type = T_POS_NUM, .number = length }, + .representation = ss_cstr (xasprintf ("%zu", length)), + }; + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1, + input_consumed)) + { + /* XXX this isn't right, it might be a character string containing a + positive integer, e.g. via !CONCAT. */ + if (args.mts[0].token.type != T_POS_NUM) { - struct macro_tokens expanded_arg = { .n = 0 }; - macro_expand (&(const struct macro_tokens) { .mts = &tokens[i], .n = j - i }, - ctx->nesting_countdown, ctx->macros, - ctx->me, ctx->expand, &expanded_arg); + printf ("argument to !BLANKS must be positive integer\n"); + macro_token_uninit (output); + return false; + } + + struct string s = DS_EMPTY_INITIALIZER; + ds_put_byte_multiple (&s, ' ', args.mts[0].token.number); - if (expanded_arg.n != 1) + *output = (struct macro_token) { + .token = { .type = T_ID, .string = s.ss }, + .representation = s.ss, + }; + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX, + input_consumed)) + { + struct string s; + bool all_strings = true; + for (size_t i = 0; i < args.n; i++) + { + if (args.mts[i].token.type == T_STRING) + ds_put_substring (&s, args.mts[i].token.string); + else { - printf ("argument %zu to %s must be a single token " - "(not %zu tokens)\n", ctx->args->n + 1, function.string, - expanded_arg.n); - macro_tokens_uninit (&expanded_arg); - goto error; + all_strings = false; + ds_put_substring (&s, args.mts[i].representation); } + } - macro_tokens_add (ctx->args, &expanded_arg.mts[0]); - macro_tokens_uninit (&expanded_arg); - - i = j + 1; - if (type == T_RPAREN) - break; + if (all_strings) + { + *output = (struct macro_token) { + .token = { .type = T_STRING, .string = s.ss }, + }; + output->representation = ss_cstr (token_to_string (&output->token)); + } + else + { + *output = (struct macro_token) { + .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss }, + }; + ss_alloc_substring (&output->representation, s.ss); } } + else + return false; - if (ctx->args->n < min_args || ctx->args->n > max_args) - { - printf ("Wrong number of argument to %s.\n", function.string); - goto error; - } - *ctx->idx = i; + macro_tokens_uninit (&args); return true; - -error: - macro_tokens_uninit (ctx->args); - return false; } static void @@ -616,9 +741,8 @@ macro_expand (const struct macro_tokens *mts, me->macro, token->string); if (param) { - printf ("expand %s to:\n", param->name); const struct macro_tokens *arg = me->args[param - me->macro->params]; - macro_tokens_print (arg, stdout); + //macro_tokens_print (arg, stdout); if (*expand && param->expand_arg) macro_expand (arg, nesting_countdown, macros, NULL, expand, exp); else @@ -656,73 +780,28 @@ macro_expand (const struct macro_tokens *mts, continue; } -#if 0 - struct macro_function - { - const char *name; - int min_args; - int max_args; - }; - static const struct macro_function functions[] = { - { "!length", 1, 1 }, - { "!concat", 1, INT_MAX }, - { "!substr", 2, 3 }, - { "!index", 2, 2 }, - { "!head", 1, 1 }, - { "!tail", 1, 1 }, - { "!quote", 1, 1 }, - { "!unquote", 1, 1 }, - { "!upcase", 1, 1 }, - { "!blanks", 1, 1 }, - { "!eval", 1, 1 }, - }; -#endif /* Maybe each arg should just be a string, either a quoted string or a non-quoted string containing tokens. */ - struct macro_tokens args; struct parse_macro_function_ctx ctx = { - .mts = mts, - .idx = &i, - .args = &args, + .input = &mts->mts[i], + .n_input = mts->n - i, .nesting_countdown = nesting_countdown, .macros = macros, .me = me, .expand = expand, }; - if (parse_macro_function (&ctx, ss_cstr ("!length"), 1, 1)) - { - size_t length = args.mts[0].representation.length; - struct macro_token mt = { - .token = { .type = T_POS_NUM, .number = length }, - .representation = ss_cstr (xasprintf ("%zu", length)), - }; - macro_tokens_add (exp, &mt); - macro_token_uninit (&mt); - - macro_tokens_uninit (&args); - } - else if (parse_macro_function (&ctx, ss_cstr ("!blanks"), 1, 1)) + struct macro_token function_output; + size_t function_consumed; + if (expand_macro_function (&ctx, &function_output, &function_consumed)) { - /* XXX this isn't right, it might be a character string containing a - positive integer, e.g. via !CONCAT. */ - if (args.mts[0].token.type != T_POS_NUM) - printf ("argument to !BLANKS must be positive integer\n"); - else - { - struct string s = DS_EMPTY_INITIALIZER; - ds_put_byte_multiple (&s, ' ', args.mts[0].token.number); + i += function_consumed - 1; - struct macro_token mt = { - .token = { .type = T_ID, .string = s.ss }, - .representation = s.ss - }; - macro_tokens_add (exp, &mt); + - ds_destroy (&s); - } - macro_tokens_uninit (&args); + continue; } - else if (ss_equals_case (token->string, ss_cstr ("!onexpand"))) + + if (ss_equals_case (token->string, ss_cstr ("!onexpand"))) *expand = true; else if (ss_equals_case (token->string, ss_cstr ("!offexpand"))) *expand = false; @@ -734,17 +813,21 @@ macro_expand (const struct macro_tokens *mts, void macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp) { +#if 0 for (size_t i = 0; i < me->macro->n_params; i++) { printf ("%s:\n", me->macro->params[i].name); macro_tokens_print (me->args[i], stdout); } +#endif bool expand = true; macro_expand (&me->macro->body, settings_get_mnest (), me->macros, me, &expand, exp); +#if 0 printf ("expansion:\n"); macro_tokens_print (exp, stdout); +#endif } diff --git a/src/language/lexer/macro.h b/src/language/lexer/macro.h index 61cfbc0937..6eb239240a 100644 --- a/src/language/lexer/macro.h +++ b/src/language/lexer/macro.h @@ -44,6 +44,7 @@ struct macro_tokens void macro_tokens_copy (struct macro_tokens *, const struct macro_tokens *); void macro_tokens_uninit (struct macro_tokens *); +struct macro_token *macro_tokens_add_uninit (struct macro_tokens *); void macro_tokens_add (struct macro_tokens *, const struct macro_token *); void macro_tokens_print (const struct macro_tokens *, FILE *); -- 2.30.2