#include "language/lexer/macro.h"
+#include <limits.h>
#include <stdlib.h>
#include "data/settings.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
+/* Initializes DST as a deep copy of SRC: the token is duplicated with
+   token_copy() and the representation text gets its own allocation through
+   ss_alloc_substring(). */
+void
+macro_token_copy (struct macro_token *dst, const struct macro_token *src)
+{
+  const struct substring text = src->representation;
+
+  token_copy (&dst->token, &src->token);
+  ss_alloc_substring (&dst->representation, text);
+}
+
+/* Releases what MT holds: the token is passed to token_uninit() and the
+   representation string to ss_dealloc().  MT itself is not freed. */
+void
+macro_token_uninit (struct macro_token *mt)
+{
+  struct token *tok = &mt->token;
+  struct substring *repr = &mt->representation;
+
+  token_uninit (tok);
+  ss_dealloc (repr);
+}
+
+/* Initializes DST as a deep copy of SRC.  The new array is sized for exactly
+   SRC->n elements, and each element is duplicated with
+   macro_token_copy(). */
+void
+macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
+{
+  const size_t count = src->n;
+
+  *dst = (struct macro_tokens) {
+    .mts = xmalloc (count * sizeof *dst->mts),
+    .n = count,
+    .allocated = count,
+  };
+
+  size_t idx = 0;
+  while (idx < count)
+    {
+      macro_token_copy (&dst->mts[idx], &src->mts[idx]);
+      idx++;
+    }
+}
+
+/* Uninitializes every element of MTS and then frees the element array.  The
+   'struct macro_tokens' itself is not freed. */
+void
+macro_tokens_uninit (struct macro_tokens *mts)
+{
+  struct macro_token *elems = mts->mts;
+  const size_t count = mts->n;
+
+  size_t idx = 0;
+  while (idx < count)
+    {
+      macro_token_uninit (&elems[idx]);
+      idx++;
+    }
+  free (elems);
+}
+
+/* Appends one element to MTS, growing the array with x2nrealloc() when it is
+   full, and returns a pointer to the new element.  The element's contents
+   are not initialized here; the caller has to fill it in. */
+struct macro_token *
+macro_tokens_add_uninit (struct macro_tokens *mts)
+{
+  if (mts->allocated <= mts->n)
+    mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
+
+  struct macro_token *slot = &mts->mts[mts->n];
+  mts->n++;
+  return slot;
+}
+
+/* Appends a deep copy of MT to MTS.  MT itself is left untouched. */
+void
+macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
+{
+  struct macro_token *slot = macro_tokens_add_uninit (mts);
+  macro_token_copy (slot, mt);
+}
+
+/* Tokenizes SRC, segmenting it according to MODE, and appends each token to
+   MTS together with the slice of SRC that it was scanned from.
+
+   Tokens whose type satisfies is_scan_type() are not appended; reporting an
+   error for those other than SCAN_SKIP is still to be done. */
+void
+macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
+                          enum segmenter_mode mode)
+{
+  struct state
+    {
+      struct segmenter segmenter;
+      struct substring body;
+    };
+
+  struct state cur = { .segmenter = SEGMENTER_INIT (mode), .body = src };
+
+  /* Snapshot to rewind to when the scanner asks to back up. */
+  struct state checkpoint = cur;
+
+  while (cur.body.length > 0)
+    {
+      /* The representation starts where scanning of this token starts; its
+         length is filled in once the token's end is known. */
+      struct macro_token mt = {
+        .token = { .type = T_STOP },
+        .representation = { .string = cur.body.string },
+      };
+      struct token *token = &mt.token;
+
+      struct scanner scanner;
+      scanner_init (&scanner, token);
+
+      bool scanning = true;
+      while (scanning)
+        {
+          enum segment_type type;
+          int seg_len = segmenter_push (&cur.segmenter, cur.body.string,
+                                        cur.body.length, true, &type);
+          assert (seg_len >= 0);
+
+          struct substring segment = ss_head (cur.body, seg_len);
+          ss_advance (&cur.body, seg_len);
+
+          switch (scanner_push (&scanner, type, segment, token))
+            {
+            case SCAN_SAVE:
+              checkpoint = cur;
+              break;
+
+            case SCAN_BACK:
+              cur = checkpoint;
+              scanning = false;
+              break;
+
+            case SCAN_DONE:
+              scanning = false;
+              break;
+
+            default:
+              break;
+            }
+        }
+
+      /* We have a token in 'token'. */
+      if (!is_scan_type (token->type))
+        {
+          mt.representation.length
+            = cur.body.string - mt.representation.string;
+          macro_tokens_add (mts, &mt);
+        }
+      else if (token->type != SCAN_SKIP)
+        {
+          /* XXX report error */
+        }
+      token_uninit (token);
+    }
+}
+
+/* Passes the token of each element of MTS, in order, to token_print() with
+   STREAM as the destination. */
+void
+macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
+{
+  const struct macro_token *elems = mts->mts;
+
+  size_t idx = 0;
+  while (idx < mts->n)
+    {
+      token_print (&elems[idx].token, stream);
+      idx++;
+    }
+}
+
void
macro_destroy (struct macro *m)
{
struct macro_param *p = &m->params[i];
free (p->name);
- tokens_uninit (&p->def);
+ macro_tokens_uninit (&p->def);
switch (p->arg_type)
{
break;
case ARG_CHAREND:
- token_destroy (&p->charend);
+ token_uninit (&p->charend);
break;
case ARG_ENCLOSE:
- token_destroy (&p->enclose[0]);
- token_destroy (&p->enclose[1]);
+ token_uninit (&p->enclose[0]);
+ token_uninit (&p->enclose[1]);
break;
case ARG_CMDEND:
}
}
free (m->params);
- ss_dealloc (&m->body);
- tokens_uninit (&m->body_tokens);
+ macro_tokens_uninit (&m->body);
free (m);
}
\f
size_t n_tokens;
const struct macro *macro;
- struct tokens **args;
+ struct macro_tokens **args;
const struct macro_param *param;
};
if (!me->args[i])
{
me->args[i] = xmalloc (sizeof *me->args[i]);
- tokens_copy (me->args[i], &me->macro->params[i].def);
+ macro_tokens_copy (me->args[i], &me->macro->params[i].def);
}
return me->n_tokens;
}
}
static int
-me_add_arg (struct macro_expander *me, const struct token *token)
+me_add_arg (struct macro_expander *me, const struct macro_token *mt)
{
+ const struct token *token = &mt->token;
if (token->type == T_STOP)
{
msg (SE, _("Unexpected end of file reading argument %s "
me->n_tokens++;
const struct macro_param *p = me->param;
- struct tokens **argp = &me->args[p - me->macro->params];
+ struct macro_tokens **argp = &me->args[p - me->macro->params];
if (!*argp)
*argp = xzalloc (sizeof **argp);
- struct tokens *arg = *argp;
+ struct macro_tokens *arg = *argp;
if (p->arg_type == ARG_N_TOKENS)
{
- tokens_add (arg, token);
+ macro_tokens_add (arg, mt);
if (arg->n >= p->n_tokens)
return me_next_arg (me);
return 0;
{
if (token->type == T_ENDCMD || token->type == T_STOP)
return me_next_arg (me);
- tokens_add (arg, token);
+ macro_tokens_add (arg, mt);
return 0;
}
else
= p->arg_type == ARG_CMDEND ? &p->charend : &p->enclose[1];
if (token_equal (token, end))
return me_next_arg (me);
- tokens_add (arg, token);
+ macro_tokens_add (arg, mt);
return 0;
}
}
static int
-me_expected (struct macro_expander *me, const struct token *token,
- const struct token *wanted)
-{
- char *actual = token_to_string (token);
- if (!actual)
- actual = xstrdup ("<eof>");
- char *expected = token_to_string (wanted);
- msg (SE, _("Found `%s' while expecting `%s' reading argument %s "
+me_expected (struct macro_expander *me, const struct macro_token *actual,
+ const struct token *expected)
+{
+ const struct substring actual_s
+ = (actual->representation.length ? actual->representation
+ : ss_cstr (_("<end of input>")));
+ char *expected_s = token_to_string (expected);
+ msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
"to macro %s."),
- actual, expected, me->param->name, me->macro->name);
- free (expected);
- free (actual);
+ (int) actual_s.length, actual_s.string, expected_s,
+ me->param->name, me->macro->name);
+ free (expected_s);
return me_error (me);
}
static int
-me_enclose (struct macro_expander *me, const struct token *token)
+me_enclose (struct macro_expander *me, const struct macro_token *mt)
{
+ const struct token *token = &mt->token;
me->n_tokens++;
if (token_equal (&me->param->enclose[0], token))
return 0;
}
- return me_expected (me, token, &me->param->enclose[0]);
+ return me_expected (me, mt, &me->param->enclose[0]);
}
static const struct macro_param *
}
static int
-me_keyword (struct macro_expander *me, const struct token *token)
+me_keyword (struct macro_expander *me, const struct macro_token *mt)
{
+ const struct token *token = &mt->token;
if (token->type != T_ID)
return me_finished (me);
}
static int
-me_equals (struct macro_expander *me, const struct token *token)
+me_equals (struct macro_expander *me, const struct macro_token *mt)
{
+ const struct token *token = &mt->token;
me->n_tokens++;
if (token->type == T_EQUALS)
return 0;
}
- const struct token equals = { .type = T_EQUALS };
- return me_expected (me, token, &equals);
+ return me_expected (me, mt, &(struct token) { .type = T_EQUALS });
}
int
for (size_t i = 0; i < me->macro->n_params; i++)
if (me->args[i])
{
- tokens_uninit (me->args[i]);
+ macro_tokens_uninit (me->args[i]);
free (me->args[i]);
}
free (me->args);
macro invocation is finished. The caller should call
macro_expander_get_expansion() to obtain the expansion. */
int
-macro_expander_add (struct macro_expander *me, const struct token *token)
+macro_expander_add (struct macro_expander *me, const struct macro_token *mt)
{
switch (me->state)
{
return -1;
case ME_ARG:
- return me_add_arg (me, token);
+ return me_add_arg (me, mt);
case ME_ENCLOSE:
- return me_enclose (me, token);
+ return me_enclose (me, mt);
case ME_KEYWORD:
- return me_keyword (me, token);
+ return me_keyword (me, mt);
case ME_EQUALS:
- return me_equals (me, token);
+ return me_equals (me, mt);
default:
NOT_REACHED ();
}
}
+/* Each argument to a macro function is one of:
+
+ - A quoted string or other single literal token.
+
+ - An argument to the macro being expanded, e.g. !1 or a named argument.
+
+ - !*.
+
+ - A function invocation.
+
+ Each function invocation yields a character sequence to be turned into a
+ sequence of tokens. The case where that character sequence is a single
+ quoted string is an important special case.
+*/
struct parse_macro_function_ctx
{
- const struct tokens *tokens;
- size_t *idx;
- struct tokens *args;
+ struct macro_token *input;
+ size_t n_input;
int nesting_countdown;
const struct macro_set *macros;
const struct macro_expander *me;
};
static void
-macro_expand (const struct tokens *tokens, int nesting_countdown,
- const struct macro_set *macros, const struct macro_expander *me,
- bool *expand, struct tokens *exp);
+macro_expand (const struct macro_tokens *,
+ int nesting_countdown, const struct macro_set *,
+ const struct macro_expander *, bool *expand, struct macro_tokens *exp);
+
+static bool
+expand_macro_function (struct parse_macro_function_ctx *ctx,
+ struct macro_token *output,
+ size_t *input_consumed);
+
+/* Parses a single argument of a macro function call.  The argument starts at
+   token index I of CTX->input, and the result is stored in *FARG, which owns
+   its own storage in every case.
+
+   A T_MACRO_ID token is handled specially:
+
+   - If it names a parameter of the macro being expanded, the corresponding
+     argument is used.  A one-token argument is copied as is; otherwise the
+     representations of its tokens are joined with single spaces into one
+     T_MACRO_ID token.
+
+   - Otherwise, if it starts a nested macro function call, that call is
+     expanded into *FARG.
+
+   Any other token, or a T_MACRO_ID that matches neither case, is copied
+   literally.
+
+   Returns the number of input tokens consumed. */
+static size_t
+parse_function_arg (struct parse_macro_function_ctx *ctx,
+                    size_t i, struct macro_token *farg)
+{
+  struct macro_token *tokens = ctx->input;
+  const struct token *token = &tokens[i].token;
+  if (token->type == T_MACRO_ID)
+    {
+      /* CTX->me is null when macro_expand() is working on an argument that
+         is itself being expanded, so there is no macro whose parameters
+         could be looked up. */
+      const struct macro_param *param
+        = (ctx->me
+           ? macro_find_parameter_by_name (ctx->me->macro, token->string)
+           : NULL);
+      if (param)
+        {
+          size_t param_idx = param - ctx->me->macro->params;
+          const struct macro_tokens *marg = ctx->me->args[param_idx];
+          if (marg->n == 1)
+            macro_token_copy (farg, &marg->mts[0]);
+          else
+            {
+              struct string s = DS_EMPTY_INITIALIZER;
+              for (size_t j = 0; j < marg->n; j++)
+                {
+                  if (j)
+                    ds_put_byte (&s, ' ');
+                  ds_put_substring (&s, marg->mts[j].representation);
+                }
+
+              /* The token takes over S's buffer; the representation gets a
+                 separate copy so that both can be freed independently. */
+              struct substring s_copy;
+              ss_alloc_substring (&s_copy, s.ss);
+
+              *farg = (struct macro_token) {
+                .token = { .type = T_MACRO_ID, .string = s.ss },
+                .representation = s_copy,
+              };
+            }
+          return 1;
+        }
+
+      struct parse_macro_function_ctx subctx = {
+        .input = &ctx->input[i],
+        .n_input = ctx->n_input - i,
+        .nesting_countdown = ctx->nesting_countdown,
+        .macros = ctx->macros,
+        .me = ctx->me,
+        .expand = ctx->expand,
+      };
+      size_t subinput_consumed;
+      if (expand_macro_function (&subctx, farg, &subinput_consumed))
+        return subinput_consumed;
+    }
+
+  macro_token_copy (farg, &tokens[i]);
+  return 1;
+}
static bool
parse_macro_function (struct parse_macro_function_ctx *ctx,
+ struct macro_tokens *args,
struct substring function,
- int min_args, int max_args)
+ int min_args, int max_args,
+ size_t *input_consumed)
{
- const struct token *tokens = ctx->tokens->tokens;
- size_t n_tokens = ctx->tokens->n;
+ struct macro_token *tokens = ctx->input;
+ size_t n_tokens = ctx->n_input;
- if (!ss_equals_case (tokens[0].string, function))
+ if (!n_tokens
+ || tokens[0].token.type != T_MACRO_ID
+ || !ss_equals_case (tokens[0].token.string, function))
return false;
- size_t lparen_idx = *ctx->idx + 1;
- if (lparen_idx >= n_tokens || tokens[lparen_idx].type != T_LPAREN)
+ if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
{
printf ("`(' expected following %s'\n", function.string);
return false;
}
- *ctx->args = (struct tokens) { .n = 0 };
+ *args = (struct macro_tokens) { .n = 0 };
- size_t i = lparen_idx + 1;
- for (size_t j = i; ; j++)
+ for (size_t i = 2;; )
{
- if (j >= n_tokens)
+ if (i >= n_tokens)
+ goto unexpected_end;
+ if (tokens[i].token.type == T_RPAREN)
{
- printf ("Missing closing parenthesis in arguments to %s.\n",
- function.string);
- goto error;
+ *input_consumed = i + 1;
+ if (args->n < min_args || args->n > max_args)
+ {
+ printf ("Wrong number of arguments to %s.\n", function.string);
+ goto error;
+ }
+ return true;
}
- int type = tokens[j].type;
- if (type == T_LPAREN)
+ i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args));
+ if (i >= n_tokens)
+ goto unexpected_end;
+
+ if (tokens[i].token.type == T_COMMA)
+ i++;
+ else if (tokens[i].token.type != T_RPAREN)
{
- int paren_nesting_level = 1;
- do
- {
- j++;
- if (j >= n_tokens)
- {
- printf ("Missing closing parenthesis in argument %zu to %s.\n",
- ctx->args->n + 1, function.string);
- goto error;
- }
- if (tokens[j].type == T_LPAREN)
- paren_nesting_level++;
- else if (tokens[j].type == T_RPAREN)
- paren_nesting_level--;
- }
- while (paren_nesting_level != 0);
+ printf ("Expecting `,' or `)' in %s invocation.", function.string);
+ goto error;
}
- else if (type == T_RPAREN || type == T_COMMA)
+ }
+
+unexpected_end:
+ printf ("Missing closing parenthesis in arguments to %s.\n",
+ function.string);
+ /* Fall through. */
+error:
+ macro_tokens_uninit (args);
+ return false;
+}
+
+/* Tries to expand a macro function call at the start of CTX->input. The
+ functions recognized here are !length, !blanks, !concat, !quote and
+ !unquote.
+
+ On success stores the result in *OUTPUT, which then owns its token string
+ and representation, stores the number of input tokens consumed in
+ *INPUT_CONSUMED, and returns true. Returns false if no function call is
+ recognized or if the argument to !blanks is not a T_POS_NUM token. */
+static bool
+expand_macro_function (struct parse_macro_function_ctx *ctx,
+ struct macro_token *output,
+ size_t *input_consumed)
+{
+ struct macro_tokens args;
+
+ if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1,
+ input_consumed))
+ {
+ size_t length = args.mts[0].representation.length;
+ *output = (struct macro_token) {
+ .token = { .type = T_POS_NUM, .number = length },
+ .representation = ss_cstr (xasprintf ("%zu", length)),
+ };
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
+ input_consumed))
+ {
+ /* XXX this isn't right, it might be a character string containing a
+ positive integer, e.g. via !CONCAT. */
+ if (args.mts[0].token.type != T_POS_NUM)
{
- const struct tokens unexpanded_arg = {
- .tokens = CONST_CAST (struct token *, &tokens[i]),
- .n = j - i,
- };
- struct tokens expanded_arg = { .n = 0 };
- macro_expand (&unexpanded_arg, ctx->nesting_countdown, ctx->macros,
- ctx->me, ctx->expand, &expanded_arg);
+ printf ("argument to !BLANKS must be positive integer\n");
+ macro_tokens_uninit (&args);
+ return false;
+ }
+
+ struct string s = DS_EMPTY_INITIALIZER;
+ ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
+
+ struct substring s_copy;
+ ss_alloc_substring (&s_copy, s.ss);
- if (expanded_arg.n != 1)
+ *output = (struct macro_token) {
+ .token = { .type = T_ID, .string = s.ss },
+ .representation = s_copy,
+ };
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX,
+ input_consumed))
+ {
+ /* S must start out empty: ds_put_substring() appends to whatever the
+ string already holds. */
+ struct string s = DS_EMPTY_INITIALIZER;
+ bool all_strings = true;
+ for (size_t i = 0; i < args.n; i++)
+ {
+ if (args.mts[i].token.type == T_STRING)
+ ds_put_substring (&s, args.mts[i].token.string);
+ else
{
- printf ("argument %zu to %s must be a single token "
- "(not %zu tokens)\n", ctx->args->n + 1, function.string,
- expanded_arg.n);
- tokens_uninit (&expanded_arg);
- goto error;
+ all_strings = false;
+ ds_put_substring (&s, args.mts[i].representation);
}
+ }
- tokens_add (ctx->args, &expanded_arg.tokens[0]);
- tokens_uninit (&expanded_arg);
-
- i = j + 1;
- if (type == T_RPAREN)
- break;
+ if (all_strings)
+ {
+ *output = (struct macro_token) {
+ .token = { .type = T_STRING, .string = s.ss },
+ };
+ output->representation = ss_cstr (token_to_string (&output->token));
+ }
+ else
+ {
+ *output = (struct macro_token) {
+ .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss },
+ };
+ ss_alloc_substring (&output->representation, s.ss);
}
}
-
- if (ctx->args->n < min_args || ctx->args->n > max_args)
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
+ input_consumed))
{
- printf ("Wrong number of argument to %s.\n", function.string);
- goto error;
+ if (args.mts[0].token.type == T_STRING)
+ macro_token_copy (output, &args.mts[0]);
+ else
+ {
+ *output = (struct macro_token) { .token = { .type = T_STRING } };
+ ss_alloc_substring (&output->token.string, args.mts[0].representation);
+ output->representation = ss_cstr (token_to_string (&output->token));
+ }
}
- *ctx->idx = i;
- return true;
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
+ input_consumed))
+ {
+ if (args.mts[0].token.type == T_STRING)
+ {
+ *output = (struct macro_token) { .token = { .type = T_MACRO_ID } };
+ ss_alloc_substring (&output->token.string, args.mts[0].token.string);
+ output->representation = ss_cstr (token_to_string (&output->token));
+ }
+ else
+ macro_token_copy (output, &args.mts[0]);
+ }
+ else
+ return false;
-error:
- tokens_uninit (ctx->args);
- return false;
+ macro_tokens_uninit (&args);
+ return true;
}
static void
-macro_expand (const struct tokens *tokens, int nesting_countdown,
- const struct macro_set *macros, const struct macro_expander *me,
- bool *expand, struct tokens *exp)
+macro_expand (const struct macro_tokens *mts,
+ int nesting_countdown, const struct macro_set *macros,
+ const struct macro_expander *me, bool *expand,
+ struct macro_tokens *exp)
{
if (nesting_countdown <= 0)
{
printf ("maximum nesting level exceeded\n");
- for (size_t i = 0; i < tokens->n; i++)
- tokens_add (exp, &tokens->tokens[i]);
+ for (size_t i = 0; i < mts->n; i++)
+ macro_tokens_add (exp, &mts->mts[i]);
return;
}
- for (size_t i = 0; i < tokens->n; i++)
+ for (size_t i = 0; i < mts->n; i++)
{
- const struct token *token = &tokens->tokens[i];
+ const struct macro_token *mt = &mts->mts[i];
+ const struct token *token = &mt->token;
if (token->type == T_MACRO_ID && me)
{
const struct macro_param *param = macro_find_parameter_by_name (
me->macro, token->string);
if (param)
{
- printf ("expand %s to:\n", param->name);
- const struct tokens *arg = me->args[param - me->macro->params];
- tokens_print (arg, stdout);
+ const struct macro_tokens *arg = me->args[param - me->macro->params];
+ //macro_tokens_print (arg, stdout);
if (*expand && param->expand_arg)
macro_expand (arg, nesting_countdown, macros, NULL, expand, exp);
else
for (size_t i = 0; i < arg->n; i++)
- tokens_add (exp, &arg->tokens[i]);
+ macro_tokens_add (exp, &arg->mts[i]);
continue;
}
}
int retval = macro_expander_create (macros, token, &subme);
for (size_t j = 1; !retval; j++)
{
- static const struct token stop = { .type = T_STOP };
+ const struct macro_token stop = { .token = { .type = T_STOP } };
retval = macro_expander_add (
- subme, i + j < tokens->n ? &tokens->tokens[i + j] : &stop);
+ subme, i + j < mts->n ? &mts->mts[i + j] : &stop);
}
if (retval > 0)
{
i += retval - 1;
- macro_expand (&subme->macro->body_tokens, nesting_countdown - 1,
- macros, subme, expand, exp);
+ macro_expand (&subme->macro->body, nesting_countdown - 1, macros,
+ subme, expand, exp);
macro_expander_destroy (subme);
continue;
}
if (token->type != T_MACRO_ID)
{
- tokens_add (exp, token);
+ macro_tokens_add (exp, mt);
continue;
}
-#if 0
- struct macro_function
- {
- const char *name;
- int min_args;
- int max_args;
- };
- static const struct macro_function functions[] = {
- { "!length", 1, 1 },
- { "!concat", 1, INT_MAX },
- { "!substr", 2, 3 },
- { "!index", 2, 2 },
- { "!head", 1, 1 },
- { "!tail", 1, 1 },
- { "!quote", 1, 1 },
- { "!unquote", 1, 1 },
- { "!upcase", 1, 1 },
- { "!blanks", 1, 1 },
- { "!eval", 1, 1 },
- };
-#endif
- struct tokens args;
+ /* Maybe each arg should just be a string, either a quoted string or a
+ non-quoted string containing tokens. */
struct parse_macro_function_ctx ctx = {
- .tokens = tokens,
- .idx = &i,
- .args = &args,
+ .input = &mts->mts[i],
+ .n_input = mts->n - i,
.nesting_countdown = nesting_countdown,
.macros = macros,
.me = me,
.expand = expand,
};
- if (parse_macro_function (&ctx, ss_cstr ("!length"), 1, 1))
+ struct macro_token function_output;
+ size_t function_consumed;
+ if (expand_macro_function (&ctx, &function_output, &function_consumed))
{
- char *s = token_to_string (&args.tokens[0]);
- struct token t = { .type = T_POS_NUM, .number = strlen (s) };
- tokens_add (exp, &t);
- free (s);
+ i += function_consumed - 1;
- tokens_uninit (&args);
- }
- else if (parse_macro_function (&ctx, ss_cstr ("!blanks"), 1, 1))
- {
- if (args.tokens[0].type != T_POS_NUM)
- printf ("argument to !BLANKS must be positive integer\n");
+ if (function_output.token.type == T_MACRO_ID)
+ macro_tokens_from_string (exp, function_output.token.string,
+ SEG_MODE_INTERACTIVE /* XXX */);
else
- {
- struct string s = DS_EMPTY_INITIALIZER;
- ds_put_byte_multiple (&s, ' ', args.tokens[0].number);
- struct token t = { .type = T_ID, .string = s.ss };
- tokens_add (exp, &t);
- ds_destroy (&s);
- }
- tokens_uninit (&args);
+ macro_tokens_add (exp, &function_output);
+ macro_token_uninit (&function_output);
+
+ continue;
}
- else if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
+
+ if (ss_equals_case (token->string, ss_cstr ("!onexpand")))
*expand = true;
else if (ss_equals_case (token->string, ss_cstr ("!offexpand")))
*expand = false;
else
- tokens_add (exp, token);
+ macro_tokens_add (exp, mt);
}
}
-
void
-macro_expander_get_expansion (struct macro_expander *me, struct tokens *exp)
+macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp)
{
+#if 0 /* Disabled debug output: each parameter name followed by its argument tokens. */
for (size_t i = 0; i < me->macro->n_params; i++)
{
printf ("%s:\n", me->macro->params[i].name);
- tokens_print (me->args[i], stdout);
+ macro_tokens_print (me->args[i], stdout);
}
+#endif
bool expand = true;
- macro_expand (&me->macro->body_tokens, settings_get_mnest (),
+ macro_expand (&me->macro->body, settings_get_mnest (),
me->macros, me, &expand, exp);
+#if 0 /* Disabled debug output: the tokens of the finished expansion. */
printf ("expansion:\n");
- tokens_print (exp, stdout);
+ macro_tokens_print (exp, stdout);
+#endif
}