X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fmacro.c;h=a15b7064a5d96537f84998aedd9c0b6ea5ed2bed;hb=refs%2Fheads%2Fdev5;hp=7726e4c3ecd41e86ca08cb3cbc91c16c27f1c34b;hpb=fcb970c24584cf72f46676ff1cb2e08013951392;p=pspp diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index 7726e4c3ec..a15b7064a5 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -18,6 +18,7 @@ #include "language/lexer/macro.h" +#include #include #include "data/settings.h" @@ -31,6 +32,127 @@ #include "gettext.h" #define _(msgid) gettext (msgid) +void +macro_token_copy (struct macro_token *dst, const struct macro_token *src) +{ + token_copy (&dst->token, &src->token); + ss_alloc_substring (&dst->representation, src->representation); +} + +void +macro_token_uninit (struct macro_token *mt) +{ + token_uninit (&mt->token); + ss_dealloc (&mt->representation); +} + +void +macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src) +{ + *dst = (struct macro_tokens) { + .mts = xmalloc (src->n * sizeof *dst->mts), + .n = src->n, + .allocated = src->n, + }; + for (size_t i = 0; i < src->n; i++) + macro_token_copy (&dst->mts[i], &src->mts[i]); +} + +void +macro_tokens_uninit (struct macro_tokens *mts) +{ + for (size_t i = 0; i < mts->n; i++) + macro_token_uninit (&mts->mts[i]); + free (mts->mts); +} + +struct macro_token * +macro_tokens_add_uninit (struct macro_tokens *mts) +{ + if (mts->n >= mts->allocated) + mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts); + return &mts->mts[mts->n++]; +} + +void +macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt) +{ + macro_token_copy (macro_tokens_add_uninit (mts), mt); +} + +void +macro_tokens_from_string (struct macro_tokens *mts, const struct substring src, + enum segmenter_mode mode) +{ + struct state + { + struct segmenter segmenter; + struct substring body; + }; + + struct state state = { + .segmenter = SEGMENTER_INIT (mode), + .body = src, + }; + struct state saved = state; + + while (state.body.length > 0) + { + struct macro_token mt = { + .token = { .type = T_STOP }, + .representation = { .string = state.body.string }, + }; + struct token *token = &mt.token; + + struct scanner scanner; + scanner_init (&scanner, token); + + for (;;) + { + enum segment_type type; + int seg_len = segmenter_push (&state.segmenter, state.body.string, + state.body.length, true, &type); + assert (seg_len >= 0); + + struct substring segment = ss_head (state.body, seg_len); + ss_advance (&state.body, seg_len); + + enum scan_result result = scanner_push (&scanner, type, segment, token); + if (result == SCAN_SAVE) + saved = state; + else if (result == SCAN_BACK) + { + state = saved; + break; + } + else if (result == SCAN_DONE) + break; + } + + /* We have a token in 'token'. */ + if (is_scan_type (token->type)) + { + if (token->type != SCAN_SKIP) + { + /* XXX report error */ + } + } + else + { + mt.representation.length = state.body.string - mt.representation.string; + macro_tokens_add (mts, &mt); + } + token_uninit (token); + } +} + +void +macro_tokens_print (const struct macro_tokens *mts, FILE *stream) +{ + for (size_t i = 0; i < mts->n; i++) + token_print (&mts->mts[i].token, stream); +} + void macro_destroy (struct macro *m) { @@ -43,7 +165,7 @@ macro_destroy (struct macro *m) struct macro_param *p = &m->params[i]; free (p->name); - tokens_uninit (&p->def); + macro_tokens_uninit (&p->def); switch (p->arg_type) { @@ -51,12 +173,12 @@ macro_destroy (struct macro *m) break; case ARG_CHAREND: - token_destroy (&p->charend); + token_uninit (&p->charend); break; case ARG_ENCLOSE: - token_destroy (&p->enclose[0]); - token_destroy (&p->enclose[1]); + token_uninit (&p->enclose[0]); + token_uninit (&p->enclose[1]); break; case ARG_CMDEND: @@ -64,8 +186,7 @@ macro_destroy (struct macro *m) } } free (m->params); - ss_dealloc (&m->body); - tokens_uninit (&m->body_tokens); + macro_tokens_uninit (&m->body); free (m); } @@ -162,7 +283,7 @@ struct macro_expander size_t n_tokens; const struct macro *macro; - struct tokens **args; + struct macro_tokens **args; const struct macro_param *param; }; @@ -173,7 +294,7 @@ me_finished (struct macro_expander *me) if (!me->args[i]) { me->args[i] = xmalloc (sizeof *me->args[i]); - tokens_copy (me->args[i], &me->macro->params[i].def); + macro_tokens_copy (me->args[i], &me->macro->params[i].def); } return me->n_tokens; } @@ -217,8 +338,9 @@ me_error (struct macro_expander *me) } static int -me_add_arg (struct macro_expander *me, const struct token *token) +me_add_arg (struct macro_expander *me, const struct macro_token *mt) { + const struct token *token = &mt->token; if (token->type == T_STOP) { msg (SE, _("Unexpected end of file reading argument %s " @@ -230,13 +352,13 @@ me_add_arg (struct macro_expander *me, const struct token *token) me->n_tokens++; const struct macro_param *p = me->param; - struct tokens **argp = &me->args[p - me->macro->params]; + struct macro_tokens **argp = &me->args[p - me->macro->params]; if (!*argp) *argp = xzalloc (sizeof **argp); - struct tokens *arg = *argp; + struct macro_tokens *arg = *argp; if (p->arg_type == ARG_N_TOKENS) { - tokens_add (arg, token); + macro_tokens_add (arg, mt); if (arg->n >= p->n_tokens) return me_next_arg (me); return 0; @@ -245,7 +367,7 @@ me_add_arg (struct macro_expander *me, const struct token *token) { if (token->type == T_ENDCMD || token->type == T_STOP) return me_next_arg (me); - tokens_add (arg, token); + macro_tokens_add (arg, mt); return 0; } else @@ -254,31 +376,32 @@ me_add_arg (struct macro_expander *me, const struct token *token) = p->arg_type == ARG_CMDEND ? &p->charend : &p->enclose[1]; if (token_equal (token, end)) return me_next_arg (me); - tokens_add (arg, token); + macro_tokens_add (arg, mt); return 0; } } static int -me_expected (struct macro_expander *me, const struct token *token, - const struct token *wanted) -{ - char *actual = token_to_string (token); - if (!actual) - actual = xstrdup (""); - char *expected = token_to_string (wanted); - msg (SE, _("Found `%s' while expecting `%s' reading argument %s " +me_expected (struct macro_expander *me, const struct macro_token *actual, + const struct token *expected) +{ + const struct substring actual_s + = (actual->representation.length ? actual->representation + : ss_cstr (_(""))); + char *expected_s = token_to_string (expected); + msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s " "to macro %s."), - actual, expected, me->param->name, me->macro->name); - free (expected); - free (actual); + (int) actual_s.length, actual_s.string, expected_s, + me->param->name, me->macro->name); + free (expected_s); return me_error (me); } static int -me_enclose (struct macro_expander *me, const struct token *token) +me_enclose (struct macro_expander *me, const struct macro_token *mt) { + const struct token *token = &mt->token; me->n_tokens++; if (token_equal (&me->param->enclose[0], token)) @@ -287,7 +410,7 @@ me_enclose (struct macro_expander *me, const struct token *token) return 0; } - return me_expected (me, token, &me->param->enclose[0]); + return me_expected (me, mt, &me->param->enclose[0]); } static const struct macro_param * @@ -305,8 +428,9 @@ macro_find_parameter_by_name (const struct macro *m, struct substring name) } static int -me_keyword (struct macro_expander *me, const struct token *token) +me_keyword (struct macro_expander *me, const struct macro_token *mt) { + const struct token *token = &mt->token; if (token->type != T_ID) return me_finished (me); @@ -333,8 +457,9 @@ me_keyword (struct macro_expander *me, const struct token *token) } static int -me_equals (struct macro_expander *me, const struct token *token) +me_equals (struct macro_expander *me, const struct macro_token *mt) { + const struct token *token = &mt->token; me->n_tokens++; if (token->type == T_EQUALS) @@ -343,8 +468,7 @@ me_equals (struct macro_expander *me, const struct token *token) return 0; } - const struct token equals = { .type = T_EQUALS }; - return me_expected (me, token, &equals); + return me_expected (me, mt, &(struct token) { .type = T_EQUALS }); } int @@ -390,7 +514,7 @@ macro_expander_destroy (struct macro_expander *me) for (size_t i = 0; i < me->macro->n_params; i++) if (me->args[i]) { - tokens_uninit (me->args[i]); + macro_tokens_uninit (me->args[i]); free (me->args[i]); } free (me->args); @@ -413,7 +537,7 @@ macro_expander_destroy (struct macro_expander *me) macro invocation is finished. The caller should call macro_expander_get_expansion() to obtain the expansion. */ int -macro_expander_add (struct macro_expander *me, const struct token *token) +macro_expander_add (struct macro_expander *me, const struct macro_token *mt) { switch (me->state) { @@ -421,27 +545,40 @@ macro_expander_add (struct macro_expander *me, const struct token *token) return -1; case ME_ARG: - return me_add_arg (me, token); + return me_add_arg (me, mt); case ME_ENCLOSE: - return me_enclose (me, token); + return me_enclose (me, mt); case ME_KEYWORD: - return me_keyword (me, token); + return me_keyword (me, mt); case ME_EQUALS: - return me_equals (me, token); + return me_equals (me, mt); default: NOT_REACHED (); } } +/* Each argument to a macro function is one of: + + - A quoted string or other single literal token. + + - An argument to the macro being expanded, e.g. !1 or a named argument. + + - !*. + + - A function invocation. + + Each function invocation yields a character sequence to be turned into a + sequence of tokens. The case where that character sequence is a single + quoted string is an important special case. +*/ struct parse_macro_function_ctx { - const struct tokens *tokens; - size_t *idx; - struct tokens *args; + struct macro_token *input; + size_t n_input; int nesting_countdown; const struct macro_set *macros; const struct macro_expander *me; @@ -449,131 +586,261 @@ struct parse_macro_function_ctx }; static void -macro_expand (const struct tokens *tokens, int nesting_countdown, - const struct macro_set *macros, const struct macro_expander *me, - bool *expand, struct tokens *exp); +macro_expand (const struct macro_tokens *, + int nesting_countdown, const struct macro_set *, + const struct macro_expander *, bool *expand, struct macro_tokens *exp); + +static bool +expand_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_token *output, + size_t *input_consumed); + +static size_t +parse_function_arg (struct parse_macro_function_ctx *ctx, + size_t i, struct macro_token *farg) +{ + struct macro_token *tokens = ctx->input; + const struct token *token = &tokens[i].token; + if (token->type == T_MACRO_ID) + { + const struct macro_param *param = macro_find_parameter_by_name ( + ctx->me->macro, token->string); + if (param) + { + size_t param_idx = param - ctx->me->macro->params; + const struct macro_tokens *marg = ctx->me->args[param_idx]; + if (marg->n == 1) + macro_token_copy (farg, &marg->mts[0]); + else + { + struct string s = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < marg->n; i++) + { + if (i) + ds_put_byte (&s, ' '); + ds_put_substring (&s, marg->mts[i].representation); + } + + struct substring s_copy; + ss_alloc_substring (&s_copy, s.ss); + + *farg = (struct macro_token) { + .token = { .type = T_MACRO_ID, .string = s.ss }, + .representation = s_copy, + }; + } + return 1; + } + + struct parse_macro_function_ctx subctx = { + .input = &ctx->input[i], + .n_input = ctx->n_input - i, + .nesting_countdown = ctx->nesting_countdown, + .macros = ctx->macros, + .me = ctx->me, + .expand = ctx->expand, + }; + size_t subinput_consumed; + if (expand_macro_function (&subctx, farg, &subinput_consumed)) + return subinput_consumed; + } + + macro_token_copy (farg, &tokens[i]); + return 1; +} static bool parse_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_tokens *args, struct substring function, - int min_args, int max_args) + int min_args, int max_args, + size_t *input_consumed) { - const struct token *tokens = ctx->tokens->tokens; - size_t n_tokens = ctx->tokens->n; + struct macro_token *tokens = ctx->input; + size_t n_tokens = ctx->n_input; - if (!ss_equals_case (tokens[0].string, function)) + if (!n_tokens + || tokens[0].token.type != T_MACRO_ID + || !ss_equals_case (tokens[0].token.string, function)) return false; - size_t lparen_idx = *ctx->idx + 1; - if (lparen_idx >= n_tokens || tokens[lparen_idx].type != T_LPAREN) + if (n_tokens < 2 || tokens[1].token.type != T_LPAREN) { printf ("`(' expected following %s'\n", function.string); return false; } - *ctx->args = (struct tokens) { .n = 0 }; + *args = (struct macro_tokens) { .n = 0 }; - size_t i = lparen_idx + 1; - for (size_t j = i; ; j++) + for (size_t i = 2;; ) { - if (j >= n_tokens) + if (i >= n_tokens) + goto unexpected_end; + if (tokens[i].token.type == T_RPAREN) { - printf ("Missing closing parenthesis in arguments to %s.\n", - function.string); - goto error; + *input_consumed = i + 1; + if (args->n < min_args || args->n > max_args) + { + printf ("Wrong number of arguments to %s.\n", function.string); + goto error; + } + return true; } - int type = tokens[j].type; - if (type == T_LPAREN) + i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args)); + if (i >= n_tokens) + goto unexpected_end; + + if (tokens[i].token.type == T_COMMA) + i++; + else if (tokens[i].token.type != T_RPAREN) { - int paren_nesting_level = 1; - do - { - j++; - if (j >= n_tokens) - { - printf ("Missing closing parenthesis in argument %zu to %s.\n", - ctx->args->n + 1, function.string); - goto error; - } - if (tokens[j].type == T_LPAREN) - paren_nesting_level++; - else if (tokens[j].type == T_RPAREN) - paren_nesting_level--; - } - while (paren_nesting_level != 0); + printf ("Expecting `,' or `)' in %s invocation.", function.string); + goto error; } - else if (type == T_RPAREN || type == T_COMMA) + } + +unexpected_end: + printf ("Missing closing parenthesis in arguments to %s.\n", + function.string); + /* Fall through. */ +error: + macro_tokens_uninit (args); + return false; +} + +static bool +expand_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_token *output, + size_t *input_consumed) +{ + struct macro_tokens args; + + if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1, + input_consumed)) + { + size_t length = args.mts[0].representation.length; + *output = (struct macro_token) { + .token = { .type = T_POS_NUM, .number = length }, + .representation = ss_cstr (xasprintf ("%zu", length)), + }; + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1, + input_consumed)) + { + /* XXX this isn't right, it might be a character string containing a + positive integer, e.g. via !CONCAT. */ + if (args.mts[0].token.type != T_POS_NUM) { - const struct tokens unexpanded_arg = { - .tokens = CONST_CAST (struct token *, &tokens[i]), - .n = j - i, - }; - struct tokens expanded_arg = { .n = 0 }; - macro_expand (&unexpanded_arg, ctx->nesting_countdown, ctx->macros, - ctx->me, ctx->expand, &expanded_arg); + printf ("argument to !BLANKS must be positive integer\n"); + macro_tokens_uninit (&args); + return false; + } + + struct string s = DS_EMPTY_INITIALIZER; + ds_put_byte_multiple (&s, ' ', args.mts[0].token.number); + + struct substring s_copy; + ss_alloc_substring (&s_copy, s.ss); - if (expanded_arg.n != 1) + *output = (struct macro_token) { + .token = { .type = T_ID, .string = s.ss }, + .representation = s_copy, + }; + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX, + input_consumed)) + { + struct string s; + bool all_strings = true; + for (size_t i = 0; i < args.n; i++) + { + if (args.mts[i].token.type == T_STRING) + ds_put_substring (&s, args.mts[i].token.string); + else { - printf ("argument %zu to %s must be a single token " - "(not %zu tokens)\n", ctx->args->n + 1, function.string, - expanded_arg.n); - tokens_uninit (&expanded_arg); - goto error; + all_strings = false; + ds_put_substring (&s, args.mts[i].representation); } + } - tokens_add (ctx->args, &expanded_arg.tokens[0]); - tokens_uninit (&expanded_arg); - - i = j + 1; - if (type == T_RPAREN) - break; + if (all_strings) + { + *output = (struct macro_token) { + .token = { .type = T_STRING, .string = s.ss }, + }; + output->representation = ss_cstr (token_to_string (&output->token)); + } + else + { + *output = (struct macro_token) { + .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss }, + }; + ss_alloc_substring (&output->representation, s.ss); } } - - if (ctx->args->n < min_args || ctx->args->n > max_args) + else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1, + input_consumed)) { - printf ("Wrong number of argument to %s.\n", function.string); - goto error; + if (args.mts[0].token.type == T_STRING) + macro_token_copy (output, &args.mts[0]); + else + { + *output = (struct macro_token) { .token = { .type = T_STRING } }; + ss_alloc_substring (&output->token.string, args.mts[0].representation); + output->representation = ss_cstr (token_to_string (&output->token)); + } } - *ctx->idx = i; - return true; + else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1, + input_consumed)) + { + if (args.mts[0].token.type == T_STRING) + { + *output = (struct macro_token) { .token = { .type = T_MACRO_ID } }; + ss_alloc_substring (&output->token.string, args.mts[0].token.string); + output->representation = ss_cstr (token_to_string (&output->token)); + } + else + macro_token_copy (output, &args.mts[0]); + } + else + return false; -error: - tokens_uninit (ctx->args); - return false; + macro_tokens_uninit (&args); + return true; } static void -macro_expand (const struct tokens *tokens, int nesting_countdown, - const struct macro_set *macros, const struct macro_expander *me, - bool *expand, struct tokens *exp) +macro_expand (const struct macro_tokens *mts, + int nesting_countdown, const struct macro_set *macros, + const struct macro_expander *me, bool *expand, + struct macro_tokens *exp) { if (nesting_countdown <= 0) { printf ("maximum nesting level exceeded\n"); - for (size_t i = 0; i < tokens->n; i++) - tokens_add (exp, &tokens->tokens[i]); + for (size_t i = 0; i < mts->n; i++) + macro_tokens_add (exp, &mts->mts[i]); return; } - for (size_t i = 0; i < tokens->n; i++) + for (size_t i = 0; i < mts->n; i++) { - const struct token *token = &tokens->tokens[i]; + const struct macro_token *mt = &mts->mts[i]; + const struct token *token = &mt->token; if (token->type == T_MACRO_ID && me) { const struct macro_param *param = macro_find_parameter_by_name ( me->macro, token->string); if (param) { - printf ("expand %s to:\n", param->name); - const struct tokens *arg = me->args[param - me->macro->params]; - tokens_print (arg, stdout); + const struct macro_tokens *arg = me->args[param - me->macro->params]; + //macro_tokens_print (arg, stdout); if (*expand && param->expand_arg) macro_expand (arg, nesting_countdown, macros, NULL, expand, exp); else for (size_t i = 0; i < arg->n; i++) - tokens_add (exp, &arg->tokens[i]); + macro_tokens_add (exp, &arg->mts[i]); continue; } } @@ -584,15 +851,15 @@ macro_expand (const struct tokens *tokens, int nesting_countdown, int retval = macro_expander_create (macros, token, &subme); for (size_t j = 1; !retval; j++) { - static const struct token stop = { .type = T_STOP }; + const struct macro_token stop = { .token = { .type = T_STOP } }; retval = macro_expander_add ( - subme, i + j < tokens->n ? &tokens->tokens[i + j] : &stop); + subme, i + j < mts->n ? &mts->mts[i + j] : &stop); } if (retval > 0) { i += retval - 1; - macro_expand (&subme->macro->body_tokens, nesting_countdown - 1, - macros, subme, expand, exp); + macro_expand (&subme->macro->body, nesting_countdown - 1, macros, + subme, expand, exp); macro_expander_destroy (subme); continue; } @@ -602,88 +869,63 @@ macro_expand (const struct tokens *tokens, int nesting_countdown, if (token->type != T_MACRO_ID) { - tokens_add (exp, token); + macro_tokens_add (exp, mt); continue; } -#if 0 - struct macro_function - { - const char *name; - int min_args; - int max_args; - }; - static const struct macro_function functions[] = { - { "!length", 1, 1 }, - { "!concat", 1, INT_MAX }, - { "!substr", 2, 3 }, - { "!index", 2, 2 }, - { "!head", 1, 1 }, - { "!tail", 1, 1 }, - { "!quote", 1, 1 }, - { "!unquote", 1, 1 }, - { "!upcase", 1, 1 }, - { "!blanks", 1, 1 }, - { "!eval", 1, 1 }, - }; -#endif - struct tokens args; + /* Maybe each arg should just be a string, either a quoted string or a + non-quoted string containing tokens. */ struct parse_macro_function_ctx ctx = { - .tokens = tokens, - .idx = &i, - .args = &args, + .input = &mts->mts[i], + .n_input = mts->n - i, .nesting_countdown = nesting_countdown, .macros = macros, .me = me, .expand = expand, }; - if (parse_macro_function (&ctx, ss_cstr ("!length"), 1, 1)) + struct macro_token function_output; + size_t function_consumed; + if (expand_macro_function (&ctx, &function_output, &function_consumed)) { - char *s = token_to_string (&args.tokens[0]); - struct token t = { .type = T_POS_NUM, .number = strlen (s) }; - tokens_add (exp, &t); - free (s); + i += function_consumed - 1; - tokens_uninit (&args); - } - else if (parse_macro_function (&ctx, ss_cstr ("!blanks"), 1, 1)) - { - if (args.tokens[0].type != T_POS_NUM) - printf ("argument to !BLANKS must be positive integer\n"); + if (function_output.token.type == T_MACRO_ID) + macro_tokens_from_string (exp, function_output.token.string, + SEG_MODE_INTERACTIVE /* XXX */); else - { - struct string s = DS_EMPTY_INITIALIZER; - ds_put_byte_multiple (&s, ' ', args.tokens[0].number); - struct token t = { .type = T_ID, .string = s.ss }; - tokens_add (exp, &t); - ds_destroy (&s); - } - tokens_uninit (&args); + macro_tokens_add (exp, &function_output); + macro_token_uninit (&function_output); + + continue; } - else if (ss_equals_case (token->string, ss_cstr ("!onexpand"))) + + if (ss_equals_case (token->string, ss_cstr ("!onexpand"))) *expand = true; else if (ss_equals_case (token->string, ss_cstr ("!offexpand"))) *expand = false; else - tokens_add (exp, token); + macro_tokens_add (exp, mt); } } - void -macro_expander_get_expansion (struct macro_expander *me, struct tokens *exp) +macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp) { +#if 0 for (size_t i = 0; i < me->macro->n_params; i++) { printf ("%s:\n", me->macro->params[i].name); - tokens_print (me->args[i], stdout); + macro_tokens_print (me->args[i], stdout); } +#endif bool expand = true; - macro_expand (&me->macro->body_tokens, settings_get_mnest (), + macro_expand (&me->macro->body, settings_get_mnest (), me->macros, me, &expand, exp); +#if 0 printf ("expansion:\n"); - tokens_print (exp, stdout); + macro_tokens_print (exp, stdout); +#endif }