X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fmacro.c;h=628b83af01bd3d5e80a67c88baf8c5963463c5ee;hb=7024f0edd9193b70c24fcf76d123b15f00a61180;hp=a15b7064a5d96537f84998aedd9c0b6ea5ed2bed;hpb=c94be33beb7085e1cbb1ec47f0e3a49c896d443b;p=pspp diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index a15b7064a5..628b83af01 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -46,6 +46,12 @@ macro_token_uninit (struct macro_token *mt) ss_dealloc (&mt->representation); } +void +macro_token_to_representation (struct macro_token *mt, struct string *s) +{ + ds_put_substring (s, mt->representation); +} + void macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src) { @@ -153,6 +159,115 @@ macro_tokens_print (const struct macro_tokens *mts, FILE *stream) token_print (&mts->mts[i].token, stream); } +enum token_class + { + TC_ENDCMD, /* No space before or after (new-line after). */ + TC_BINOP, /* Space on both sides. */ + TC_COMMA, /* Space afterward. */ + TC_ID, /* Don't need spaces except sequentially. */ + TC_PUNCT, /* Don't need spaces except sequentially. */ + }; + +static bool +needs_space (enum token_class prev, enum token_class next) +{ + /* Don't need a space before or after the end of a command. + (A new-line is needed afterward as a special case.) */ + if (prev == TC_ENDCMD || next == TC_ENDCMD) + return false; + + /* Binary operators always have a space on both sides. */ + if (prev == TC_BINOP || next == TC_BINOP) + return true; + + /* A comma always has a space afterward. */ + if (prev == TC_COMMA) + return true; + + /* Otherwise, PREV is TC_ID or TC_PUNCT, which only need a space if there are + two of them in a row. 
*/ + return prev == next; +} + +static enum token_class +classify_token (enum token_type type) +{ + switch (type) + { + case T_ID: + case T_MACRO_ID: + case T_POS_NUM: + case T_NEG_NUM: + case T_STRING: + return TC_ID; + + case T_STOP: + return TC_PUNCT; + + case T_ENDCMD: + return TC_ENDCMD; + + case T_LPAREN: + case T_RPAREN: + case T_LBRACK: + case T_RBRACK: + return TC_PUNCT; + + case T_PLUS: + case T_DASH: + case T_ASTERISK: + case T_SLASH: + case T_EQUALS: + case T_AND: + case T_OR: + case T_NOT: + case T_EQ: + case T_GE: + case T_GT: + case T_LE: + case T_LT: + case T_NE: + case T_ALL: + case T_BY: + case T_TO: + case T_WITH: + case T_EXP: + case T_MACRO_PUNCT: + return TC_BINOP; + + case T_COMMA: + return TC_COMMA; + } + + NOT_REACHED (); +} + +void +macro_tokens_to_representation (struct macro_tokens *mts, struct string *s) +{ + if (!mts->n) + return; + + macro_token_to_representation (&mts->mts[0], s); + for (size_t i = 1; i < mts->n; i++) + { + enum token_type prev = mts->mts[i - 1].token.type; + enum token_type next = mts->mts[i].token.type; + + if (prev == T_ENDCMD) + ds_put_byte (s, '\n'); + else + { + enum token_class pc = classify_token (prev); + enum token_class nc = classify_token (next); + if (needs_space (pc, nc)) + ds_put_byte (s, ' '); + } + + macro_token_to_representation (&mts->mts[i], s); + } +} + void macro_destroy (struct macro *m) { @@ -314,7 +429,9 @@ me_next_arg (struct macro_expander *me) return me_finished (me); else { - me->state = me->param->positional ? ME_ARG : ME_KEYWORD; + me->state = (!me->param->positional ? ME_KEYWORD + : me->param->arg_type == ARG_ENCLOSE ? 
ME_ENCLOSE + : ME_ARG); return 0; } } @@ -340,10 +457,13 @@ me_error (struct macro_expander *me) static int me_add_arg (struct macro_expander *me, const struct macro_token *mt) { + const struct macro_param *p = me->param; + const struct token *token = &mt->token; - if (token->type == T_STOP) + if ((token->type == T_ENDCMD || token->type == T_STOP) + && p->arg_type != ARG_CMDEND) { - msg (SE, _("Unexpected end of file reading argument %s " + msg (SE, _("Unexpected end of command reading argument %s " "to macro %s."), me->param->name, me->macro->name); return me_error (me); @@ -351,7 +471,6 @@ me_add_arg (struct macro_expander *me, const struct macro_token *mt) me->n_tokens++; - const struct macro_param *p = me->param; struct macro_tokens **argp = &me->args[p - me->macro->params]; if (!*argp) *argp = xzalloc (sizeof **argp); @@ -373,7 +492,7 @@ me_add_arg (struct macro_expander *me, const struct macro_token *mt) else { const struct token *end - = p->arg_type == ARG_CMDEND ? &p->charend : &p->enclose[1]; + = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1]; if (token_equal (token, end)) return me_next_arg (me); macro_tokens_add (arg, mt); @@ -416,10 +535,13 @@ me_enclose (struct macro_expander *me, const struct macro_token *mt) static const struct macro_param * macro_find_parameter_by_name (const struct macro *m, struct substring name) { + if (ss_first (name) == '!') + ss_advance (&name, 1); + for (size_t i = 0; i < m->n_params; i++) { const struct macro_param *p = &m->params[i]; - struct substring p_name = ss_cstr (p->name); + struct substring p_name = ss_cstr (p->name + 1); if (!utf8_strncasecmp (p_name.string, p_name.length, name.string, name.length)) return p; @@ -498,7 +620,9 @@ macro_expander_create (const struct macro_set *macros, return 1; else { - me->state = macro->params[0].positional ? ME_ARG : ME_KEYWORD; + me->state = (!macro->params[0].positional ? ME_KEYWORD + : macro->params[0].arg_type == ARG_ENCLOSE ? 
ME_ENCLOSE + : ME_ARG); me->args = xcalloc (macro->n_params, sizeof *me->args); me->param = macro->params; return 0; @@ -751,33 +875,19 @@ expand_macro_function (struct parse_macro_function_ctx *ctx, else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX, input_consumed)) { - struct string s; - bool all_strings = true; + struct string s = DS_EMPTY_INITIALIZER; for (size_t i = 0; i < args.n; i++) { if (args.mts[i].token.type == T_STRING) ds_put_substring (&s, args.mts[i].token.string); else - { - all_strings = false; - ds_put_substring (&s, args.mts[i].representation); - } + ds_put_substring (&s, args.mts[i].representation); } - if (all_strings) - { - *output = (struct macro_token) { - .token = { .type = T_STRING, .string = s.ss }, - }; - output->representation = ss_cstr (token_to_string (&output->token)); - } - else - { - *output = (struct macro_token) { - .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss }, - }; - ss_alloc_substring (&output->representation, s.ss); - } + *output = (struct macro_token) { + .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss }, + }; + ss_alloc_substring (&output->representation, s.ss); } else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1, input_consumed)) @@ -803,6 +913,17 @@ expand_macro_function (struct parse_macro_function_ctx *ctx, else macro_token_copy (output, &args.mts[0]); } + else if (ctx->n_input > 0 + && ctx->input[0].token.type == T_MACRO_ID + && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!null"))) + { + *input_consumed = 1; + *output = (struct macro_token) { + .token = { .type = T_MACRO_ID /* XXX*/ }, + }; + ss_alloc_substring (&output->token.string, ss_cstr ("")); + return true; + } else return false; @@ -816,6 +937,13 @@ macro_expand (const struct macro_tokens *mts, const struct macro_expander *me, bool *expand, struct macro_tokens *exp) { + /* Macro expansion: + + - Macro names in macro bodies are not expanded by default. !EVAL() + expands them. 
+ + - Macro names in arguments to macro invocations (outside of macro bodies) + are expanded by default, unless !NOEXPAND. */ if (nesting_countdown <= 0) { printf ("maximum nesting level exceeded\n"); @@ -851,9 +979,9 @@ macro_expand (const struct macro_tokens *mts, int retval = macro_expander_create (macros, token, &subme); for (size_t j = 1; !retval; j++) { - const struct macro_token stop = { .token = { .type = T_STOP } }; + const struct macro_token endcmd = { .token = { .type = T_ENDCMD } }; retval = macro_expander_add ( - subme, i + j < mts->n ? &mts->mts[i + j] : &stop); + subme, i + j < mts->n ? &mts->mts[i + j] : &endcmd); } if (retval > 0) {