From 84e4c83cf9efa6e41f0514c3cb885f54d1030386 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 2 May 2021 22:08:37 -0700 Subject: [PATCH] Macro arguments and the !length function work. --- src/language/control/define.c | 75 +++++++- src/language/control/repeat.c | 5 +- src/language/lexer/lexer.c | 3 + src/language/lexer/macro.c | 285 +++++++++++++++++----------- src/language/lexer/macro.h | 4 +- src/language/lexer/segment.c | 8 +- src/language/lexer/segment.h | 2 + src/language/lexer/token.c | 7 + src/language/lexer/token.h | 2 + tests/language/lexer/segment-test.c | 3 +- 10 files changed, 268 insertions(+), 126 deletions(-) diff --git a/src/language/control/define.c b/src/language/control/define.c index 5532d60b8b..2f5ffd9d6b 100644 --- a/src/language/control/define.c +++ b/src/language/control/define.c @@ -72,6 +72,64 @@ parse_quoted_token (struct lexer *lexer, struct token *token) return true; } +static void +macro_tokenize (struct macro *m, struct lexer *lexer) +{ + struct state + { + struct segmenter segmenter; + struct substring body; + }; + + struct state state = { + .segmenter = SEGMENTER_INIT (lex_get_syntax_mode (lexer)), + .body = m->body, + }; + struct state saved = state; + + struct token token = { .type = T_STOP }; + + while (state.body.length > 0) + { + struct scanner scanner; + scanner_init (&scanner, &token); + + for (;;) + { + enum segment_type type; + int seg_len = segmenter_push (&state.segmenter, state.body.string, + state.body.length, true, &type); + assert (seg_len >= 0); + + struct substring segment = ss_head (state.body, seg_len); + ss_advance (&state.body, seg_len); + + enum scan_result result = scanner_push (&scanner, type, segment, &token); + if (result == SCAN_SAVE) + saved = state; + else if (result == SCAN_BACK) + { + state = saved; + break; + } + else if (result == SCAN_DONE) + break; + } + + /* We have a token in 'token'. */ + if (is_scan_type (token.type)) + { + if (token.type != SCAN_SKIP) + { + /* XXX report error */ + } + } + else + tokens_add (&m->body_tokens, &token); + token_destroy (&token); + } +} + int cmd_define (struct lexer *lexer, struct dataset *ds UNUSED) { @@ -87,35 +145,36 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED) goto error; size_t allocated_params = 0; - size_t n_keywords = 0; while (!lex_match (lexer, T_RPAREN)) { if (m->n_params >= allocated_params) m->params = x2nrealloc (m->params, &allocated_params, sizeof *m->params); - struct macro_param *p = &m->params[m->n_params++]; + size_t param_index = m->n_params++; + struct macro_param *p = &m->params[param_index]; *p = (struct macro_param) { .expand_arg = true }; /* Parse parameter name. */ if (match_macro_id (lexer, "!POSITIONAL")) { - if (n_keywords) + if (param_index > 0 && !m->params[param_index - 1].positional) { lex_error (lexer, _("Positional parameters must precede " "keyword parameters.")); goto error; } - p->name = NULL; + + p->positional = true; + p->name = xasprintf ("!%zu", param_index + 1); } else { - n_keywords++; - if (!lex_force_id (lexer)) goto error; - p->name = ss_xstrdup (lex_tokss (lexer)); + p->positional = false; + p->name = xasprintf ("!%s", lex_tokcstr (lexer)); lex_get (lexer); if (!lex_force_match (lexer, T_EQUALS)) @@ -206,6 +265,8 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED) } m->body = ds_ss (&body); + macro_tokenize (m, lexer); + lex_define_macro (lexer, m); return CMD_SUCCESS; diff --git a/src/language/control/repeat.c b/src/language/control/repeat.c index 118e8d3ccd..0438fa1bd8 100644 --- a/src/language/control/repeat.c +++ b/src/language/control/repeat.c @@ -201,10 +201,7 @@ do_parse_commands (struct substring s, enum segmenter_mode mode, struct hmap *dummies, struct string *outputs, size_t n_outputs) { - struct segmenter segmenter; - - segmenter_init (&segmenter, mode); - + struct segmenter segmenter = SEGMENTER_INIT (mode); while (!ss_is_empty (s)) { enum segment_type type; diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 1b5ba2244f..ff3c43e35e 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -1668,6 +1668,9 @@ lex_source_get (const struct lex_source *src_) if (!lex_source_get__ (src)) return false; + if (!settings_get_mexpand ()) + return true; + struct macro_expander *me; int retval = macro_expander_create (src->lexer->macros, &lex_source_front (src)->token, &me); diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index b94a04ad7f..8633de9857 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -20,6 +20,7 @@ #include +#include "data/settings.h" #include "language/lexer/segment.h" #include "language/lexer/scan.h" #include "libpspp/assertion.h" @@ -64,6 +65,7 @@ macro_destroy (struct macro *m) } free (m->params); ss_dealloc (&m->body); + tokens_uninit (&m->body_tokens); free (m); } @@ -134,7 +136,8 @@ macro_set_add (struct macro_set *set, struct macro *m) enum me_state { - ME_START, + /* Error state. */ + ME_ERROR, /* Accumulating tokens in me->params toward the end of any type of argument. */ @@ -160,7 +163,7 @@ struct macro_expander const struct macro *macro; struct tokens **args; - size_t arg_index; + const struct macro_param *param; }; static int @@ -178,22 +181,19 @@ me_finished (struct macro_expander *me) static int me_next_arg (struct macro_expander *me) { - if (me->arg_index >= me->macro->n_params) + if (!me->param) { assert (!me->macro->n_params); return me_finished (me); } - else if (!me->macro->params[me->arg_index].name) + else if (me->param->positional) { - me->arg_index++; - if (me->arg_index >= me->macro->n_params) + me->param++; + if (me->param >= &me->macro->params[me->macro->n_params]) return me_finished (me); else { - if (!me->macro->params[me->arg_index].name) - me->state = ME_ARG; - else - me->state = ME_KEYWORD; + me->state = me->param->positional ? ME_ARG : ME_KEYWORD; return 0; } } @@ -209,48 +209,28 @@ me_next_arg (struct macro_expander *me) } } -static int -me_add_start (struct macro_expander *me, const struct token *token) -{ - if (token->type != T_ID && token->type != T_MACRO_ID) - return -1; - - me->macro = macro_set_find (me->macros, token->string.string); - if (!me->macro) - return -1; - - me->n_tokens = 1; - me->args = xcalloc (me->macro->n_params, sizeof *me->args); - me->arg_index = 0; - return me_next_arg (me); -} - static int me_error (struct macro_expander *me) { - me->state = ME_START; + me->state = ME_ERROR; return -1; } static int me_add_arg (struct macro_expander *me, const struct token *token) { - const struct macro_param *p = &me->macro->params[me->arg_index]; if (token->type == T_STOP) { - char *param_name = (p->name - ? xstrdup (p->name) - : xasprintf ("%zu", me->arg_index)); msg (SE, _("Unexpected end of file reading argument %s " - "to macro %s."), param_name, me->macro->name); - free (param_name); + "to macro %s."), me->param->name, me->macro->name); return me_error (me); } me->n_tokens++; - struct tokens **argp = &me->args[me->arg_index]; + const struct macro_param *p = me->param; + struct tokens **argp = &me->args[p - me->macro->params]; if (!*argp) *argp = xzalloc (sizeof **argp); struct tokens *arg = *argp; @@ -283,20 +263,15 @@ static int me_expected (struct macro_expander *me, const struct token *token, const struct token *wanted) { - const struct macro_param *p = &me->macro->params[me->arg_index]; - char *param_name = (p->name - ? xstrdup (p->name) - : xasprintf ("%zu", me->arg_index)); char *actual = token_to_string (token); if (!actual) actual = xstrdup (""); char *expected = token_to_string (wanted); msg (SE, _("Found `%s' while expecting `%s' reading argument %s " "to macro %s."), - actual, expected, param_name, me->macro->name); + actual, expected, me->param->name, me->macro->name); free (expected); free (actual); - free (param_name); return me_error (me); } @@ -306,14 +281,27 @@ me_enclose (struct macro_expander *me, const struct token *token) { me->n_tokens++; - const struct macro_param *p = &me->macro->params[me->arg_index]; - if (token_equal (&p->enclose[0], token)) + if (token_equal (&me->param->enclose[0], token)) { me->state = ME_ARG; return 0; } - return me_expected (me, token, &p->enclose[0]); + return me_expected (me, token, &me->param->enclose[0]); +} + +static const struct macro_param * +macro_find_parameter_by_name (const struct macro *m, struct substring name) +{ + for (size_t i = 0; i < m->n_params; i++) + { + const struct macro_param *p = &m->params[i]; + struct substring p_name = ss_cstr (p->name); + if (!utf8_strncasecmp (p_name.string, p_name.length, + name.string, name.length)) + return p; + } + return NULL; } static int @@ -322,24 +310,23 @@ me_keyword (struct macro_expander *me, const struct token *token) if (token->type != T_ID) return me_finished (me); - for (size_t i = 0; i < me->macro->n_params; i++) + const struct macro_param *p = macro_find_parameter_by_name (me->macro, + token->string); + if (p) { - const struct macro_param *p = &me->macro->params[i]; - if (p->name && ss_equals_case (ss_cstr (p->name), token->string)) + size_t arg_index = p - me->macro->params; + me->param = p; + if (me->args[arg_index]) { - me->arg_index = i; - if (me->args[i]) - { - msg (SE, - _("Argument %s multiply specified in call to macro %s."), - p->name, me->macro->name); - return me_error (me); - } - - me->n_tokens++; - me->state = ME_EQUALS; - return 0; + msg (SE, + _("Argument %s multiply specified in call to macro %s."), + p->name, me->macro->name); + return me_error (me); } + + me->n_tokens++; + me->state = ME_EQUALS; + return 0; } return me_finished (me); @@ -378,16 +365,20 @@ macro_expander_create (const struct macro_set *macros, struct macro_expander *me = xmalloc (sizeof *me); *me = (struct macro_expander) { .macros = macros, - - .state = ME_START, .n_tokens = 1, - .macro = macro, - .args = xcalloc (macro->n_params, sizeof *me->args), - .arg_index = 0, }; *mep = me; - return me_next_arg (me); + + if (!macro->n_params) + return 1; + else + { + me->state = macro->params[0].positional ? ME_ARG : ME_KEYWORD; + me->args = xcalloc (macro->n_params, sizeof *me->args); + me->param = macro->params; + return 0; + } } void @@ -426,8 +417,8 @@ macro_expander_add (struct macro_expander *me, const struct token *token) { switch (me->state) { - case ME_START: - return me_add_start (me, token); + case ME_ERROR: + return -1; case ME_ARG: return me_add_arg (me, token); @@ -446,63 +437,139 @@ macro_expander_add (struct macro_expander *me, const struct token *token) } } -void -macro_expander_get_expansion (struct macro_expander *me, struct tokens *exp) +static void +macro_expand (const struct tokens *tokens, int nesting_countdown, + const struct macro_set *macros, const struct macro_expander *me, + bool *expand, struct tokens *exp) { - struct state + if (nesting_countdown <= 0) { - struct segmenter segmenter; - struct substring body; - }; - - struct state state; - segmenter_init (&state.segmenter, SEG_MODE_INTERACTIVE /*XXX*/); - state.body = me->macro->body; - - struct state saved = state; - - struct token token = { .type = T_STOP }; + printf ("maximum nesting level exceeded\n"); + for (size_t i = 0; i < tokens->n; i++) + tokens_add (exp, &tokens->tokens[i]); + return; + } - while (state.body.length > 0) + for (size_t i = 0; i < tokens->n; i++) { - struct scanner scanner; - scanner_init (&scanner, &token); + const struct token *token = &tokens->tokens[i]; + if (token->type == T_MACRO_ID && me) + { + const struct macro_param *param = macro_find_parameter_by_name ( + me->macro, token->string); + if (param) + { + printf ("expand %s to:\n", param->name); + const struct tokens *arg = me->args[param - me->macro->params]; + tokens_print (arg, stdout); + if (*expand && param->expand_arg) + macro_expand (arg, nesting_countdown, macros, NULL, expand, exp); + else + for (size_t i = 0; i < arg->n; i++) + tokens_add (exp, &arg->tokens[i]); + continue; + } + } - for (;;) + if (*expand) { - enum segment_type type; - int seg_len = segmenter_push (&state.segmenter, state.body.string, - state.body.length, true, &type); - assert (seg_len >= 0); - - struct substring segment = ss_head (state.body, seg_len); - ss_advance (&state.body, seg_len); - printf ("segment \"%.*s\" %s token.type=%d\n", (int) segment.length, segment.string, segment_type_to_string (type), token.type); - - enum scan_result result = scanner_push (&scanner, type, segment, &token); - if (result == SCAN_SAVE) - saved = state; - else if (result == SCAN_BACK) + struct macro_expander *subme; + int retval = macro_expander_create (macros, token, &subme); + for (size_t j = 1; !retval; j++) { - printf ("back\n"); - state = saved; - break; + static const struct token stop = { .type = T_STOP }; + retval = macro_expander_add ( + subme, i + j < tokens->n ? &tokens->tokens[i + j] : &stop); } - else if (result == SCAN_DONE) + if (retval > 0) { - printf ("done\n"); - break; + i += retval - 1; + macro_expand (&subme->macro->body_tokens, nesting_countdown - 1, + macros, subme, expand, exp); + macro_expander_destroy (subme); + continue; } + + macro_expander_destroy (subme); } - /* We have a token in 'token'. */ - printf ("add token %d %s\n", token.type, token_type_to_name (token.type)); - if (is_scan_type (token.type)) + if (token->type != T_MACRO_ID) { - /* XXX report error if it's not SCAN_SKIP */ + tokens_add (exp, token); + continue; + } + + if (ss_equals_case (token->string, ss_cstr ("!onexpand"))) + *expand = true; + else if (ss_equals_case (token->string, ss_cstr ("!offexpand"))) + *expand = false; + else if (ss_equals_case (token->string, ss_cstr ("!length"))) + { + if (i + 1 >= tokens->n || tokens->tokens[i + 1].type != T_LPAREN) + { + printf ("`(' expected following !LENGTH'\n"); + continue; + } + + int n_parens = 1; + size_t j; + for (j = i + 2; n_parens && j < tokens->n; j++) + if (tokens->tokens[j].type == T_LPAREN) + n_parens++; + else if (tokens->tokens[j].type == T_RPAREN) + n_parens--; + if (n_parens) + { + printf ("Unbalanced parentheses in !LENGTH argument.\n"); + continue; + } + + size_t lparen_idx = i + 1; + size_t rparen_idx = j - 1; + const struct tokens unexpanded_args = { + .tokens = &tokens->tokens[lparen_idx + 1], + .n = rparen_idx - (lparen_idx + 1), + }; + struct tokens args = { .n = 0 }; + macro_expand (&unexpanded_args, nesting_countdown, macros, + me, expand, &args); + + if (args.n != 1) + { + tokens_uninit (&args); + printf ("!LENGTH argument must be a single token (not %zu)\n", args.n); + continue; + } + + char *s = token_to_string (&args.tokens[0]); + struct token t = { .type = T_POS_NUM, .number = strlen (s) }; + tokens_add (exp, &t); + free (s); + + tokens_uninit (&args); + + i = rparen_idx; } else - tokens_add (exp, &token); - token_destroy (&token); + tokens_add (exp, token); } } + + +void +macro_expander_get_expansion (struct macro_expander *me, struct tokens *exp) +{ + for (size_t i = 0; i < me->macro->n_params; i++) + { + printf ("%s:\n", me->macro->params[i].name); + tokens_print (me->args[i], stdout); + } + + bool expand = true; + macro_expand (&me->macro->body_tokens, settings_get_mnest (), + me->macros, me, &expand, exp); + + printf ("expansion:\n"); + tokens_print (exp, stdout); +} + diff --git a/src/language/lexer/macro.h b/src/language/lexer/macro.h index 8b0a7bb1ad..ba0f1fe735 100644 --- a/src/language/lexer/macro.h +++ b/src/language/lexer/macro.h @@ -28,7 +28,8 @@ struct macro_expander; struct macro_param { - char *name; /* NULL for a positional parameter. */ + bool positional; /* Is this a positional parameter? */ + char *name; /* "!1" or "!name". */ struct tokens def; /* Default expansion. */ bool expand_arg; /* Macro-expand the argument? */ @@ -57,6 +58,7 @@ struct macro size_t n_params; struct substring body; + struct tokens body_tokens; }; void macro_destroy (struct macro *); diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index a4fea0b213..ac88117ff5 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -28,6 +28,7 @@ #include "gl/c-ctype.h" #include "gl/c-strcase.h" +#include "gl/verify.h" enum segmenter_state { @@ -54,6 +55,9 @@ enum segmenter_state S_TITLE_2 }; +/* S_SHBANG is the start state that SEGMENTER_INIT refers to as just 0. */ +verify (S_SHBANG == 0); + #define SS_START_OF_LINE (1u << 0) #define SS_START_OF_COMMAND (1u << 1) @@ -1806,9 +1810,7 @@ segment_type_to_string (enum segment_type type) void segmenter_init (struct segmenter *s, enum segmenter_mode mode) { - s->state = S_SHBANG; - s->substate = 0; - s->mode = mode; + *s = (struct segmenter) SEGMENTER_INIT (mode); } /* Returns the mode passed to segmenter_init() for S. */ diff --git a/src/language/lexer/segment.h b/src/language/lexer/segment.h index 02a269bdd2..10551066b0 100644 --- a/src/language/lexer/segment.h +++ b/src/language/lexer/segment.h @@ -117,6 +117,8 @@ struct segmenter unsigned char mode; }; +#define SEGMENTER_INIT(MODE) { .mode = MODE } + void segmenter_init (struct segmenter *, enum segmenter_mode); enum segmenter_mode segmenter_get_mode (const struct segmenter *); diff --git a/src/language/lexer/token.c b/src/language/lexer/token.c index 12ab3d8b08..7becaaffad 100644 --- a/src/language/lexer/token.c +++ b/src/language/lexer/token.c @@ -229,3 +229,10 @@ tokens_add (struct tokens *tokens, const struct token *t) token_copy (&tokens->tokens[tokens->n++], t); } + +void +tokens_print (const struct tokens *tokens, FILE *stream) +{ + for (size_t i = 0; i < tokens->n; i++) + token_print (&tokens->tokens[i], stream); +} diff --git a/src/language/lexer/token.h b/src/language/lexer/token.h index 7cce5c1e50..b334edfef6 100644 --- a/src/language/lexer/token.h +++ b/src/language/lexer/token.h @@ -53,4 +53,6 @@ void tokens_copy (struct tokens *, const struct tokens *); void tokens_uninit (struct tokens *); void tokens_add (struct tokens *, const struct token *); +void tokens_print (const struct tokens *, FILE *); + #endif /* token.h */ diff --git a/tests/language/lexer/segment-test.c b/tests/language/lexer/segment-test.c index a3b67b89b2..cb46401b34 100644 --- a/tests/language/lexer/segment-test.c +++ b/tests/language/lexer/segment-test.c @@ -108,8 +108,7 @@ main (int argc, char *argv[]) static void check_segmentation (const char *input, size_t length, bool print_segments) { - struct segmenter s; - segmenter_init (&s, mode); + struct segmenter s = SEGMENTER_INIT (mode); size_t line_number = 1; size_t line_offset = 0; -- 2.30.2