From: Ben Pfaff
Date: Mon, 19 Apr 2021 00:36:34 +0000 (-0700)
Subject: work on macro calls
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a5f6c8f299c416a9cd0ad778b3988435b66fae33;p=pspp

work on macro calls
---

diff --git a/src/language/control/define.c b/src/language/control/define.c
index 958ae5035c..00c48c3632 100644
--- a/src/language/control/define.c
+++ b/src/language/control/define.c
@@ -86,6 +86,7 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
     goto error;
 
   size_t allocated_params = 0;
+  size_t n_keywords = 0;
   while (!lex_match (lexer, T_RPAREN))
     {
       if (m->n_params >= allocated_params)
@@ -97,9 +98,19 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
 
       /* Parse parameter name. */
       if (match_macro_id (lexer, "!POSITIONAL"))
-        p->name = NULL;
+        {
+          if (n_keywords)
+            {
+              lex_error (lexer, _("Positional parameters must precede "
+                                  "keyword parameters."));
+              goto error;
+            }
+          p->name = NULL;
+        }
       else
         {
+          n_keywords++;
+
           if (!lex_force_id (lexer) || !lex_force_match (lexer, T_EQUALS))
             goto error;
 
@@ -113,7 +124,6 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
           if (!lex_force_match (lexer, T_LPAREN))
             goto error;
 
-          size_t allocated_tokens = 0;
           /* XXX Should this handle balanced inner parentheses? */
           while (!lex_match (lexer, T_RPAREN))
             {
@@ -122,12 +132,7 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
                   lex_error_expecting (lexer, ")");
                   goto error;
                 }
-              if (allocated_tokens >= p->def.n)
-                p->def.tokens = x2nrealloc (p->def.tokens, &allocated_tokens,
-                                            sizeof *p->def.tokens);
-
-              struct token *token = &p->def.tokens[p->def.n++];
-              token_copy (token, lex_next (lexer, 0));
+              tokens_add (&p->def, lex_next (lexer, 0));
               lex_get (lexer);
             }
         }
diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c
index ca086fc4df..004714df4a 100644
--- a/src/language/lexer/macro.c
+++ b/src/language/lexer/macro.c
@@ -20,13 +20,12 @@
 
 #include <stdlib.h>
 
-void
-macro_tokens_uninit (struct macro_tokens *tokens)
-{
-  for (size_t i = 0; i < tokens->n; i++)
-    token_destroy (&tokens->tokens[i]);
-  free (tokens->tokens);
-}
+#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
 
 void
 macro_destroy (struct macro *m)
@@ -40,7 +39,7 @@ macro_destroy (struct macro *m)
       struct macro_param *p = &m->params[i];
 
       free (p->name);
-      macro_tokens_uninit (&p->def);
+      tokens_uninit (&p->def);
 
       switch (p->arg_type)
         {
@@ -66,3 +65,292 @@ macro_destroy (struct macro *m)
   free (m->body);
   free (m);
 }
+
+/* Returns the macro in SET whose name matches NAME, compared
+   case-insensitively, or NULL if there is none. */
+const struct macro *
+macro_set_find (const struct macro_set *set, const char *name)
+{
+  struct macro *macro;
+
+  HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node,
+                           utf8_hash_case_string (name, 0), &set->macros)
+    {
+      if (!utf8_strcasecmp (macro->name, name))
+        return macro;
+    }
+
+  return NULL;
+}
+
+/* States for parsing a macro call in macro_expander_add(). */
+enum me_state
+  {
+    ME_START,
+
+    /* Accumulating tokens in me->params toward the end of any type of
+       argument. */
+    ME_ARG,
+
+    /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
+    ME_ENCLOSE,
+
+    /* Expecting a keyword for a keyword argument. */
+    ME_KEYWORD,
+
+    /* Expecting an equal sign for a keyword argument. */
+    ME_EQUALS,
+  };
+
+
+/* State for recognizing one macro call, one token at a time. */
+struct macro_expander
+  {
+    const struct macro_set *macros;
+
+    enum me_state state;
+    size_t n_tokens;            /* Tokens consumed, including macro name. */
+
+    const struct macro *macro;
+    struct tokens **args;       /* One per parameter; NULL until supplied. */
+    size_t arg_index;           /* Index of the parameter being parsed. */
+  };
+
+/* Completes the macro call in ME: every parameter that received no argument
+   gets a copy of its default.  Returns the number of tokens in the call. */
+static int
+me_finished (struct macro_expander *me)
+{
+  for (size_t i = 0; i < me->macro->n_params; i++)
+    if (!me->args[i])
+      {
+        me->args[i] = xmalloc (sizeof *me->args[i]);
+        tokens_copy (me->args[i], &me->macro->params[i].def);
+      }
+  return me->n_tokens;
+}
+
+/* Advances past the argument at ME->arg_index.  Returns 0 after choosing the
+   state for the next argument, or finishes the call via me_finished().
+
+   NOTE(review): me_add_start() calls this before any argument is read, which
+   appears to skip a leading positional parameter, and ME_ENCLOSE is never
+   selected here -- confirm. */
+static int
+me_next_arg (struct macro_expander *me)
+{
+  if (me->arg_index >= me->macro->n_params)
+    {
+      assert (!me->macro->n_params);
+      return me_finished (me);
+    }
+  else if (!me->macro->params[me->arg_index].name)
+    {
+      me->arg_index++;
+      if (me->arg_index >= me->macro->n_params)
+        return me_finished (me);
+      else
+        {
+          if (!me->macro->params[me->arg_index].name)
+            me->state = ME_ARG;
+          else
+            me->state = ME_KEYWORD;
+          return 0;
+        }
+    }
+  else
+    {
+      for (size_t i = 0; i < me->macro->n_params; i++)
+        if (!me->args[i])
+          {
+            me->state = ME_KEYWORD;
+            return 0;
+          }
+      return me_finished (me);
+    }
+}
+
+/* Handles TOKEN in state ME_START.  Returns -1 if TOKEN is not the name of a
+   macro in ME->macros; otherwise begins a call to that macro and returns the
+   result of me_next_arg(). */
+static int
+me_add_start (struct macro_expander *me, const struct token *token)
+{
+  if (token->type != T_ID && token->type != T_MACRO_ID)
+    return -1;
+
+  me->macro = macro_set_find (me->macros, token->string.string);
+  if (!me->macro)
+    return -1;
+
+  me->n_tokens = 1;
+  me->args = xcalloc (me->macro->n_params, sizeof *me->args);
+  me->arg_index = 0;
+  return me_next_arg (me);
+}
+
+/* Handles TOKEN in state ME_ARG by adding it to the argument for the current
+   parameter, or ending that argument if TOKEN terminates it. */
+static int
+me_add_arg (struct macro_expander *me, const struct token *token)
+{
+  me->n_tokens++;
+
+  struct tokens **ap = &me->args[me->arg_index];
+  if (!*ap)
+    *ap = xzalloc (sizeof **ap);
+  struct tokens *a = *ap;
+  const struct macro_param *p = &me->macro->params[me->arg_index];
+  if (p->arg_type == ARG_N_TOKENS)
+    {
+      tokens_add (a, token);
+      if (a->n >= p->n_tokens)
+        return me_next_arg (me);
+      return 0;
+    }
+  else if (p->arg_type == ARG_CMDEND)
+    {
+      if (token->type == T_ENDCMD || token->type == T_STOP)
+        return me_next_arg (me);
+      tokens_add (a, token);
+      return 0;
+    }
+  else
+    {
+      /* The remaining argument types end at a specific token: the closing
+         delimiter for ARG_ENCLOSE, otherwise the P->charend token. */
+      const struct token *end
+        = p->arg_type == ARG_ENCLOSE ? &p->enclose[1] : &p->charend;
+      if (token_equal (token, end))
+        return me_next_arg (me);
+      tokens_add (a, token);
+      return 0;
+    }
+}
+
+/* Abandons the current macro call: resets ME to ME_START and returns -1. */
+static int
+me_error (struct macro_expander *me)
+{
+  me->state = ME_START;
+  return -1;
+}
+
+/* Reports that TOKEN was found where WANTED was expected while reading the
+   current argument, then abandons the call as me_error() does. */
+static int
+me_expected (struct macro_expander *me, const struct token *token,
+             const struct token *wanted)
+{
+  const struct macro_param *p = &me->macro->params[me->arg_index];
+  char *param_name = (p->name
+                      ? xstrdup (p->name)
+                      : xasprintf ("%zu", me->arg_index));
+  char *actual = token_to_string (token);
+  if (!actual)
+    actual = xstrdup ("");
+  char *expected = token_to_string (wanted);
+  if (!expected)
+    expected = xstrdup ("");
+  msg (SE, _("Found `%s' while expecting `%s' reading argument %s "
+             "in call to macro %s."),
+       actual, expected, param_name, me->macro->name);
+  free (expected);
+  free (actual);
+  free (param_name);
+
+  return me_error (me);
+}
+
+/* Handles TOKEN in state ME_ENCLOSE: accepts the opening delimiter of the
+   current argument or reports an error. */
+static int
+me_enclose (struct macro_expander *me, const struct token *token)
+{
+  me->n_tokens++;
+
+  const struct macro_param *p = &me->macro->params[me->arg_index];
+  if (token_equal (&p->enclose[0], token))
+    {
+      me->state = ME_ARG;
+      return 0;
+    }
+
+  return me_expected (me, token, &p->enclose[0]);
+}
+
+/* Handles TOKEN in state ME_KEYWORD.  If TOKEN names a keyword parameter that
+   has not been given yet, expects '=' next; a repeated keyword is an error;
+   anything else ends the macro call. */
+static int
+me_keyword (struct macro_expander *me, const struct token *token)
+{
+  if (token->type != T_ID)
+    return me_finished (me);
+
+  for (size_t i = 0; i < me->macro->n_params; i++)
+    {
+      const struct macro_param *p = &me->macro->params[i];
+      if (p->name && ss_equals_case (ss_cstr (p->name), token->string))
+        {
+          me->arg_index = i;
+          if (me->args[i])
+            {
+              msg (SE,
+                   _("Argument %s multiply specified in call to macro %s."),
+                   p->name, me->macro->name);
+              return me_error (me);
+            }
+
+          me->n_tokens++;
+          me->state = ME_EQUALS;
+          return 0;
+        }
+    }
+
+  return me_finished (me);
+}
+
+/* Handles TOKEN in state ME_EQUALS: accepts '=' or reports an error. */
+static int
+me_equals (struct macro_expander *me, const struct token *token)
+{
+  me->n_tokens++;
+
+  if (token->type == T_EQUALS)
+    {
+      me->state = ME_ARG;
+      return 0;
+    }
+
+  const struct token equals = { .type = T_EQUALS };
+  return me_expected (me, token, &equals);
+}
+
+/* Feeds TOKEN to ME, dispatching on the current state.  Returns -1 if no
+   macro call was recognized (or on error), 0 if more tokens are needed, or
+   the positive number of tokens that make up a complete macro call. */
+int
+macro_expander_add (struct macro_expander *me, const struct token *token)
+{
+  switch (me->state)
+    {
+    case ME_START:
+      return me_add_start (me, token);
+
+    case ME_ARG:
+      return me_add_arg (me, token);
+
+    case ME_ENCLOSE:
+      return me_enclose (me, token);
+
+    case ME_KEYWORD:
+      return me_keyword (me, token);
+
+    case ME_EQUALS:
+      return me_equals (me, token);
+
+    default:
+      NOT_REACHED ();
+    }
+}
diff --git a/src/language/lexer/macro.h b/src/language/lexer/macro.h
index 2264ad628b..ddd65c8862 100644
--- a/src/language/lexer/macro.h
+++ b/src/language/lexer/macro.h
@@ -20,20 +20,14 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+#include "libpspp/hmap.h"
+#include "libpspp/str.h"
 #include "language/lexer/token.h"
 
-struct macro_tokens
-  {
-    struct token *tokens;
-    size_t n;
-  };
-
-void macro_tokens_uninit (struct macro_tokens *);
-
 struct macro_param
   {
     char *name;                 /* NULL for a positional parameter. */
-    struct macro_tokens def;    /* Default expansion. */
+    struct tokens def;          /* Default expansion. */
     bool expand_arg;            /* Macro-expand the argument? */
 
     enum
@@ -54,6 +48,7 @@ struct macro_param
 
 struct macro
   {
+    struct hmap_node hmap_node; /* Indexed by 'name'. */
     char *name;
 
     struct macro_param *params;
@@ -65,4 +60,27 @@ struct macro
 
 void macro_destroy (struct macro *);
 
+struct macro_set
+  {
+    struct hmap macros;
+  };
+
+const struct macro *macro_set_find (const struct macro_set *,
+                                    const char *);
+
+struct macro_expander *macro_expander_create (const struct macro_set *);
+void macro_expander_destroy (struct macro_expander *);
+
+/* Add one token to the input to macro-expand.  Returns:
+
+   -1: Advance one token without change.
+   0: Needs more tokens.
+   >0: Expand the given number of tokens.
+*/
+int macro_expander_add (struct macro_expander *, const struct token *);
+int macro_expander_add_eof (struct macro_expander *);
+
+void macro_expander_get_expansion (struct macro_expander *,
+                                   struct token **tokens, size_t *n);
+
 #endif /* macro.h */
diff --git a/src/language/lexer/token.c b/src/language/lexer/token.c
index 69fd48fb62..454bd51868 100644
--- a/src/language/lexer/token.c
+++ b/src/language/lexer/token.c
@@ -51,6 +51,31 @@ token_uninit (struct token *token)
     }
 }
 
+/* Returns true if A and B have the same type and, for number and string-like
+   tokens, the same value. */
+bool
+token_equal (const struct token *a, const struct token *b)
+{
+  if (a->type != b->type)
+    return false;
+
+  switch (a->type)
+    {
+    case T_POS_NUM:
+    case T_NEG_NUM:
+      return a->number == b->number;
+
+    case T_ID:
+    case T_MACRO_ID:
+    case T_MACRO_PUNCT:
+    case T_STRING:
+      return ss_equals (a->string, b->string);
+
+    default:
+      return true;
+    }
+}
+
 static char *
 number_token_to_string (const struct token *token)
 {
@@ -175,3 +200,39 @@ token_print (const struct token *token, FILE *stream)
              (int) token->string.length, token->string.string);
   putc ('\n', stream);
 }
+
+/* Initializes DST as a deep copy of SRC.  Any previous contents of DST are
+   overwritten without being freed. */
+void
+tokens_copy (struct tokens *dst, const struct tokens *src)
+{
+  *dst = (struct tokens) {
+    .tokens = xnmalloc (src->n, sizeof *dst->tokens),
+    .n = src->n,
+    .allocated = src->n,
+  };
+
+  for (size_t i = 0; i < src->n; i++)
+    token_copy (&dst->tokens[i], &src->tokens[i]);
+}
+
+/* Frees the tokens in TOKENS and its token array.  Does not free TOKENS
+   itself. */
+void
+tokens_uninit (struct tokens *tokens)
+{
+  for (size_t i = 0; i < tokens->n; i++)
+    token_uninit (&tokens->tokens[i]);
+  free (tokens->tokens);
+}
+
+/* Appends a copy of T to TOKENS, growing the token array when it is full. */
+void
+tokens_add (struct tokens *tokens, const struct token *t)
+{
+  if (tokens->n >= tokens->allocated)
+    tokens->tokens = x2nrealloc (tokens->tokens, &tokens->allocated,
+                                 sizeof *tokens->tokens);
+
+  token_copy (&tokens->tokens[tokens->n++], t);
+}
diff --git a/src/language/lexer/token.h b/src/language/lexer/token.h
index f4614c0e26..7cce5c1e50 100644
--- a/src/language/lexer/token.h
+++ b/src/language/lexer/token.h
@@ -17,6 +17,7 @@
 #ifndef TOKEN_H
 #define TOKEN_H 1
 
+#include <stdbool.h>
 #include <stdio.h>
 #include "libpspp/str.h"
 #include "data/identifier.h"
@@ -35,8 +36,21 @@ struct token
 
 void token_copy (struct token *, const struct token *);
 void token_uninit (struct token *);
 
+bool token_equal (const struct token *, const struct token *);
+
 char *token_to_string (const struct token *);
 
 void token_print (const struct token *, FILE *);
+
+struct tokens
+  {
+    struct token *tokens;
+    size_t n;
+    size_t allocated;
+  };
+
+void tokens_copy (struct tokens *, const struct tokens *);
+void tokens_uninit (struct tokens *);
+void tokens_add (struct tokens *, const struct token *);
 
 #endif /* token.h */