From cbec81089743eda13482210cbe6eb373f0651d68 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 1 May 2021 15:47:33 -0700
Subject: [PATCH] work on macros

---
 src/language/control/define.c |   9 +--
 src/language/lexer/lexer.c    | 102 ++++++++++++++++++++---------
 src/language/lexer/macro.c    | 117 ++++++++++++++++++++++++++++------
 src/language/lexer/macro.h    |  16 ++---
 src/language/lexer/token.c    |   2 +-
 5 files changed, 180 insertions(+), 66 deletions(-)

diff --git a/src/language/control/define.c b/src/language/control/define.c
index 00c48c3632..3f23ec0b10 100644
--- a/src/language/control/define.c
+++ b/src/language/control/define.c
@@ -186,20 +186,21 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
       goto error;
     }
 
-  size_t allocated_body = 0;
+  struct string body = DS_EMPTY_INITIALIZER;
   while (!match_macro_id (lexer, "!ENDDEFINE"))
     {
       if (lex_token (lexer) != T_STRING)
         {
           lex_error (lexer, _("Expecting macro body or !ENDDEFINE"));
+          ds_destroy (&body);
           goto error;
         }
 
-      if (allocated_body >= m->n_body)
-        m->body = x2nrealloc (m->body, &allocated_body, sizeof *m->body);
-      m->body[m->n_body] = ss_xstrdup (lex_tokss (lexer));
+      ds_put_substring (&body, lex_tokss (lexer));
+      ds_put_byte (&body, '\n');
       lex_get (lexer);
     }
+  m->body = ds_ss (&body);
 
   return CMD_SUCCESS;
 
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index 1283c1b784..bdbce65a8a 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -40,6 +40,7 @@
 #include "libpspp/i18n.h"
 #include "libpspp/ll.h"
 #include "libpspp/message.h"
+#include "libpspp/macro.h"
 #include "libpspp/misc.h"
 #include "libpspp/str.h"
 #include "libpspp/u8-istream.h"
@@ -77,6 +78,7 @@ struct lex_source
   {
     struct ll ll;               /* In lexer's list of sources. */
     struct lex_reader *reader;
+    struct lexer *lexer;
     struct segmenter segmenter;
 
     bool eof;                   /* True if T_STOP was read from 'reader'. */
@@ -113,7 +115,7 @@ static const struct lex_token *lex_next__ (const struct lexer *, int n);
 static void lex_source_push_endcmd__ (struct lex_source *);
 
 static void lex_source_pop__ (struct lex_source *);
-static bool lex_source_get__ (const struct lex_source *);
+static bool lex_source_get (const struct lex_source *);
 static void lex_source_error_valist (struct lex_source *, int n0, int n1,
                                      const char *format, va_list)
    PRINTF_FORMAT (4, 0);
@@ -174,7 +176,7 @@ void
 lex_include (struct lexer *lexer, struct lex_reader *reader)
 {
   assert (ll_is_empty (&lexer->sources) || lex_token (lexer) == T_ENDCMD);
-  ll_push_head (&lexer->sources, &lex_source_create (reader)->ll);
+  ll_push_head (&lexer->sources, &lex_source_create (lexer, reader)->ll);
 }
 
 /* Appends READER to LEXER, so that it will be read after all other current
@@ -182,7 +184,7 @@
 void
 lex_append (struct lexer *lexer, struct lex_reader *reader)
 {
-  ll_push_tail (&lexer->sources, &lex_source_create (reader)->ll);
+  ll_push_tail (&lexer->sources, &lex_source_create (lexer, reader)->ll);
 }
 
 /* Advancing. */
@@ -226,7 +228,7 @@ lex_get (struct lexer *lexer)
   lex_source_pop__ (src);
 
   while (deque_is_empty (&src->deque))
-    if (!lex_source_get__ (src))
+    if (!lex_source_get (src))
       {
         lex_source_destroy (src);
         src = lex_source__ (lexer);
@@ -864,6 +866,12 @@ lex_next__ (const struct lexer *lexer_, int n)
     }
 }
 
+static const struct token *
+lex_source_front (const struct lex_source *src)
+{
+  return &src->tokens[deque_front (&src->deque, 0)].token;
+}
+
 static const struct lex_token *
 lex_source_next__ (const struct lex_source *src, int n)
 {
@@ -871,14 +879,13 @@ lex_source_next__ (const struct lex_source *src, int n)
     {
       if (!deque_is_empty (&src->deque))
         {
-          struct lex_token *front;
-
-          front = &src->tokens[deque_front (&src->deque, 0)];
-          if (front->token.type == T_STOP || front->token.type == T_ENDCMD)
+          const struct lex_token *front
+            = &src->tokens[deque_front (&src->deque, 0)];
+          if (front->token.type == T_STOP || front->token.type == T_ENDCMD)
             return front;
         }
 
-      lex_source_get__ (src);
+      lex_source_get (src);
     }
 
   return &src->tokens[deque_back (&src->deque, n)];
@@ -1426,16 +1432,11 @@ lex_get_error (struct lex_source *src, const char *format, ...)
 }
 
 /* Attempts to append an additional token into SRC's deque, reading more from
-   the underlying lex_reader if necessary.  Returns true if successful, false
-   if the deque already represents (a suffix of) the whole lex_reader's
-   contents, */
+   the underlying lex_reader if necessary.  Returns true if a new token was
+   added to SRC's deque, false otherwise. */
 static bool
-lex_source_get__ (const struct lex_source *src_)
+lex_source_try_get (struct lex_source *src)
 {
-  struct lex_source *src = CONST_CAST (struct lex_source *, src_);
-  if (src->eof)
-    return false;
-
   /* State maintained while scanning tokens.  Usually we only need a single
      state, but scanner_push() can return SCAN_SAVE to indicate that the state
      needs to be saved and possibly restored later with SCAN_BACK. */
@@ -1566,59 +1567,101 @@
   switch (token->token.type)
     {
     default:
-      break;
+      return true;
 
     case T_STOP:
       token->token.type = T_ENDCMD;
       src->eof = true;
-      break;
+      return true;
 
     case SCAN_BAD_HEX_LENGTH:
       lex_get_error (src, _("String of hex digits has %d characters, which "
                             "is not a multiple of 2"),
                      (int) token->token.number);
-      break;
+      return false;
 
     case SCAN_BAD_HEX_DIGIT:
     case SCAN_BAD_UNICODE_DIGIT:
       lex_get_error (src, _("`%c' is not a valid hex digit"),
                      (int) token->token.number);
-      break;
+      return false;
 
     case SCAN_BAD_UNICODE_LENGTH:
       lex_get_error (src, _("Unicode string contains %d bytes, which is "
                             "not in the valid range of 1 to 8 bytes"),
                      (int) token->token.number);
-      break;
+      return false;
 
     case SCAN_BAD_UNICODE_CODE_POINT:
       lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
                      (int) token->token.number);
-      break;
+      return false;
 
     case SCAN_EXPECTED_QUOTE:
       lex_get_error (src, _("Unterminated string constant"));
-      break;
+      return false;
 
     case SCAN_EXPECTED_EXPONENT:
       lex_get_error (src, _("Missing exponent following `%s'"),
                      token->token.string.string);
-      break;
+      return false;
 
     case SCAN_UNEXPECTED_CHAR:
       {
         char c_name[16];
         lex_get_error (src, _("Bad character %s in input"),
                        uc_name (token->token.number, c_name));
+        return false;
       }
-      break;
 
     case SCAN_SKIP:
       lex_source_pop_front (src);
-      break;
+      return false;
     }
 
-  return true;
+  NOT_REACHED ();
+}
+
+static bool
+lex_source_get__ (struct lex_source *src)
+{
+  for (;;)
+    {
+      if (src->eof)
+        return false;
+      else if (lex_source_try_get (src))
+        return true;
+    }
+}
+
+static bool
+lex_source_get (const struct lex_source *src_)
+{
+  struct lex_source *src = CONST_CAST (struct lex_source *, src_);
+
+  if (!lex_source_get__ (src))
+    return false;
+
+  struct macro_expander *me = macro_expander_create (src->lexer,
+                                                     lex_source_front (src));
+  if (!me)
+    return true;
+
+  for (;;)
+    {
+      if (!lex_source_get__ (src))
+        {
+          /* This should not be reachable because we always get a T_STOP at the
+             end of input and the macro_expander should always terminate
+             expansion on T_STOP. */
+          NOT_REACHED ();
+        }
+
+      int retval = macro_expander_add (me, lex_source_front (src));
+
+    }
 }
 
 static void
@@ -1633,13 +1676,14 @@ lex_source_push_endcmd__ (struct lex_source *src)
 }
 
 static struct lex_source *
-lex_source_create (struct lex_reader *reader)
+lex_source_create (struct lexer *lexer, struct lex_reader *reader)
 {
   struct lex_source *src;
 
   src = xzalloc (sizeof *src);
   src->reader = reader;
   segmenter_init (&src->segmenter, reader->syntax);
+  src->lexer = lexer;
   src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens);
 
   lex_source_push_endcmd__ (src);
diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c
index 004714df4a..98320ae6d9 100644
--- a/src/language/lexer/macro.c
+++ b/src/language/lexer/macro.c
@@ -20,9 +20,12 @@
 #include 
+#include "language/lexer/segment.h"
+#include "language/lexer/scan.h"
 #include "libpspp/assertion.h"
 #include "libpspp/i18n.h"
 #include "libpspp/message.h"
+#include "libpspp/str.h"
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
@@ -60,9 +63,7 @@ macro_destroy (struct macro *m)
         }
     }
   free (m->params);
-  for (size_t i = 0; i < m->n_body; i++)
-    free (m->body[i]);
-  free (m->body);
+  ss_dealloc (&m->body);
   free (m);
 }
 
@@ -174,20 +175,39 @@ me_add_start (struct macro_expander *me, const struct token *token)
   return me_next_arg (me);
 }
 
+static int
+me_error (struct macro_expander *me)
+{
+  me->state = ME_START;
+  return -1;
+}
+
 static int
 me_add_arg (struct macro_expander *me, const struct token *token)
 {
+  const struct macro_param *p = &me->macro->params[me->arg_index];
+  if (token->type == T_STOP)
+    {
+      char *param_name = (p->name
+                          ? xstrdup (p->name)
+                          : xasprintf ("%zu", me->arg_index));
+      msg (SE, _("Unexpected end of file reading argument %s "
+                 "to macro %s."), param_name, me->macro->name);
+      free (param_name);
+
+      return me_error (me);
+    }
+
   me->n_tokens++;
 
-  struct tokens **ap = &me->args[me->arg_index];
-  if (!*ap)
-    *ap = xzalloc (sizeof **ap);
-  struct tokens *a = *ap;
-  const struct macro_param *p = &me->macro->params[me->arg_index];
+  struct tokens **argp = &me->args[me->arg_index];
+  if (!*argp)
+    *argp = xzalloc (sizeof **argp);
+  struct tokens *arg = *argp;
   if (p->arg_type == ARG_N_TOKENS)
     {
-      tokens_add (a, token);
-      if (a->n >= p->n_tokens)
+      tokens_add (arg, token);
+      if (arg->n >= p->n_tokens)
         return me_next_arg (me);
       return 0;
     }
@@ -195,7 +215,7 @@ me_add_arg (struct macro_expander *me, const struct token *token)
     {
       if (token->type == T_ENDCMD || token->type == T_STOP)
         return me_next_arg (me);
-      tokens_add (a, token);
+      tokens_add (arg, token);
       return 0;
     }
   else
     {
       const struct token *end = p->arg_type == ARG_CMDEND ?
         &p->charend : &p->enclose[1];
       if (token_equal (token, end))
         return me_next_arg (me);
-      tokens_add (a, token);
+      tokens_add (arg, token);
       return 0;
     }
 }
 
-static int
-me_error (struct macro_expander *me)
-{
-  me->state = ME_START;
-  return -1;
-}
-
 static int
 me_expected (struct macro_expander *me, const struct token *token,
              const struct token *wanted)
@@ -229,7 +242,7 @@ me_expected (struct macro_expander *me, const struct token *token,
     actual = xstrdup ("");
   char *expected = token_to_string (wanted);
   msg (SE, _("Found `%s' while expecting `%s' reading argument %s "
-             "in call to macro %s."),
+             "to macro %s."),
        actual, expected, param_name, me->macro->name);
   free (expected);
   free (actual);
@@ -297,6 +310,19 @@ me_equals (struct macro_expander *me, const struct token *token)
   return me_expected (me, token, &equals);
 }
 
+/* Adds TOKEN to the collection of tokens in ME that potentially need to be
+   macro expanded.
+
+   Return values:
+
+   * -1: The tokens added do not actually invoke a macro.  The caller should
+     consume the first token without expanding it.
+
+   * 0: The macro expander needs more tokens, for macro arguments or to decide
+     whether this is actually a macro invocation.  The caller should call
+     macro_expander_add() again with the next token.
+
+   * >0: Expand the given number of tokens. */
 int
 macro_expander_add (struct macro_expander *me, const struct token *token)
 {
@@ -321,3 +347,54 @@ macro_expander_add (struct macro_expander *me, const struct token *token)
       NOT_REACHED ();
     }
 }
+
+void
+macro_expander_get_expansion (struct macro_expander *me, struct tokens *exp)
+{
+  struct state
+    {
+      struct segmenter segmenter;
+      struct substring body;
+    };
+
+  struct state state;
+  segmenter_init (&state.segmenter, SEG_MODE_INTERACTIVE /*XXX*/);
+  state.body = me->macro->body;
+
+  struct state saved = state;
+
+  struct token token = { .type = T_STOP };
+
+  while (state.body.length > 0)
+    {
+      struct scanner scanner;
+      scanner_init (&scanner, &token);
+
+      for (;;)
+        {
+          enum segment_type type;
+          int seg_len = segmenter_push (&state.segmenter, state.body.string,
+                                        state.body.length, true, &type);
+          assert (seg_len >= 0);
+
+          struct substring segment = ss_head (state.body, seg_len);
+          ss_advance (&state.body, seg_len);
+
+          enum scan_result result = scanner_push (&scanner, type, segment, &token);
+          if (result == SCAN_SAVE)
+            saved = state;
+          else if (result == SCAN_BACK)
+            {
+              state = saved;
+              break;
+            }
+          else if (result == SCAN_DONE)
+            break;
+        }
+
+      /* We have a token in 'token'. */
+      tokens_add (exp, &token);
+      token_destroy (&token);
+    }
+}
+
diff --git a/src/language/lexer/macro.h b/src/language/lexer/macro.h
index ddd65c8862..e829f8acee 100644
--- a/src/language/lexer/macro.h
+++ b/src/language/lexer/macro.h
@@ -54,8 +54,7 @@ struct macro
     struct macro_param *params;
     size_t n_params;
 
-    char **body;
-    size_t n_body;
+    struct substring body;
   };
 
 void macro_destroy (struct macro *);
@@ -68,19 +67,12 @@ struct macro_set
 
 const struct macro *macro_set_find (const struct macro_set *,
                                     const char *);
 
-struct macro_expander *macro_expander_create (const struct macro_set *);
+struct macro_expander *macro_expander_create (const struct macro_set *,
+                                              const struct token *);
 void macro_expander_destroy (struct macro_expander *);
 
-/* Add one token to the input to macro-expand.  Returns:
-
-   -1: Advance one token without change.
-   0: Needs more tokens.
-   >0: Expand the given number of tokens.
-*/
 int macro_expander_add (struct macro_expander *, const struct token *);
-int macro_expander_add_eof (struct macro_expander *);
 
-void macro_expander_get_expansion (struct macro_expander *,
-                                   struct token **tokens, size_t *n);
+void macro_expander_get_expansion (struct macro_expander *, struct tokens *);
 
 #endif /* macro.h */
diff --git a/src/language/lexer/token.c b/src/language/lexer/token.c
index 454bd51868..12ab3d8b08 100644
--- a/src/language/lexer/token.c
+++ b/src/language/lexer/token.c
@@ -176,7 +176,7 @@ token_to_string (const struct token *token)
       return string_representation (token->string);
 
     default:
-      return xstrdup_if_nonnull (token_type_to_name (token->type));
+      return xstrdup_if_nonnull (token_type_to_string (token->type));
     }
 }
 
-- 
2.30.2
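
A note on the intended calling convention: the comment added above macro_expander_add() in macro.c is the contract that the unfinished loop in lex_source_get() is being written against.  The sketch below only illustrates that contract and is not part of the patch: try_expand_macro(), its token-array input, and the include paths are assumptions made for the illustration (the real caller works on the lexer's token deque), and whether the token given to macro_expander_create() counts toward the returned total is not pinned down here yet.

/* Illustration only: drives a macro_expander over TOKENS[0..N_TOKENS-1]
   following the documented return values of macro_expander_add().
   Returns the number of tokens consumed by a macro call, or 0 if TOKENS[0]
   does not begin one; on success the expansion is added to *EXP. */
#include <stddef.h>

#include "language/lexer/macro.h"   /* assumed include path */
#include "language/lexer/token.h"   /* assumed include path */

static size_t
try_expand_macro (const struct macro_set *macros,
                  const struct token *tokens, size_t n_tokens,
                  struct tokens *exp)
{
  if (n_tokens == 0)
    return 0;

  /* A null expander means TOKENS[0] cannot begin a macro call. */
  struct macro_expander *me = macro_expander_create (macros, &tokens[0]);
  if (me == NULL)
    return 0;

  size_t n_call = 0;
  for (size_t i = 1; i < n_tokens; i++)
    {
      int retval = macro_expander_add (me, &tokens[i]);
      if (retval < 0)
        break;                  /* Not a macro call after all. */
      else if (retval > 0)
        {
          /* The first RETVAL tokens form the call; fetch its expansion. */
          n_call = retval;
          macro_expander_get_expansion (me, exp);
          break;
        }
      /* retval == 0: the expander wants another token. */
    }

  macro_expander_destroy (me);
  return n_call;
}

In lex_source_get() the same loop shape would presumably replace the consumed tokens in the deque with the expansion; that replacement is the part this patch still leaves to be done.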