From: Ben Pfaff Date: Sun, 2 May 2021 20:02:41 +0000 (-0700) Subject: Most basic macros work. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1fbd8d1dcf607e05f04798c498786f5fda6796dc;p=pspp Most basic macros work. --- diff --git a/src/language/control/define.c b/src/language/control/define.c index 3f23ec0b10..5532d60b8b 100644 --- a/src/language/control/define.c +++ b/src/language/control/define.c @@ -68,6 +68,7 @@ parse_quoted_token (struct lexer *lexer, struct token *token) lex_error (lexer, _("String must contain exactly one token.")); return false; } + lex_get (lexer); return true; } @@ -111,11 +112,14 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED) { n_keywords++; - if (!lex_force_id (lexer) || !lex_force_match (lexer, T_EQUALS)) + if (!lex_force_id (lexer)) goto error; p->name = ss_xstrdup (lex_tokss (lexer)); lex_get (lexer); + + if (!lex_force_match (lexer, T_EQUALS)) + goto error; } /* Parse default value. */ @@ -202,6 +206,8 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED) } m->body = ds_ss (&body); + lex_define_macro (lexer, m); + return CMD_SUCCESS; error: diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index bdbce65a8a..1b5ba2244f 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -31,6 +31,7 @@ #include #include "language/command.h" +#include "language/lexer/macro.h" #include "language/lexer/scan.h" #include "language/lexer/segment.h" #include "language/lexer/token.h" @@ -40,7 +41,6 @@ #include "libpspp/i18n.h" #include "libpspp/ll.h" #include "libpspp/message.h" -#include "libpspp/macro.h" #include "libpspp/misc.h" #include "libpspp/str.h" #include "libpspp/u8-istream.h" @@ -68,6 +68,7 @@ struct lex_token size_t token_len; /* Length of source for token in bytes. */ size_t line_pos; /* Start of line containing token_pos. */ int first_line; /* Line number at token_pos. */ + bool from_macro; }; /* A source of tokens, corresponding to a syntax file. 
@@ -101,13 +102,15 @@ struct lex_source struct lex_token *tokens; /* Lookahead tokens for parser. */ }; -static struct lex_source *lex_source_create (struct lex_reader *); +static struct lex_source *lex_source_create (struct lexer *, + struct lex_reader *); static void lex_source_destroy (struct lex_source *); /* Lexer. */ struct lexer { struct ll_list sources; /* Contains "struct lex_source"s. */ + struct macro_set *macros; }; static struct lex_source *lex_source__ (const struct lexer *); @@ -150,8 +153,11 @@ lex_reader_set_file_name (struct lex_reader *reader, const char *file_name) struct lexer * lex_create (void) { - struct lexer *lexer = xzalloc (sizeof *lexer); - ll_init (&lexer->sources); + struct lexer *lexer = xmalloc (sizeof *lexer); + *lexer = (struct lexer) { + .sources = LL_INITIALIZER (lexer->sources), + .macros = macro_set_create (), + }; return lexer; } @@ -165,10 +171,19 @@ lex_destroy (struct lexer *lexer) ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources) lex_source_destroy (source); + macro_set_destroy (lexer->macros); free (lexer); } } +/* Adds M to LEXER's set of macros. M replaces any existing macro with the + same name. Takes ownership of M. */ +void +lex_define_macro (struct lexer *lexer, struct macro *m) +{ + macro_set_add (lexer->macros, m); +} + /* Inserts READER into LEXER so that the next token read by LEXER comes from READER. Before the caller, LEXER must either be empty or at a T_ENDCMD token. 
*/ @@ -866,10 +881,10 @@ lex_next__ (const struct lexer *lexer_, int n) } } -static const struct token * +static const struct lex_token * lex_source_front (const struct lex_source *src) { - return &src->tokens[deque_front (&src->deque, 0)].token; + return &src->tokens[deque_front (&src->deque, 0)]; } static const struct lex_token * @@ -879,8 +894,8 @@ lex_source_next__ (const struct lex_source *src, int n) { if (!deque_is_empty (&src->deque)) { - const struct token *front = lex_source_front (src); - if (front->type == T_STOP || front->type == T_ENDCMD) + const struct lex_token *front = lex_source_front (src); + if (front->token.type == T_STOP || front->token.type == T_ENDCMD) return front; } @@ -1381,6 +1396,16 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1, token = lex_source_next__ (src, n0); if (token->token.type == T_ENDCMD) ds_put_cstr (&s, _("Syntax error at end of command")); + else if (token->from_macro) + { + /* XXX this isn't ideal, we should get the actual syntax */ + char *syntax = token_to_string (&token->token); + if (syntax) + ds_put_format (&s, _("Syntax error at `%s'"), syntax); + else + ds_put_cstr (&s, _("Syntax error")); + free (syntax); + } else { struct substring syntax = lex_source_get_syntax__ (src, n0, n1); @@ -1625,11 +1650,12 @@ lex_source_try_get (struct lex_source *src) static bool lex_source_get__ (struct lex_source *src) { - for (;;) ( - if (src->eof) - return false; - else if (lex_source_try_get (src)) - return true; + for (;;) + { + if (src->eof) + return false; + else if (lex_source_try_get (src)) + return true; } } @@ -1638,17 +1664,16 @@ lex_source_get (const struct lex_source *src_) { struct lex_source *src = CONST_CAST (struct lex_source *, src_); - if (!lex_source_get (src)) + size_t old_count = deque_count (&src->deque); + if (!lex_source_get__ (src)) return false; - struct macro_expander *me = macro_expander_create (src->lexer, - lex_source_front (src)); - if (!me) - return true; - - for (;;) + struct 
macro_expander *me; + int retval = macro_expander_create (src->lexer->macros, + &lex_source_front (src)->token, &me); + while (!retval) { - if (!lex_source_get (src)) + if (!lex_source_get__ (src)) { /* This should not be reachable because we always get a T_STOP at the end of input and the macro_expander should always terminate @@ -1656,12 +1681,35 @@ lex_source_get (const struct lex_source *src_) NOT_REACHED (); } - int retval = macro_expander_add (me, lex_source_front (src)); - + retval = macro_expander_add (me, &lex_source_front (src)->token); + } + if (retval < 0) + { + /* XXX handle case where there's a macro invocation starting from some + later token we've already obtained */ + macro_expander_destroy (me); + return true; + } + + /* XXX handle case where the macro invocation doesn't use all the tokens */ + while (deque_count (&src->deque) > old_count) + lex_source_pop_front (src); + struct tokens expansion = { .tokens = NULL }; + macro_expander_get_expansion (me, &expansion); + macro_expander_destroy (me); + + for (size_t i = 0; i < expansion.n; i++) + { + *lex_push_token__ (src) = (struct lex_token) { + .token = expansion.tokens[i], + .from_macro = true, + /* XXX the rest */ + }; } - + free (expansion.tokens); + return true; } static void diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index caf5750331..5aa82e75fb 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -29,6 +29,7 @@ #include "libpspp/prompt.h" struct lexer; +struct macro; /* Handling of errors. */ enum lex_error_mode @@ -90,6 +91,9 @@ struct lex_reader *lex_reader_for_substring_nocopy (struct substring, const char struct lexer *lex_create (void); void lex_destroy (struct lexer *); +/* Macros. */ +void lex_define_macro (struct lexer *, struct macro *); + /* Files. 
*/ void lex_include (struct lexer *, struct lex_reader *); void lex_append (struct lexer *, struct lex_reader *); diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index 98320ae6d9..b94a04ad7f 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -67,19 +67,69 @@ macro_destroy (struct macro *m) free (m); } +struct macro_set * +macro_set_create (void) +{ + struct macro_set *set = xmalloc (sizeof *set); + *set = (struct macro_set) { + .macros = HMAP_INITIALIZER (set->macros), + }; + return set; +} + +void +macro_set_destroy (struct macro_set *set) +{ + if (!set) + return; + + struct macro *macro, *next; + HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros) + { + hmap_delete (&set->macros, &macro->hmap_node); + macro_destroy (macro); + } + hmap_destroy (&set->macros); + free (set); +} + +static unsigned int +hash_macro_name (const char *name) +{ + return utf8_hash_case_string (name, 0); +} + +static struct macro * +macro_set_find__ (struct macro_set *set, const char *name) +{ + struct macro *macro; + HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node, + hash_macro_name (name), &set->macros) + if (!utf8_strcasecmp (macro->name, name)) + return macro; + + return NULL; +} + const struct macro * macro_set_find (const struct macro_set *set, const char *name) { - struct macro *macro; + return macro_set_find__ (CONST_CAST (struct macro_set *, set), name); +} - HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node, - utf8_hash_case_string (name, 0), &set->macros) +/* Adds M to SET. M replaces any existing macro with the same name. Takes + ownership of M. 
*/ +void +macro_set_add (struct macro_set *set, struct macro *m) +{ + struct macro *victim = macro_set_find__ (set, m->name); + if (victim) { - if (!utf8_strcasecmp (macro->name, name)) - return macro; + hmap_delete (&set->macros, &victim->hmap_node); + macro_destroy (victim); } - return NULL; + hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name)); } enum me_state @@ -310,19 +360,67 @@ me_equals (struct macro_expander *me, const struct token *token) return me_expected (me, token, &equals); } +int +macro_expander_create (const struct macro_set *macros, + const struct token *token, + struct macro_expander **mep) +{ + *mep = NULL; + if (macro_set_is_empty (macros)) + return -1; + if (token->type != T_ID && token->type != T_MACRO_ID) + return -1; + + const struct macro *macro = macro_set_find (macros, token->string.string); + if (!macro) + return -1; + + struct macro_expander *me = xmalloc (sizeof *me); + *me = (struct macro_expander) { + .macros = macros, + + .state = ME_START, + .n_tokens = 1, + + .macro = macro, + .args = xcalloc (macro->n_params, sizeof *me->args), + .arg_index = 0, + }; + *mep = me; + return me_next_arg (me); +} + +void +macro_expander_destroy (struct macro_expander *me) +{ + if (!me) + return; + + for (size_t i = 0; i < me->macro->n_params; i++) + if (me->args[i]) + { + tokens_uninit (me->args[i]); + free (me->args[i]); + } + free (me->args); + free (me); +} + /* Adds TOKEN to the collection of tokens in ME that potentially need to be macro expanded. - Return values: - - * -1: The tokens added do not actually invoke a macro. The caller should - consume the first token without expanding it. + Returns -1 if the tokens added do not actually invoke a macro. The caller + should consume the first token without expanding it. - * 0: The macro expander needs more tokens, for macro arguments or to decide - whether this is actually a macro invocation. The caller should call - macro_expander_add() again with the next token. 
+ Returns 0 if the macro expander needs more tokens, for macro arguments or to + decide whether this is actually a macro invocation. The caller should call + macro_expander_add() again with the next token. - * >0: Expand the given number of tokens. */ + Returns a positive number to indicate that the returned number of tokens + invoke a macro. The number returned might be less than the number of tokens + added because it can take a few tokens of lookahead to determine whether the + macro invocation is finished. The caller should call + macro_expander_get_expansion() to obtain the expansion. */ int macro_expander_add (struct macro_expander *me, const struct token *token) { @@ -379,22 +477,32 @@ macro_expander_get_expansion (struct macro_expander *me, struct tokens *exp) struct substring segment = ss_head (state.body, seg_len); ss_advance (&state.body, seg_len); + printf ("segment \"%.*s\" %s token.type=%d\n", (int) segment.length, segment.string, segment_type_to_string (type), token.type); enum scan_result result = scanner_push (&scanner, type, segment, &token); if (result == SCAN_SAVE) saved = state; else if (result == SCAN_BACK) { + printf ("back\n"); state = saved; break; } else if (result == SCAN_DONE) - break; + { + printf ("done\n"); + break; + } } /* We have a token in 'token'. */ - tokens_add (exp, &token); + printf ("add token %d %s\n", token.type, token_type_to_name (token.type)); + if (is_scan_type (token.type)) + { + /* XXX report error if it's not SCAN_SKIP */ + } + else + tokens_add (exp, &token); token_destroy (&token); } } - diff --git a/src/language/lexer/macro.h b/src/language/lexer/macro.h index e829f8acee..8b0a7bb1ad 100644 --- a/src/language/lexer/macro.h +++ b/src/language/lexer/macro.h @@ -24,6 +24,8 @@ #include "libpspp/str.h" #include "language/lexer/token.h" +struct macro_expander; + struct macro_param { char *name; /* NULL for a positional parameter. 
*/ @@ -64,11 +66,23 @@ struct macro_set struct hmap macros; }; +struct macro_set *macro_set_create (void); +void macro_set_destroy (struct macro_set *); const struct macro *macro_set_find (const struct macro_set *, const char *); - -struct macro_expander *macro_expander_create (const struct macro_set *, - const struct token *); +void macro_set_add (struct macro_set *, struct macro *); + +static inline bool +macro_set_is_empty (const struct macro_set *set) +{ + return hmap_is_empty (&set->macros); +} + +/* Macro expansion. */ + +int macro_expander_create (const struct macro_set *, + const struct token *, + struct macro_expander **); void macro_expander_destroy (struct macro_expander *); int macro_expander_add (struct macro_expander *, const struct token *);