From: Ben Pfaff Date: Tue, 23 Mar 2021 14:14:48 +0000 (-0700) Subject: Work on DEFINE command. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=c94be33beb7085e1cbb1ec47f0e3a49c896d443b Work on DEFINE command. --- diff --git a/src/language/command.def b/src/language/command.def index a97f9b83e7..12f30c7c03 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -18,6 +18,7 @@ DEF_CMD (S_ANY, F_ENHANCED, "CLOSE FILE HANDLE", cmd_close_file_handle) DEF_CMD (S_ANY, 0, "CACHE", cmd_cache) DEF_CMD (S_ANY, 0, "CD", cmd_cd) +DEF_CMD (S_ANY, 0, "DEFINE", cmd_define) DEF_CMD (S_ANY, 0, "DO REPEAT", cmd_do_repeat) DEF_CMD (S_ANY, 0, "END REPEAT", cmd_end_repeat) DEF_CMD (S_ANY, 0, "ECHO", cmd_echo) @@ -188,7 +189,6 @@ UNIMPL_CMD ("CSTABULATE", "Tabulate complex samples") UNIMPL_CMD ("CTABLES", "Display complex samples") UNIMPL_CMD ("CURVEFIT", "Fit curve to line plot") UNIMPL_CMD ("DATE", "Create time series data") -UNIMPL_CMD ("DEFINE", "Syntax macros") UNIMPL_CMD ("DETECTANOMALY", "Find unusual cases") UNIMPL_CMD ("DISCRIMINANT", "Linear discriminant analysis") UNIMPL_CMD ("EDIT", "obsolete") diff --git a/src/language/control/automake.mk b/src/language/control/automake.mk index 909acd13db..9d09687c81 100644 --- a/src/language/control/automake.mk +++ b/src/language/control/automake.mk @@ -20,6 +20,7 @@ language_control_sources = \ src/language/control/control-stack.c \ src/language/control/control-stack.h \ + src/language/control/define.c \ src/language/control/do-if.c \ src/language/control/loop.c \ src/language/control/repeat.c \ diff --git a/src/language/control/define.c b/src/language/control/define.c new file mode 100644 index 0000000000..686685fc9c --- /dev/null +++ b/src/language/control/define.c @@ -0,0 +1,223 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2021 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include + +#include "language/command.h" +#include "language/lexer/lexer.h" +#include "language/lexer/macro.h" +#include "language/lexer/scan.h" +#include "language/lexer/token.h" + +#include "gl/xalloc.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +static bool +force_macro_id (struct lexer *lexer) +{ + return lex_token (lexer) == T_MACRO_ID || lex_force_id (lexer); +} + +static bool +match_macro_id (struct lexer *lexer, const char *id) +{ + if (id[0] != '!') + return lex_match_id (lexer, id); + else if (lex_token (lexer) == T_MACRO_ID + && ss_equals_case (lex_tokss (lexer), ss_cstr (id))) + { + lex_get (lexer); + return true; + } + else + return false; +} + +static bool +parse_quoted_token (struct lexer *lexer, struct token *token) +{ + if (!lex_force_string (lexer)) + return false; + + struct substring s = lex_tokss (lexer); + struct string_lexer slex; + string_lexer_init (&slex, s.string, s.length, SEG_MODE_INTERACTIVE); + struct token another_token; + if (!string_lexer_next (&slex, token) + || string_lexer_next (&slex, &another_token)) + { + token_uninit (token); + token_uninit (&another_token); + lex_error (lexer, _("String must contain exactly one token.")); + return false; + } + lex_get (lexer); + return true; +} + +int +cmd_define (struct lexer *lexer, struct dataset *ds UNUSED) +{ + if (!force_macro_id (lexer)) + return 
CMD_FAILURE; + + /* Parse macro name. */ + struct macro *m = xmalloc (sizeof *m); + *m = (struct macro) { .name = ss_xstrdup (lex_tokss (lexer)) }; + lex_get (lexer); + + if (!lex_force_match (lexer, T_LPAREN)) + goto error; + + size_t allocated_params = 0; + while (!lex_match (lexer, T_RPAREN)) + { + if (m->n_params >= allocated_params) + m->params = x2nrealloc (m->params, &allocated_params, + sizeof *m->params); + + size_t param_index = m->n_params++; + struct macro_param *p = &m->params[param_index]; + *p = (struct macro_param) { .expand_arg = true }; + + /* Parse parameter name. */ + if (match_macro_id (lexer, "!POSITIONAL")) + { + if (param_index > 0 && !m->params[param_index - 1].positional) + { + lex_error (lexer, _("Positional parameters must precede " + "keyword parameters.")); + goto error; + } + + p->positional = true; + p->name = xasprintf ("!%zu", param_index + 1); + } + else + { + if (!lex_force_id (lexer)) + goto error; + + p->positional = false; + p->name = xasprintf ("!%s", lex_tokcstr (lexer)); + lex_get (lexer); + + if (!lex_force_match (lexer, T_EQUALS)) + goto error; + } + + /* Parse default value. */ + if (match_macro_id (lexer, "!DEFAULT")) + { + if (!lex_force_match (lexer, T_LPAREN)) + goto error; + + /* XXX Should this handle balanced inner parentheses? 
*/ + while (!lex_match (lexer, T_RPAREN)) + { + if (lex_token (lexer) == T_ENDCMD) + { + lex_error_expecting (lexer, ")"); + goto error; + } + const struct macro_token mt = { + .token = *lex_next (lexer, 0), + .representation = lex_next_representation (lexer, 0, 0), + }; + macro_tokens_add (&p->def, &mt); + lex_get (lexer); + } + } + + if (match_macro_id (lexer, "!NOEXPAND")) + p->expand_arg = false; + + if (match_macro_id (lexer, "!TOKENS")) + { + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_int_range (lexer, "!TOKENS", 1, INT_MAX)) + goto error; + p->arg_type = ARG_N_TOKENS; + p->n_tokens = lex_integer (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto error; + } + else if (match_macro_id (lexer, "!CHAREND")) + { + p->arg_type = ARG_CHAREND; + p->charend = (struct token) { .type = T_STOP }; + + if (!lex_force_match (lexer, T_LPAREN) + || !parse_quoted_token (lexer, &p->charend) + || !lex_force_match (lexer, T_RPAREN)) + goto error; + } + else if (match_macro_id (lexer, "!ENCLOSE")) + { + p->arg_type = ARG_ENCLOSE; + p->enclose[0] = p->enclose[1] = (struct token) { .type = T_STOP }; + + if (!lex_force_match (lexer, T_LPAREN) + || !parse_quoted_token (lexer, &p->enclose[0]) + || !lex_force_match (lexer, T_COMMA) + || !parse_quoted_token (lexer, &p->enclose[1]) + || !lex_force_match (lexer, T_RPAREN)) + goto error; + } + else if (match_macro_id (lexer, "!CMDEND")) + p->arg_type = ARG_CMDEND; + else + { + lex_error_expecting (lexer, "!TOKENS", "!CHAREND", + "!ENCLOSE", "!CMDEND"); + goto error; + } + + if (lex_token (lexer) != T_RPAREN && !lex_force_match (lexer, T_SLASH)) + goto error; + } + + struct string body = DS_EMPTY_INITIALIZER; + while (!match_macro_id (lexer, "!ENDDEFINE")) + { + if (lex_token (lexer) != T_STRING) + { + lex_error (lexer, _("Expecting macro body or !ENDDEFINE")); + ds_destroy (&body); + goto error; + } + + ds_put_substring (&body, lex_tokss (lexer)); + ds_put_byte (&body, '\n'); + lex_get (lexer); + } + + 
macro_tokens_from_string (&m->body, body.ss, lex_get_syntax_mode (lexer)); + ds_destroy (&body); + + lex_define_macro (lexer, m); + + return CMD_SUCCESS; + +error: + macro_destroy (m); + return CMD_FAILURE; +} diff --git a/src/language/control/repeat.c b/src/language/control/repeat.c index 118e8d3ccd..0438fa1bd8 100644 --- a/src/language/control/repeat.c +++ b/src/language/control/repeat.c @@ -201,10 +201,7 @@ do_parse_commands (struct substring s, enum segmenter_mode mode, struct hmap *dummies, struct string *outputs, size_t n_outputs) { - struct segmenter segmenter; - - segmenter_init (&segmenter, mode); - + struct segmenter segmenter = SEGMENTER_INIT (mode); while (!ss_is_empty (s)) { enum segment_type type; diff --git a/src/language/lexer/automake.mk b/src/language/lexer/automake.mk index 4387c3dd22..01b3df49c6 100644 --- a/src/language/lexer/automake.mk +++ b/src/language/lexer/automake.mk @@ -24,6 +24,8 @@ language_lexer_sources = \ src/language/lexer/include-path.h \ src/language/lexer/lexer.c \ src/language/lexer/lexer.h \ + src/language/lexer/macro.c \ + src/language/lexer/macro.h \ src/language/lexer/format-parser.c \ src/language/lexer/format-parser.h \ src/language/lexer/scan.c \ diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 7f2d0290a6..5ff5099652 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -31,6 +31,7 @@ #include #include "language/command.h" +#include "language/lexer/macro.h" #include "language/lexer/scan.h" #include "language/lexer/segment.h" #include "language/lexer/token.h" @@ -67,6 +68,7 @@ struct lex_token size_t token_len; /* Length of source for token in bytes. */ size_t line_pos; /* Start of line containing token_pos. */ int first_line; /* Line number at token_pos. */ + bool from_macro; }; /* A source of tokens, corresponding to a syntax file. @@ -77,6 +79,7 @@ struct lex_source { struct ll ll; /* In lexer's list of sources. 
*/ struct lex_reader *reader; + struct lexer *lexer; struct segmenter segmenter; bool eof; /* True if T_STOP was read from 'reader'. */ @@ -99,21 +102,25 @@ struct lex_source struct lex_token *tokens; /* Lookahead tokens for parser. */ }; -static struct lex_source *lex_source_create (struct lex_reader *); +static struct lex_source *lex_source_create (struct lexer *, + struct lex_reader *); static void lex_source_destroy (struct lex_source *); /* Lexer. */ struct lexer { struct ll_list sources; /* Contains "struct lex_source"s. */ + struct macro_set *macros; }; static struct lex_source *lex_source__ (const struct lexer *); +static struct substring lex_source_get_syntax__ (const struct lex_source *, + int n0, int n1); static const struct lex_token *lex_next__ (const struct lexer *, int n); static void lex_source_push_endcmd__ (struct lex_source *); static void lex_source_pop__ (struct lex_source *); -static bool lex_source_get__ (const struct lex_source *); +static bool lex_source_get (const struct lex_source *); static void lex_source_error_valist (struct lex_source *, int n0, int n1, const char *format, va_list) PRINTF_FORMAT (4, 0); @@ -148,8 +155,11 @@ lex_reader_set_file_name (struct lex_reader *reader, const char *file_name) struct lexer * lex_create (void) { - struct lexer *lexer = xzalloc (sizeof *lexer); - ll_init (&lexer->sources); + struct lexer *lexer = xmalloc (sizeof *lexer); + *lexer = (struct lexer) { + .sources = LL_INITIALIZER (lexer->sources), + .macros = macro_set_create (), + }; return lexer; } @@ -163,10 +173,19 @@ lex_destroy (struct lexer *lexer) ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources) lex_source_destroy (source); + macro_set_destroy (lexer->macros); free (lexer); } } +/* Adds M to LEXER's set of macros. M replaces any existing macro with the + same name. Takes ownership of M. 
*/ +void +lex_define_macro (struct lexer *lexer, struct macro *m) +{ + macro_set_add (lexer->macros, m); +} + /* Inserts READER into LEXER so that the next token read by LEXER comes from READER. Before the caller, LEXER must either be empty or at a T_ENDCMD token. */ @@ -174,7 +193,7 @@ void lex_include (struct lexer *lexer, struct lex_reader *reader) { assert (ll_is_empty (&lexer->sources) || lex_token (lexer) == T_ENDCMD); - ll_push_head (&lexer->sources, &lex_source_create (reader)->ll); + ll_push_head (&lexer->sources, &lex_source_create (lexer, reader)->ll); } /* Appends READER to LEXER, so that it will be read after all other current @@ -182,7 +201,7 @@ lex_include (struct lexer *lexer, struct lex_reader *reader) void lex_append (struct lexer *lexer, struct lex_reader *reader) { - ll_push_tail (&lexer->sources, &lex_source_create (reader)->ll); + ll_push_tail (&lexer->sources, &lex_source_create (lexer, reader)->ll); } /* Advancing. */ @@ -196,7 +215,7 @@ lex_push_token__ (struct lex_source *src) src->tokens = deque_expand (&src->deque, src->tokens, sizeof *src->tokens); token = &src->tokens[deque_push_front (&src->deque)]; - token_init (&token->token); + token->token = (struct token) { .type = T_STOP }; return token; } @@ -226,7 +245,7 @@ lex_get (struct lexer *lexer) lex_source_pop__ (src); while (deque_is_empty (&src->deque)) - if (!lex_source_get__ (src)) + if (!lex_source_get (src)) { lex_source_destroy (src); src = lex_source__ (lexer); @@ -859,13 +878,17 @@ lex_next__ (const struct lexer *lexer_, int n) return lex_source_next__ (src, n); else { - static const struct lex_token stop_token = - { TOKEN_INITIALIZER (T_STOP, 0.0, ""), 0, 0, 0, 0 }; - + static const struct lex_token stop_token = { .token = { .type = T_STOP } }; return &stop_token; } } +static const struct lex_token * +lex_source_front (const struct lex_source *src) +{ + return &src->tokens[deque_front (&src->deque, 0)]; +} + static const struct lex_token * lex_source_next__ (const struct 
lex_source *src, int n) { @@ -873,14 +896,12 @@ lex_source_next__ (const struct lex_source *src, int n) { if (!deque_is_empty (&src->deque)) { - struct lex_token *front; - - front = &src->tokens[deque_front (&src->deque, 0)]; + const struct lex_token *front = lex_source_front (src); if (front->token.type == T_STOP || front->token.type == T_ENDCMD) return front; } - lex_source_get__ (src); + lex_source_get (src); } return &src->tokens[deque_back (&src->deque, n)]; @@ -945,6 +966,12 @@ lex_next_tokss (const struct lexer *lexer, int n) return lex_next (lexer, n)->string; } +struct substring +lex_next_representation (const struct lexer *lexer, int n0, int n1) +{ + return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1); +} + static bool lex_tokens_match (const struct token *actual, const struct token *expected) { @@ -1160,7 +1187,6 @@ lex_get_encoding (const struct lexer *lexer) return src == NULL ? NULL : src->reader->encoding; } - /* Returns the syntax mode for the syntax file from which the current drawn is drawn. Returns SEG_MODE_AUTO for a T_STOP token or if the command's source does not have line numbers. 
@@ -1320,16 +1346,24 @@ lex_source__ (const struct lexer *lexer) } static struct substring -lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1) +lex_tokens_get_syntax__ (const struct lex_source *src, + const struct lex_token *token0, + const struct lex_token *token1) { - const struct lex_token *token0 = lex_source_next__ (src, n0); - const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1)); size_t start = token0->token_pos; size_t end = token1->token_pos + token1->token_len; return ss_buffer (&src->buffer[start - src->tail], end - start); } +static struct substring +lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1) +{ + return lex_tokens_get_syntax__ (src, + lex_source_next__ (src, n0), + lex_source_next__ (src, MAX (n0, n1))); +} + static void lex_ellipsize__ (struct substring in, char *out, size_t out_size) { @@ -1377,6 +1411,16 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1, token = lex_source_next__ (src, n0); if (token->token.type == T_ENDCMD) ds_put_cstr (&s, _("Syntax error at end of command")); + else if (token->from_macro) + { + /* XXX this isn't ideal, we should get the actual syntax */ + char *syntax = token_to_string (&token->token); + if (syntax) + ds_put_format (&s, _("Syntax error at `%s'"), syntax); + else + ds_put_cstr (&s, _("Syntax error")); + free (syntax); + } else { struct substring syntax = lex_source_get_syntax__ (src, n0, n1); @@ -1428,16 +1472,11 @@ lex_get_error (struct lex_source *src, const char *format, ...) } /* Attempts to append an additional token into SRC's deque, reading more from - the underlying lex_reader if necessary. Returns true if successful, false - if the deque already represents (a suffix of) the whole lex_reader's - contents, */ + the underlying lex_reader if necessary. Returns true if a new token was + added to SRC's deque, false otherwise. 
*/ static bool -lex_source_get__ (const struct lex_source *src_) +lex_source_try_get (struct lex_source *src) { - struct lex_source *src = CONST_CAST (struct lex_source *, src_); - if (src->eof) - return false; - /* State maintained while scanning tokens. Usually we only need a single state, but scanner_push() can return SCAN_SAVE to indicate that the state needs to be saved and possibly restored later with SCAN_BACK. */ @@ -1568,57 +1607,133 @@ lex_source_get__ (const struct lex_source *src_) switch (token->token.type) { default: - break; + return true; case T_STOP: token->token.type = T_ENDCMD; src->eof = true; - break; + return true; case SCAN_BAD_HEX_LENGTH: lex_get_error (src, _("String of hex digits has %d characters, which " "is not a multiple of 2"), (int) token->token.number); - break; + return false; case SCAN_BAD_HEX_DIGIT: case SCAN_BAD_UNICODE_DIGIT: lex_get_error (src, _("`%c' is not a valid hex digit"), (int) token->token.number); - break; + return false; case SCAN_BAD_UNICODE_LENGTH: lex_get_error (src, _("Unicode string contains %d bytes, which is " "not in the valid range of 1 to 8 bytes"), (int) token->token.number); - break; + return false; case SCAN_BAD_UNICODE_CODE_POINT: lex_get_error (src, _("U+%04X is not a valid Unicode code point"), (int) token->token.number); - break; + return false; case SCAN_EXPECTED_QUOTE: lex_get_error (src, _("Unterminated string constant")); - break; + return false; case SCAN_EXPECTED_EXPONENT: lex_get_error (src, _("Missing exponent following `%s'"), token->token.string.string); - break; + return false; case SCAN_UNEXPECTED_CHAR: { char c_name[16]; lex_get_error (src, _("Bad character %s in input"), uc_name (token->token.number, c_name)); + return false; } - break; case SCAN_SKIP: lex_source_pop_front (src); - break; + return false; + } + + NOT_REACHED (); +} + +static bool +lex_source_get__ (struct lex_source *src) +{ + for (;;) + { + if (src->eof) + return false; + else if (lex_source_try_get (src)) + return 
true; + } +} + +static bool +lex_source_get (const struct lex_source *src_) +{ + struct lex_source *src = CONST_CAST (struct lex_source *, src_); + + size_t old_count = deque_count (&src->deque); + if (!lex_source_get__ (src)) + return false; + + if (!settings_get_mexpand ()) + return true; + + struct macro_expander *me; + int retval = macro_expander_create (src->lexer->macros, + &lex_source_front (src)->token, + &me); + while (!retval) + { + if (!lex_source_get__ (src)) + { + /* This should not be reachable because we always get a T_STOP at the + end of input and the macro_expander should always terminate + expansion on T_STOP. */ + NOT_REACHED (); + } + + const struct lex_token *front = lex_source_front (src); + const struct macro_token mt = { + .token = front->token, + .representation = lex_tokens_get_syntax__ (src, front, front) + }; + retval = macro_expander_add (me, &mt); + } + if (retval < 0) + { + /* XXX handle case where there's a macro invocation starting from some + later token we've already obtained */ + macro_expander_destroy (me); + return true; + } + + /* XXX handle case where the macro invocation doesn't use all the tokens */ + while (deque_count (&src->deque) > old_count) + lex_source_pop_front (src); + + struct macro_tokens expansion = { .n = 0 }; + macro_expander_get_expansion (me, &expansion); + macro_expander_destroy (me); + + for (size_t i = 0; i < expansion.n; i++) + { + *lex_push_token__ (src) = (struct lex_token) { + .token = expansion.mts[i].token, + .from_macro = true, + /* XXX the rest */ + }; + + ss_dealloc (&expansion.mts[i].representation); /* XXX should feed into lexer */ } + free (expansion.mts); return true; } @@ -1635,13 +1750,14 @@ lex_source_push_endcmd__ (struct lex_source *src) } static struct lex_source * -lex_source_create (struct lex_reader *reader) +lex_source_create (struct lexer *lexer, struct lex_reader *reader) { struct lex_source *src; src = xzalloc (sizeof *src); src->reader = reader; segmenter_init (&src->segmenter, 
reader->syntax); + src->lexer = lexer; src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens); lex_source_push_endcmd__ (src); diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index caf5750331..86bb4f2b5d 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -29,6 +29,7 @@ #include "libpspp/prompt.h" struct lexer; +struct macro; /* Handling of errors. */ enum lex_error_mode @@ -90,6 +91,9 @@ struct lex_reader *lex_reader_for_substring_nocopy (struct substring, const char struct lexer *lex_create (void); void lex_destroy (struct lexer *); +/* Macros. */ +void lex_define_macro (struct lexer *, struct macro *); + /* Files. */ void lex_include (struct lexer *, struct lex_reader *); void lex_append (struct lexer *, struct lex_reader *); @@ -142,6 +146,10 @@ const char *lex_next_tokcstr (const struct lexer *, int n); double lex_next_tokval (const struct lexer *, int n); struct substring lex_next_tokss (const struct lexer *, int n); +/* Token representation. */ +struct substring lex_next_representation (const struct lexer *, + int n0, int n1); + /* Current position. */ int lex_get_first_line_number (const struct lexer *, int n); int lex_get_last_line_number (const struct lexer *, int n); diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c new file mode 100644 index 0000000000..a15b7064a5 --- /dev/null +++ b/src/language/lexer/macro.c @@ -0,0 +1,931 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2021 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "language/lexer/macro.h" + +#include +#include + +#include "data/settings.h" +#include "language/lexer/segment.h" +#include "language/lexer/scan.h" +#include "libpspp/assertion.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/str.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +void +macro_token_copy (struct macro_token *dst, const struct macro_token *src) +{ + token_copy (&dst->token, &src->token); + ss_alloc_substring (&dst->representation, src->representation); +} + +void +macro_token_uninit (struct macro_token *mt) +{ + token_uninit (&mt->token); + ss_dealloc (&mt->representation); +} + +void +macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src) +{ + *dst = (struct macro_tokens) { + .mts = xmalloc (src->n * sizeof *dst->mts), + .n = src->n, + .allocated = src->n, + }; + for (size_t i = 0; i < src->n; i++) + macro_token_copy (&dst->mts[i], &src->mts[i]); +} + +void +macro_tokens_uninit (struct macro_tokens *mts) +{ + for (size_t i = 0; i < mts->n; i++) + macro_token_uninit (&mts->mts[i]); + free (mts->mts); +} + +struct macro_token * +macro_tokens_add_uninit (struct macro_tokens *mts) +{ + if (mts->n >= mts->allocated) + mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts); + return &mts->mts[mts->n++]; +} + +void +macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt) +{ + macro_token_copy (macro_tokens_add_uninit (mts), mt); +} + +void +macro_tokens_from_string (struct macro_tokens *mts, const struct substring src, + enum segmenter_mode mode) +{ + struct state + { + struct segmenter segmenter; + struct substring body; + }; + + struct state state = { + .segmenter = SEGMENTER_INIT (mode), + .body = src, + }; + struct state saved = state; + + while (state.body.length > 0) 
+ { + struct macro_token mt = { + .token = { .type = T_STOP }, + .representation = { .string = state.body.string }, + }; + struct token *token = &mt.token; + + struct scanner scanner; + scanner_init (&scanner, token); + + for (;;) + { + enum segment_type type; + int seg_len = segmenter_push (&state.segmenter, state.body.string, + state.body.length, true, &type); + assert (seg_len >= 0); + + struct substring segment = ss_head (state.body, seg_len); + ss_advance (&state.body, seg_len); + + enum scan_result result = scanner_push (&scanner, type, segment, token); + if (result == SCAN_SAVE) + saved = state; + else if (result == SCAN_BACK) + { + state = saved; + break; + } + else if (result == SCAN_DONE) + break; + } + + /* We have a token in 'token'. */ + if (is_scan_type (token->type)) + { + if (token->type != SCAN_SKIP) + { + /* XXX report error */ + } + } + else + { + mt.representation.length = state.body.string - mt.representation.string; + macro_tokens_add (mts, &mt); + } + token_uninit (token); + } +} + +void +macro_tokens_print (const struct macro_tokens *mts, FILE *stream) +{ + for (size_t i = 0; i < mts->n; i++) + token_print (&mts->mts[i].token, stream); +} + +void +macro_destroy (struct macro *m) +{ + if (!m) + return; + + free (m->name); + for (size_t i = 0; i < m->n_params; i++) + { + struct macro_param *p = &m->params[i]; + free (p->name); + + macro_tokens_uninit (&p->def); + + switch (p->arg_type) + { + case ARG_N_TOKENS: + break; + + case ARG_CHAREND: + token_uninit (&p->charend); + break; + + case ARG_ENCLOSE: + token_uninit (&p->enclose[0]); + token_uninit (&p->enclose[1]); + break; + + case ARG_CMDEND: + break; + } + } + free (m->params); + macro_tokens_uninit (&m->body); + free (m); +} + +struct macro_set * +macro_set_create (void) +{ + struct macro_set *set = xmalloc (sizeof *set); + *set = (struct macro_set) { + .macros = HMAP_INITIALIZER (set->macros), + }; + return set; +} + +void +macro_set_destroy (struct macro_set *set) +{ + if (!set) + 
return; + + struct macro *macro, *next; + HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros) + { + hmap_delete (&set->macros, &macro->hmap_node); + macro_destroy (macro); + } + hmap_destroy (&set->macros); + free (set); +} + +static unsigned int +hash_macro_name (const char *name) +{ + return utf8_hash_case_string (name, 0); +} + +static struct macro * +macro_set_find__ (struct macro_set *set, const char *name) +{ + struct macro *macro; + HMAP_FOR_EACH_WITH_HASH (macro, struct macro, hmap_node, + hash_macro_name (name), &set->macros) + if (!utf8_strcasecmp (macro->name, name)) + return macro; + + return NULL; +} + +const struct macro * +macro_set_find (const struct macro_set *set, const char *name) +{ + return macro_set_find__ (CONST_CAST (struct macro_set *, set), name); +} + +/* Adds M to SET. M replaces any existing macro with the same name. Takes + ownership of M. */ +void +macro_set_add (struct macro_set *set, struct macro *m) +{ + struct macro *victim = macro_set_find__ (set, m->name); + if (victim) + { + hmap_delete (&set->macros, &victim->hmap_node); + macro_destroy (victim); + } + + hmap_insert (&set->macros, &m->hmap_node, hash_macro_name (m->name)); +} + +enum me_state + { + /* Error state. */ + ME_ERROR, + + /* Accumulating tokens in me->params toward the end of any type of + argument. */ + ME_ARG, + + /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */ + ME_ENCLOSE, + + /* Expecting a keyword for a keyword argument. */ + ME_KEYWORD, + + /* Expecting an equal sign for a keyword argument.
*/ + ME_EQUALS, + }; + + +struct macro_expander + { + const struct macro_set *macros; + + enum me_state state; + size_t n_tokens; + + const struct macro *macro; + struct macro_tokens **args; + const struct macro_param *param; + }; + +static int +me_finished (struct macro_expander *me) +{ + for (size_t i = 0; i < me->macro->n_params; i++) + if (!me->args[i]) + { + me->args[i] = xmalloc (sizeof *me->args[i]); + macro_tokens_copy (me->args[i], &me->macro->params[i].def); + } + return me->n_tokens; +} + +static int +me_next_arg (struct macro_expander *me) +{ + if (!me->param) + { + assert (!me->macro->n_params); + return me_finished (me); + } + else if (me->param->positional) + { + me->param++; + if (me->param >= &me->macro->params[me->macro->n_params]) + return me_finished (me); + else + { + me->state = me->param->positional ? ME_ARG : ME_KEYWORD; + return 0; + } + } + else + { + for (size_t i = 0; i < me->macro->n_params; i++) + if (!me->args[i]) + { + me->state = ME_KEYWORD; + return 0; + } + return me_finished (me); + } +} + +static int +me_error (struct macro_expander *me) +{ + me->state = ME_ERROR; + return -1; +} + +static int +me_add_arg (struct macro_expander *me, const struct macro_token *mt) +{ + const struct token *token = &mt->token; + if (token->type == T_STOP) + { + msg (SE, _("Unexpected end of file reading argument %s " + "to macro %s."), me->param->name, me->macro->name); + + return me_error (me); + } + + me->n_tokens++; + + const struct macro_param *p = me->param; + struct macro_tokens **argp = &me->args[p - me->macro->params]; + if (!*argp) + *argp = xzalloc (sizeof **argp); + struct macro_tokens *arg = *argp; + if (p->arg_type == ARG_N_TOKENS) + { + macro_tokens_add (arg, mt); + if (arg->n >= p->n_tokens) + return me_next_arg (me); + return 0; + } + else if (p->arg_type == ARG_CMDEND) + { + if (token->type == T_ENDCMD || token->type == T_STOP) + return me_next_arg (me); + macro_tokens_add (arg, mt); + return 0; + } + else + { + const struct token 
*end + = p->arg_type == ARG_CMDEND ? &p->charend : &p->enclose[1]; + if (token_equal (token, end)) + return me_next_arg (me); + macro_tokens_add (arg, mt); + return 0; + } +} + +static int +me_expected (struct macro_expander *me, const struct macro_token *actual, + const struct token *expected) +{ + const struct substring actual_s + = (actual->representation.length ? actual->representation + : ss_cstr (_(""))); + char *expected_s = token_to_string (expected); + msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s " + "to macro %s."), + (int) actual_s.length, actual_s.string, expected_s, + me->param->name, me->macro->name); + free (expected_s); + + return me_error (me); +} + +static int +me_enclose (struct macro_expander *me, const struct macro_token *mt) +{ + const struct token *token = &mt->token; + me->n_tokens++; + + if (token_equal (&me->param->enclose[0], token)) + { + me->state = ME_ARG; + return 0; + } + + return me_expected (me, mt, &me->param->enclose[0]); +} + +static const struct macro_param * +macro_find_parameter_by_name (const struct macro *m, struct substring name) +{ + for (size_t i = 0; i < m->n_params; i++) + { + const struct macro_param *p = &m->params[i]; + struct substring p_name = ss_cstr (p->name); + if (!utf8_strncasecmp (p_name.string, p_name.length, + name.string, name.length)) + return p; + } + return NULL; +} + +static int +me_keyword (struct macro_expander *me, const struct macro_token *mt) +{ + const struct token *token = &mt->token; + if (token->type != T_ID) + return me_finished (me); + + const struct macro_param *p = macro_find_parameter_by_name (me->macro, + token->string); + if (p) + { + size_t arg_index = p - me->macro->params; + me->param = p; + if (me->args[arg_index]) + { + msg (SE, + _("Argument %s multiply specified in call to macro %s."), + p->name, me->macro->name); + return me_error (me); + } + + me->n_tokens++; + me->state = ME_EQUALS; + return 0; + } + + return me_finished (me); +} + +static int +me_equals 
(struct macro_expander *me, const struct macro_token *mt) +{ + const struct token *token = &mt->token; + me->n_tokens++; + + if (token->type == T_EQUALS) + { + me->state = ME_ARG; + return 0; + } + + return me_expected (me, mt, &(struct token) { .type = T_EQUALS }); +} + +int +macro_expander_create (const struct macro_set *macros, + const struct token *token, + struct macro_expander **mep) +{ + *mep = NULL; + if (macro_set_is_empty (macros)) + return -1; + if (token->type != T_ID && token->type != T_MACRO_ID) + return -1; + + const struct macro *macro = macro_set_find (macros, token->string.string); + if (!macro) + return -1; + + struct macro_expander *me = xmalloc (sizeof *me); + *me = (struct macro_expander) { + .macros = macros, + .n_tokens = 1, + .macro = macro, + }; + *mep = me; + + if (!macro->n_params) + return 1; + else + { + me->state = macro->params[0].positional ? ME_ARG : ME_KEYWORD; + me->args = xcalloc (macro->n_params, sizeof *me->args); + me->param = macro->params; + return 0; + } +} + +void +macro_expander_destroy (struct macro_expander *me) +{ + if (!me) + return; + + for (size_t i = 0; i < me->macro->n_params; i++) + if (me->args[i]) + { + macro_tokens_uninit (me->args[i]); + free (me->args[i]); + } + free (me->args); + free (me); +} + +/* Adds TOKEN to the collection of tokens in ME that potentially need to be + macro expanded. + + Returns -1 if the tokens added do not actually invoke a macro. The caller + should consume the first token without expanding it. + + Returns 0 if the macro expander needs more tokens, for macro arguments or to + decide whether this is actually a macro invocation. The caller should call + macro_expander_add() again with the next token. + + Returns a positive number to indicate that the returned number of tokens + invoke a macro. The number returned might be less than the number of tokens + added because it can take a few tokens of lookahead to determine whether the + macro invocation is finished. 
The caller should call + macro_expander_get_expansion() to obtain the expansion. */ +int +macro_expander_add (struct macro_expander *me, const struct macro_token *mt) +{ + switch (me->state) + { + case ME_ERROR: + return -1; + + case ME_ARG: + return me_add_arg (me, mt); + + case ME_ENCLOSE: + return me_enclose (me, mt); + + case ME_KEYWORD: + return me_keyword (me, mt); + + case ME_EQUALS: + return me_equals (me, mt); + + default: + NOT_REACHED (); + } +} + +/* Each argument to a macro function is one of: + + - A quoted string or other single literal token. + + - An argument to the macro being expanded, e.g. !1 or a named argument. + + - !*. + + - A function invocation. + + Each function invocation yields a character sequence to be turned into a + sequence of tokens. The case where that character sequence is a single + quoted string is an important special case. +*/ +struct parse_macro_function_ctx + { + struct macro_token *input; + size_t n_input; + int nesting_countdown; + const struct macro_set *macros; + const struct macro_expander *me; + bool *expand; + }; + +static void +macro_expand (const struct macro_tokens *, + int nesting_countdown, const struct macro_set *, + const struct macro_expander *, bool *expand, struct macro_tokens *exp); + +static bool +expand_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_token *output, + size_t *input_consumed); + +static size_t +parse_function_arg (struct parse_macro_function_ctx *ctx, + size_t i, struct macro_token *farg) +{ + struct macro_token *tokens = ctx->input; + const struct token *token = &tokens[i].token; + if (token->type == T_MACRO_ID) + { + const struct macro_param *param = macro_find_parameter_by_name ( + ctx->me->macro, token->string); + if (param) + { + size_t param_idx = param - ctx->me->macro->params; + const struct macro_tokens *marg = ctx->me->args[param_idx]; + if (marg->n == 1) + macro_token_copy (farg, &marg->mts[0]); + else + { + struct string s = DS_EMPTY_INITIALIZER; + for 
(size_t i = 0; i < marg->n; i++) + { + if (i) + ds_put_byte (&s, ' '); + ds_put_substring (&s, marg->mts[i].representation); + } + + struct substring s_copy; + ss_alloc_substring (&s_copy, s.ss); + + *farg = (struct macro_token) { + .token = { .type = T_MACRO_ID, .string = s.ss }, + .representation = s_copy, + }; + } + return 1; + } + + struct parse_macro_function_ctx subctx = { + .input = &ctx->input[i], + .n_input = ctx->n_input - i, + .nesting_countdown = ctx->nesting_countdown, + .macros = ctx->macros, + .me = ctx->me, + .expand = ctx->expand, + }; + size_t subinput_consumed; + if (expand_macro_function (&subctx, farg, &subinput_consumed)) + return subinput_consumed; + } + + macro_token_copy (farg, &tokens[i]); + return 1; +} + +static bool +parse_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_tokens *args, + struct substring function, + int min_args, int max_args, + size_t *input_consumed) +{ + struct macro_token *tokens = ctx->input; + size_t n_tokens = ctx->n_input; + + if (!n_tokens + || tokens[0].token.type != T_MACRO_ID + || !ss_equals_case (tokens[0].token.string, function)) + return false; + + if (n_tokens < 2 || tokens[1].token.type != T_LPAREN) + { + printf ("`(' expected following %s'\n", function.string); + return false; + } + + *args = (struct macro_tokens) { .n = 0 }; + + for (size_t i = 2;; ) + { + if (i >= n_tokens) + goto unexpected_end; + if (tokens[i].token.type == T_RPAREN) + { + *input_consumed = i + 1; + if (args->n < min_args || args->n > max_args) + { + printf ("Wrong number of arguments to %s.\n", function.string); + goto error; + } + return true; + } + + i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args)); + if (i >= n_tokens) + goto unexpected_end; + + if (tokens[i].token.type == T_COMMA) + i++; + else if (tokens[i].token.type != T_RPAREN) + { + printf ("Expecting `,' or `)' in %s invocation.", function.string); + goto error; + } + } + +unexpected_end: + printf ("Missing closing parenthesis in 
arguments to %s.\n", + function.string); + /* Fall through. */ +error: + macro_tokens_uninit (args); + return false; +} + +static bool +expand_macro_function (struct parse_macro_function_ctx *ctx, + struct macro_token *output, + size_t *input_consumed) +{ + struct macro_tokens args; + + if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1, + input_consumed)) + { + size_t length = args.mts[0].representation.length; + *output = (struct macro_token) { + .token = { .type = T_POS_NUM, .number = length }, + .representation = ss_cstr (xasprintf ("%zu", length)), + }; + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1, + input_consumed)) + { + /* XXX this isn't right, it might be a character string containing a + positive integer, e.g. via !CONCAT. */ + if (args.mts[0].token.type != T_POS_NUM) + { + printf ("argument to !BLANKS must be positive integer\n"); + macro_tokens_uninit (&args); + return false; + } + + struct string s = DS_EMPTY_INITIALIZER; + ds_put_byte_multiple (&s, ' ', args.mts[0].token.number); + + struct substring s_copy; + ss_alloc_substring (&s_copy, s.ss); + + *output = (struct macro_token) { + .token = { .type = T_ID, .string = s.ss }, + .representation = s_copy, + }; + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX, + input_consumed)) + { + struct string s = DS_EMPTY_INITIALIZER; /* Appended to below, so it must start out empty rather than indeterminate. */ + bool all_strings = true; + for (size_t i = 0; i < args.n; i++) + { + if (args.mts[i].token.type == T_STRING) + ds_put_substring (&s, args.mts[i].token.string); + else + { + all_strings = false; + ds_put_substring (&s, args.mts[i].representation); + } + } + + if (all_strings) + { + *output = (struct macro_token) { + .token = { .type = T_STRING, .string = s.ss }, + }; + output->representation = ss_cstr (token_to_string (&output->token)); + } + else + { + *output = (struct macro_token) { + .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss }, + }; + ss_alloc_substring (&output->representation, s.ss); + } + } + else if
(parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1, + input_consumed)) + { + if (args.mts[0].token.type == T_STRING) + macro_token_copy (output, &args.mts[0]); + else + { + *output = (struct macro_token) { .token = { .type = T_STRING } }; + ss_alloc_substring (&output->token.string, args.mts[0].representation); + output->representation = ss_cstr (token_to_string (&output->token)); + } + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1, + input_consumed)) + { + if (args.mts[0].token.type == T_STRING) + { + *output = (struct macro_token) { .token = { .type = T_MACRO_ID } }; + ss_alloc_substring (&output->token.string, args.mts[0].token.string); + output->representation = ss_cstr (token_to_string (&output->token)); + } + else + macro_token_copy (output, &args.mts[0]); + } + else + return false; + + macro_tokens_uninit (&args); + return true; +} + +static void +macro_expand (const struct macro_tokens *mts, + int nesting_countdown, const struct macro_set *macros, + const struct macro_expander *me, bool *expand, + struct macro_tokens *exp) +{ + if (nesting_countdown <= 0) + { + printf ("maximum nesting level exceeded\n"); + for (size_t i = 0; i < mts->n; i++) + macro_tokens_add (exp, &mts->mts[i]); + return; + } + + for (size_t i = 0; i < mts->n; i++) + { + const struct macro_token *mt = &mts->mts[i]; + const struct token *token = &mt->token; + if (token->type == T_MACRO_ID && me) + { + const struct macro_param *param = macro_find_parameter_by_name ( + me->macro, token->string); + if (param) + { + const struct macro_tokens *arg = me->args[param - me->macro->params]; + //macro_tokens_print (arg, stdout); + if (*expand && param->expand_arg) + macro_expand (arg, nesting_countdown, macros, NULL, expand, exp); + else + for (size_t i = 0; i < arg->n; i++) + macro_tokens_add (exp, &arg->mts[i]); + continue; + } + } + + if (*expand) + { + struct macro_expander *subme; + int retval = macro_expander_create (macros, token, &subme); + for (size_t j 
= 1; !retval; j++) + { + const struct macro_token stop = { .token = { .type = T_STOP } }; + retval = macro_expander_add ( + subme, i + j < mts->n ? &mts->mts[i + j] : &stop); + } + if (retval > 0) + { + i += retval - 1; + macro_expand (&subme->macro->body, nesting_countdown - 1, macros, + subme, expand, exp); + macro_expander_destroy (subme); + continue; + } + + macro_expander_destroy (subme); + } + + if (token->type != T_MACRO_ID) + { + macro_tokens_add (exp, mt); + continue; + } + + /* Maybe each arg should just be a string, either a quoted string or a + non-quoted string containing tokens. */ + struct parse_macro_function_ctx ctx = { + .input = &mts->mts[i], + .n_input = mts->n - i, + .nesting_countdown = nesting_countdown, + .macros = macros, + .me = me, + .expand = expand, + }; + struct macro_token function_output; + size_t function_consumed; + if (expand_macro_function (&ctx, &function_output, &function_consumed)) + { + i += function_consumed - 1; + + if (function_output.token.type == T_MACRO_ID) + macro_tokens_from_string (exp, function_output.token.string, + SEG_MODE_INTERACTIVE /* XXX */); + else + macro_tokens_add (exp, &function_output); + macro_token_uninit (&function_output); + + continue; + } + + if (ss_equals_case (token->string, ss_cstr ("!onexpand"))) + *expand = true; + else if (ss_equals_case (token->string, ss_cstr ("!offexpand"))) + *expand = false; + else + macro_tokens_add (exp, mt); + } +} + +void +macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp) +{ +#if 0 + for (size_t i = 0; i < me->macro->n_params; i++) + { + printf ("%s:\n", me->macro->params[i].name); + macro_tokens_print (me->args[i], stdout); + } +#endif + + bool expand = true; + macro_expand (&me->macro->body, settings_get_mnest (), + me->macros, me, &expand, exp); + +#if 0 + printf ("expansion:\n"); + macro_tokens_print (exp, stdout); +#endif +} + diff --git a/src/language/lexer/macro.h b/src/language/lexer/macro.h new file mode 100644 index 
0000000000..23ae1d9a18 --- /dev/null +++ b/src/language/lexer/macro.h @@ -0,0 +1,120 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2021 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef MACRO_H +#define MACRO_H 1 + +#include +#include + +#include "libpspp/hmap.h" +#include "libpspp/str.h" +#include "language/lexer/segment.h" +#include "language/lexer/token.h" + +struct macro_expander; + +struct macro_token + { + struct token token; + struct substring representation; + }; + +void macro_token_copy (struct macro_token *, const struct macro_token *); +void macro_token_uninit (struct macro_token *); + +struct macro_tokens + { + struct macro_token *mts; + size_t n; + size_t allocated; + }; + +void macro_tokens_copy (struct macro_tokens *, const struct macro_tokens *); +void macro_tokens_uninit (struct macro_tokens *); +struct macro_token *macro_tokens_add_uninit (struct macro_tokens *); +void macro_tokens_add (struct macro_tokens *, const struct macro_token *); + +void macro_tokens_from_string (struct macro_tokens *, const struct substring, + enum segmenter_mode); + +void macro_tokens_print (const struct macro_tokens *, FILE *); + +struct macro_param + { + bool positional; /* Is this a positional parameter? */ + char *name; /* "!1" or "!name". */ + struct macro_tokens def; /* Default expansion. */ + bool expand_arg; /* Macro-expand the argument? 
*/ + + enum + { + ARG_N_TOKENS, + ARG_CHAREND, + ARG_ENCLOSE, + ARG_CMDEND + } + arg_type; + union + { + int n_tokens; + struct token charend; + struct token enclose[2]; + }; + }; + +struct macro + { + struct hmap_node hmap_node; /* Indexed by 'name'. */ + char *name; + + struct macro_param *params; + size_t n_params; + + struct macro_tokens body; + }; + +void macro_destroy (struct macro *); + +struct macro_set + { + struct hmap macros; + }; + +struct macro_set *macro_set_create (void); +void macro_set_destroy (struct macro_set *); +const struct macro *macro_set_find (const struct macro_set *, + const char *); +void macro_set_add (struct macro_set *, struct macro *); + +static inline bool +macro_set_is_empty (const struct macro_set *set) +{ + return hmap_is_empty (&set->macros); +} + +/* Macro expansion. */ + +int macro_expander_create (const struct macro_set *, + const struct token *, + struct macro_expander **); +void macro_expander_destroy (struct macro_expander *); + +int macro_expander_add (struct macro_expander *, const struct macro_token *); + +void macro_expander_get_expansion (struct macro_expander *, struct macro_tokens *); + +#endif /* macro.h */ diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index 86ebb7d006..0e29dc9e71 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -548,7 +548,7 @@ void scanner_init (struct scanner *scanner, struct token *token) { scanner->state = S_START; - token_init (token); + *token = (struct token) { .type = T_STOP }; } /* Adds the segment with type TYPE and UTF-8 text S to SCANNER. 
TOKEN must be diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index a4fea0b213..ac88117ff5 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -28,6 +28,7 @@ #include "gl/c-ctype.h" #include "gl/c-strcase.h" +#include "gl/verify.h" enum segmenter_state { @@ -54,6 +55,9 @@ enum segmenter_state S_TITLE_2 }; +/* S_SHBANG is the start state that SEGMENTER_INIT refers to as just 0. */ +verify (S_SHBANG == 0); + #define SS_START_OF_LINE (1u << 0) #define SS_START_OF_COMMAND (1u << 1) @@ -1806,9 +1810,7 @@ segment_type_to_string (enum segment_type type) void segmenter_init (struct segmenter *s, enum segmenter_mode mode) { - s->state = S_SHBANG; - s->substate = 0; - s->mode = mode; + *s = (struct segmenter) SEGMENTER_INIT (mode); } /* Returns the mode passed to segmenter_init() for S. */ diff --git a/src/language/lexer/segment.h b/src/language/lexer/segment.h index 02a269bdd2..10551066b0 100644 --- a/src/language/lexer/segment.h +++ b/src/language/lexer/segment.h @@ -117,6 +117,8 @@ struct segmenter unsigned char mode; }; +#define SEGMENTER_INIT(MODE) { .mode = MODE } + void segmenter_init (struct segmenter *, enum segmenter_mode); enum segmenter_mode segmenter_get_mode (const struct segmenter *); diff --git a/src/language/lexer/token.c b/src/language/lexer/token.c index 718f3d07f3..9fa5bbb6ba 100644 --- a/src/language/lexer/token.c +++ b/src/language/lexer/token.c @@ -27,17 +27,17 @@ #include "libpspp/cast.h" #include "libpspp/misc.h" - #include "gl/ftoastr.h" #include "gl/xalloc.h" -/* Initializes TOKEN with an arbitrary type, number 0, and a null string. */ void -token_init (struct token *token) +token_copy (struct token *dst, const struct token *src) { - token->type = 0; - token->number = 0.0; - token->string = ss_empty (); + *dst = (struct token) { + .type = src->type, + .number = src->number, + }; + ss_alloc_substring (&dst->string, src->string); } /* Frees the string that TOKEN contains. 
*/ @@ -45,7 +45,33 @@ void token_uninit (struct token *token) { if (token != NULL) - ss_dealloc (&token->string); + { + ss_dealloc (&token->string); + *token = (struct token) { .type = T_STOP }; + } +} + +bool +token_equal (const struct token *a, const struct token *b) +{ + if (a->type != b->type) + return false; + + switch (a->type) + { + case T_POS_NUM: + case T_NEG_NUM: + return a->number == b->number; + + case T_ID: + case T_MACRO_ID: + case T_MACRO_PUNCT: + case T_STRING: + return ss_equals (a->string, b->string); + + default: + return true; + } } static char * @@ -150,7 +176,7 @@ token_to_string (const struct token *token) return string_representation (token->string); default: - return xstrdup_if_nonnull (token_type_to_name (token->type)); + return xstrdup_if_nonnull (token_type_to_string (token->type)); } } @@ -172,3 +198,41 @@ token_print (const struct token *token, FILE *stream) (int) token->string.length, token->string.string); putc ('\n', stream); } + +void +tokens_copy (struct tokens *dst, const struct tokens *src) +{ + *dst = (struct tokens) { + .tokens = xnmalloc (src->n, sizeof *dst->tokens), + .n = src->n, + .allocated = src->n, + }; + + for (size_t i = 0; i < src->n; i++) + token_copy (&dst->tokens[i], &src->tokens[i]); +} + +void +tokens_uninit (struct tokens *tokens) +{ + for (size_t i = 0; i < tokens->n; i++) + token_uninit (&tokens->tokens[i]); + free (tokens->tokens); +} + +void +tokens_add (struct tokens *tokens, const struct token *t) +{ + if (tokens->n >= tokens->allocated) /* Grow only when the array is full, not on every append. */ + tokens->tokens = x2nrealloc (tokens->tokens, &tokens->allocated, + sizeof *tokens->tokens); + + token_copy (&tokens->tokens[tokens->n++], t); +} + +void +tokens_print (const struct tokens *tokens, FILE *stream) +{ + for (size_t i = 0; i < tokens->n; i++) + token_print (&tokens->tokens[i], stream); +} diff --git a/src/language/lexer/token.h b/src/language/lexer/token.h index cab1a8cf9c..b334edfef6 100644 --- a/src/language/lexer/token.h +++ b/src/language/lexer/token.h
@@ -17,6 +17,7 @@ #ifndef TOKEN_H #define TOKEN_H 1 +#include #include #include "libpspp/str.h" #include "data/identifier.h" @@ -32,14 +33,26 @@ struct token struct substring string; }; -#define TOKEN_INITIALIZER(TYPE, NUMBER, STRING) \ - { TYPE, NUMBER, SS_LITERAL_INITIALIZER (STRING) } - -void token_init (struct token *); +void token_copy (struct token *, const struct token *); void token_uninit (struct token *); +bool token_equal (const struct token *, const struct token *); + char *token_to_string (const struct token *); void token_print (const struct token *, FILE *); + +struct tokens + { + struct token *tokens; + size_t n; + size_t allocated; + }; + +void tokens_copy (struct tokens *, const struct tokens *); +void tokens_uninit (struct tokens *); +void tokens_add (struct tokens *, const struct token *); + +void tokens_print (const struct tokens *, FILE *); #endif /* token.h */ diff --git a/tests/automake.mk b/tests/automake.mk index ec81e52881..4de61417b2 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -339,6 +339,7 @@ TESTSUITE_AT = \ tests/data/sys-file.at \ tests/data/encrypted-file.at \ tests/language/command.at \ + tests/language/control/define.at \ tests/language/control/do-if.at \ tests/language/control/do-repeat.at \ tests/language/control/loop.at \ diff --git a/tests/language/control/define.at b/tests/language/control/define.at new file mode 100644 index 0000000000..d187b046d5 --- /dev/null +++ b/tests/language/control/define.at @@ -0,0 +1,26 @@ +dnl PSPP - a program for statistical analysis. +dnl Copyright (C) 2017 Free Software Foundation, Inc. +dnl +dnl This program is free software: you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation, either version 3 of the License, or +dnl (at your option) any later version. 
+dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program. If not, see . +dnl +AT_BANNER([DEFINE]) + +AT_SETUP([DEFINE]) +AT_DATA([define.sps], [dnl +DEFINE !variables() + brand model license color +!ENDDEFINE. +]) +AT_CHECK([pspp define.sps]) +AT_CLEANUP diff --git a/tests/language/lexer/segment-test.c b/tests/language/lexer/segment-test.c index a3b67b89b2..cb46401b34 100644 --- a/tests/language/lexer/segment-test.c +++ b/tests/language/lexer/segment-test.c @@ -108,8 +108,7 @@ main (int argc, char *argv[]) static void check_segmentation (const char *input, size_t length, bool print_segments) { - struct segmenter s; - segmenter_init (&s, mode); + struct segmenter s = SEGMENTER_INIT (mode); size_t line_number = 1; size_t line_offset = 0;