From dde9b7b4e92fd1221de01e429343ea72ae444e33 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 23 Mar 2021 07:14:48 -0700 Subject: [PATCH] DEFINE command can now be parsed. --- src/language/command.def | 2 +- src/language/control/automake.mk | 1 + src/language/control/define.c | 292 +++++++++++++++++++++++++++++++ src/language/lexer/lexer.c | 6 +- src/language/lexer/scan.c | 2 +- src/language/lexer/token.c | 17 +- src/language/lexer/token.h | 5 +- tests/automake.mk | 1 + tests/language/control/define.at | 26 +++ 9 files changed, 335 insertions(+), 17 deletions(-) create mode 100644 src/language/control/define.c create mode 100644 tests/language/control/define.at diff --git a/src/language/command.def b/src/language/command.def index a97f9b83e7..12f30c7c03 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -18,6 +18,7 @@ DEF_CMD (S_ANY, F_ENHANCED, "CLOSE FILE HANDLE", cmd_close_file_handle) DEF_CMD (S_ANY, 0, "CACHE", cmd_cache) DEF_CMD (S_ANY, 0, "CD", cmd_cd) +DEF_CMD (S_ANY, 0, "DEFINE", cmd_define) DEF_CMD (S_ANY, 0, "DO REPEAT", cmd_do_repeat) DEF_CMD (S_ANY, 0, "END REPEAT", cmd_end_repeat) DEF_CMD (S_ANY, 0, "ECHO", cmd_echo) @@ -188,7 +189,6 @@ UNIMPL_CMD ("CSTABULATE", "Tabulate complex samples") UNIMPL_CMD ("CTABLES", "Display complex samples") UNIMPL_CMD ("CURVEFIT", "Fit curve to line plot") UNIMPL_CMD ("DATE", "Create time series data") -UNIMPL_CMD ("DEFINE", "Syntax macros") UNIMPL_CMD ("DETECTANOMALY", "Find unusual cases") UNIMPL_CMD ("DISCRIMINANT", "Linear discriminant analysis") UNIMPL_CMD ("EDIT", "obsolete") diff --git a/src/language/control/automake.mk b/src/language/control/automake.mk index 909acd13db..9d09687c81 100644 --- a/src/language/control/automake.mk +++ b/src/language/control/automake.mk @@ -20,6 +20,7 @@ language_control_sources = \ src/language/control/control-stack.c \ src/language/control/control-stack.h \ + src/language/control/define.c \ src/language/control/do-if.c \ src/language/control/loop.c \ 
src/language/control/repeat.c \ diff --git a/src/language/control/define.c b/src/language/control/define.c new file mode 100644 index 0000000000..b718ae4420 --- /dev/null +++ b/src/language/control/define.c @@ -0,0 +1,343 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <limits.h>
+
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/scan.h"
+#include "language/lexer/token.h"
+
+#include "gl/xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Returns true if the current token is a T_MACRO_ID; otherwise returns the
+   result of lex_force_id() for LEXER. */
+static bool
+force_macro_id (struct lexer *lexer)
+{
+  return lex_token (lexer) == T_MACRO_ID || lex_force_id (lexer);
+}
+
+/* Tries to match the current token against ID.
+
+   If ID does not begin with '!', this is just lex_match_id().  Otherwise the
+   current token must be a T_MACRO_ID whose text equals ID according to
+   ss_equals_case(); on a match the token is skipped and true is returned.
+   Returns false without skipping anything if a '!' ID does not match. */
+static bool
+match_macro_id (struct lexer *lexer, const char *id)
+{
+  if (id[0] != '!')
+    return lex_match_id (lexer, id);
+  else if (lex_token (lexer) == T_MACRO_ID
+           && ss_equals_case (lex_tokss (lexer), ss_cstr (id)))
+    {
+      lex_get (lexer);
+      return true;
+    }
+  else
+    return false;
+}
+
+/* A growable array of tokens. */
+struct tokens
+  {
+    struct token *tokens;       /* Array of N tokens. */
+    size_t n;                   /* Number of tokens in use. */
+  };
+
+/* One parameter of a macro, as declared in DEFINE. */
+struct macro_param
+  {
+    char *name;                 /* NULL for a positional parameter. */
+    struct tokens def;          /* Default expansion. */
+    bool expand_arg;            /* Macro-expand the argument? */
+
+    /* How the extent of the argument is determined.  Selects the active
+       member of the union below. */
+    enum
+      {
+        ARG_N_TOKENS,
+        ARG_CHAREND,
+        ARG_ENCLOSE,
+        ARG_CMDEND
+      }
+    arg_type;
+    union
+      {
+        int n_tokens;             /* ARG_N_TOKENS: value given to !TOKENS. */
+        struct token charend;     /* ARG_CHAREND: token given to !CHAREND. */
+        struct token enclose[2];  /* ARG_ENCLOSE: tokens given to !ENCLOSE. */
+      };
+  };
+
+/* A macro as parsed by DEFINE. */
+struct macro
+  {
+    char *name;                 /* Macro name. */
+
+    struct macro_param *params; /* Array of N_PARAMS parameters. */
+    size_t n_params;
+
+    char **body;                /* Array of N_BODY body strings. */
+    size_t n_body;
+  };
+
+static void macro_destroy (struct macro *);
+
+/* Requires the current token to be a string whose contents lex as exactly one
+   token.  On success, stores that token in *TOKEN, skips the string, and
+   returns true.  Otherwise returns false; if the string held the wrong number
+   of tokens, also reports an error and releases *TOKEN with token_uninit().
+
+   *TOKEN must be initialized on entry, because token_uninit() may be applied
+   to it even when no token was stored in it. */
+static bool
+parse_quoted_token (struct lexer *lexer, struct token *token)
+{
+  if (!lex_force_string (lexer))
+    return false;
+
+  struct substring s = lex_tokss (lexer);
+  struct string_lexer slex;
+  string_lexer_init (&slex, s.string, s.length, SEG_MODE_INTERACTIVE);
+
+  /* Initialized so that it is safe to uninit on the error path even if the
+     second string_lexer_next() call is short-circuited away. */
+  struct token another_token = { .type = T_STOP };
+  if (!string_lexer_next (&slex, token)
+      || string_lexer_next (&slex, &another_token))
+    {
+      token_uninit (token);
+      token_uninit (&another_token);
+      lex_error (lexer, _("String must contain exactly one token."));
+      return false;
+    }
+
+  /* Consume the string so that the caller sees whatever follows it. */
+  lex_get (lexer);
+  return true;
+}
+
+/* Parses the DEFINE command:
+
+     DEFINE name ( [param [/ param]...] ) body... !ENDDEFINE.
+
+   where each param is "!POSITIONAL" or "name =", then optionally
+   "!DEFAULT (tokens...)", then optionally "!NOEXPAND", then one of
+   "!TOKENS (n)", "!CHAREND ('token')", "!ENCLOSE ('token', 'token')", or
+   "!CMDEND".
+
+   The macro is only parsed: nothing retains it yet, so it is freed before
+   returning.  Returns CMD_SUCCESS if the syntax was accepted, CMD_FAILURE
+   otherwise. */
+int
+cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
+{
+  if (!force_macro_id (lexer))
+    return CMD_FAILURE;
+
+  /* Parse macro name. */
+  struct macro *m = xmalloc (sizeof *m);
+  *m = (struct macro) { .name = ss_xstrdup (lex_tokss (lexer)) };
+  lex_get (lexer);
+
+  if (!lex_force_match (lexer, T_LPAREN))
+    goto error;
+
+  size_t allocated_params = 0;
+  while (!lex_match (lexer, T_RPAREN))
+    {
+      if (m->n_params >= allocated_params)
+        m->params = x2nrealloc (m->params, &allocated_params,
+                                sizeof *m->params);
+
+      struct macro_param *p = &m->params[m->n_params++];
+      *p = (struct macro_param) { .expand_arg = true };
+
+      /* Parse parameter name. */
+      if (match_macro_id (lexer, "!POSITIONAL"))
+        p->name = NULL;
+      else
+        {
+          if (!lex_force_id (lexer))
+            goto error;
+
+          /* Save the name while the identifier is still the current token,
+             and only then require the "=" that follows it. */
+          p->name = ss_xstrdup (lex_tokss (lexer));
+          lex_get (lexer);
+
+          if (!lex_force_match (lexer, T_EQUALS))
+            goto error;
+        }
+
+      /* Parse default value. */
+      if (match_macro_id (lexer, "!DEFAULT"))
+        {
+          if (!lex_force_match (lexer, T_LPAREN))
+            goto error;
+
+          size_t allocated_tokens = 0;
+          /* XXX Should this handle balanced inner parentheses? */
+          while (!lex_match (lexer, T_RPAREN))
+            {
+              if (lex_token (lexer) == T_ENDCMD)
+                {
+                  lex_error_expecting (lexer, ")");
+                  goto error;
+                }
+              if (p->def.n >= allocated_tokens)
+                p->def.tokens = x2nrealloc (p->def.tokens, &allocated_tokens,
+                                            sizeof *p->def.tokens);
+
+              struct token *token = &p->def.tokens[p->def.n++];
+              token_copy (token, lex_next (lexer, 0));
+              lex_get (lexer);
+            }
+        }
+
+      if (match_macro_id (lexer, "!NOEXPAND"))
+        p->expand_arg = false;
+
+      /* Parse how the argument is delimited. */
+      if (match_macro_id (lexer, "!TOKENS"))
+        {
+          if (!lex_force_match (lexer, T_LPAREN)
+              || !lex_force_int_range (lexer, "!TOKENS", 1, INT_MAX))
+            goto error;
+          p->arg_type = ARG_N_TOKENS;
+          p->n_tokens = lex_integer (lexer);
+          lex_get (lexer);
+          if (!lex_force_match (lexer, T_RPAREN))
+            goto error;
+        }
+      else if (match_macro_id (lexer, "!CHAREND"))
+        {
+          p->arg_type = ARG_CHAREND;
+          p->charend = (struct token) { .type = T_STOP };
+
+          if (!lex_force_match (lexer, T_LPAREN)
+              || !parse_quoted_token (lexer, &p->charend)
+              || !lex_force_match (lexer, T_RPAREN))
+            goto error;
+        }
+      else if (match_macro_id (lexer, "!ENCLOSE"))
+        {
+          p->arg_type = ARG_ENCLOSE;
+          p->enclose[0] = p->enclose[1] = (struct token) { .type = T_STOP };
+
+          if (!lex_force_match (lexer, T_LPAREN)
+              || !parse_quoted_token (lexer, &p->enclose[0])
+              || !lex_force_match (lexer, T_COMMA)
+              || !parse_quoted_token (lexer, &p->enclose[1])
+              || !lex_force_match (lexer, T_RPAREN))
+            goto error;
+        }
+      else if (match_macro_id (lexer, "!CMDEND"))
+        p->arg_type = ARG_CMDEND;
+      else
+        {
+          lex_error_expecting (lexer, "!TOKENS", "!CHAREND",
+                               "!ENCLOSE", "!CMDEND");
+          goto error;
+        }
+
+      /* Parameters are separated by "/". */
+      if (lex_token (lexer) != T_RPAREN && !lex_force_match (lexer, T_SLASH))
+        goto error;
+    }
+
+  /* Collect the body strings up to !ENDDEFINE. */
+  size_t allocated_body = 0;
+  while (!match_macro_id (lexer, "!ENDDEFINE"))
+    {
+      if (lex_token (lexer) != T_STRING)
+        {
+          lex_error (lexer, _("Expecting macro body or !ENDDEFINE"));
+          goto error;
+        }
+
+      if (m->n_body >= allocated_body)
+        m->body = x2nrealloc (m->body, &allocated_body, sizeof *m->body);
+      m->body[m->n_body++] = ss_xstrdup (lex_tokss (lexer));
+      lex_get (lexer);
+    }
+
+  /* Nothing takes ownership of the macro yet, so release it here. */
+  macro_destroy (m);
+  return CMD_SUCCESS;
+
+error:
+  macro_destroy (m);
+  return CMD_FAILURE;
+}
+
+/* Destroys each token in TOKENS and frees the array itself. */
+static void
+tokens_uninit (struct tokens *tokens)
+{
+  for (size_t i = 0; i < tokens->n; i++)
+    token_uninit (&tokens->tokens[i]);
+  free (tokens->tokens);
+}
+
+/* Frees M and everything it owns.  Does nothing if M is null. */
+static void
+macro_destroy (struct macro *m)
+{
+  if (!m)
+    return;
+
+  free (m->name);
+  for (size_t i = 0; i < m->n_params; i++)
+    {
+      struct macro_param *p = &m->params[i];
+      free (p->name);
+
+      tokens_uninit (&p->def);
+
+      /* Only the union member selected by ARG_TYPE holds tokens to free. */
+      switch (p->arg_type)
+        {
+        case ARG_N_TOKENS:
+          break;
+
+        case ARG_CHAREND:
+          token_uninit (&p->charend);
+          break;
+
+        case ARG_ENCLOSE:
+          token_uninit (&p->enclose[0]);
+          token_uninit (&p->enclose[1]);
+          break;
+
+        case ARG_CMDEND:
+          break;
+        }
+    }
+  free (m->params);
+  for (size_t i = 0; i < m->n_body; i++)
+    free (m->body[i]);
+  free (m->body);
+  free (m);
+}
+
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 7f2d0290a6..1283c1b784 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -196,7 +196,7 @@ lex_push_token__ (struct lex_source *src) src->tokens = deque_expand (&src->deque, src->tokens, sizeof *src->tokens); token = &src->tokens[deque_push_front (&src->deque)]; - token_init (&token->token); + token->token = (struct token) { .type = T_STOP }; return token; } @@ -859,9 +859,7 @@ lex_next__ (const struct lexer *lexer_, int n) return lex_source_next__ (src, n); else { - static const struct lex_token stop_token = - { TOKEN_INITIALIZER (T_STOP, 0.0, ""), 0, 0, 0, 0 }; - + static const struct lex_token stop_token = { .token = { .type = T_STOP } }; return &stop_token; } } diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index
86ebb7d006..0e29dc9e71 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -548,7 +548,7 @@ void scanner_init (struct scanner *scanner, struct token *token) { scanner->state = S_START; - token_init (token); + *token = (struct token) { .type = T_STOP }; } /* Adds the segment with type TYPE and UTF-8 text S to SCANNER. TOKEN must be diff --git a/src/language/lexer/token.c b/src/language/lexer/token.c index 718f3d07f3..69fd48fb62 100644 --- a/src/language/lexer/token.c +++ b/src/language/lexer/token.c @@ -27,17 +27,17 @@ #include "libpspp/cast.h" #include "libpspp/misc.h" - #include "gl/ftoastr.h" #include "gl/xalloc.h" -/* Initializes TOKEN with an arbitrary type, number 0, and a null string. */ void -token_init (struct token *token) +token_copy (struct token *dst, const struct token *src) { - token->type = 0; - token->number = 0.0; - token->string = ss_empty (); + *dst = (struct token) { + .type = src->type, + .number = src->number, + }; + ss_alloc_substring (&dst->string, src->string); } /* Frees the string that TOKEN contains. 
*/ @@ -45,7 +45,10 @@ void token_uninit (struct token *token) { if (token != NULL) - ss_dealloc (&token->string); + { + ss_dealloc (&token->string); + *token = (struct token) { .type = T_STOP }; + } } static char * diff --git a/src/language/lexer/token.h b/src/language/lexer/token.h index cab1a8cf9c..f4614c0e26 100644 --- a/src/language/lexer/token.h +++ b/src/language/lexer/token.h @@ -32,10 +32,7 @@ struct token struct substring string; }; -#define TOKEN_INITIALIZER(TYPE, NUMBER, STRING) \ - { TYPE, NUMBER, SS_LITERAL_INITIALIZER (STRING) } - -void token_init (struct token *); +void token_copy (struct token *, const struct token *); void token_uninit (struct token *); char *token_to_string (const struct token *); diff --git a/tests/automake.mk b/tests/automake.mk index ec81e52881..4de61417b2 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -339,6 +339,7 @@ TESTSUITE_AT = \ tests/data/sys-file.at \ tests/data/encrypted-file.at \ tests/language/command.at \ + tests/language/control/define.at \ tests/language/control/do-if.at \ tests/language/control/do-repeat.at \ tests/language/control/loop.at \ diff --git a/tests/language/control/define.at b/tests/language/control/define.at new file mode 100644 index 0000000000..d187b046d5 --- /dev/null +++ b/tests/language/control/define.at @@ -0,0 +1,26 @@ +dnl PSPP - a program for statistical analysis. +dnl Copyright (C) 2017 Free Software Foundation, Inc. +dnl +dnl This program is free software: you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation, either version 3 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program. If not, see <http://www.gnu.org/licenses/>. +dnl +AT_BANNER([DEFINE]) + +AT_SETUP([DEFINE]) +AT_DATA([define.sps], [dnl +DEFINE !variables() + brand model license color +!ENDDEFINE. +]) +AT_CHECK([pspp define.sps]) +AT_CLEANUP -- 2.30.2