DEF_CMD (S_ANY, F_ENHANCED, "CLOSE FILE HANDLE", cmd_close_file_handle)
DEF_CMD (S_ANY, 0, "CACHE", cmd_cache)
DEF_CMD (S_ANY, 0, "CD", cmd_cd)
+DEF_CMD (S_ANY, 0, "DEFINE", cmd_define)
DEF_CMD (S_ANY, 0, "DO REPEAT", cmd_do_repeat)
DEF_CMD (S_ANY, 0, "END REPEAT", cmd_end_repeat)
DEF_CMD (S_ANY, 0, "ECHO", cmd_echo)
UNIMPL_CMD ("CTABLES", "Display complex samples")
UNIMPL_CMD ("CURVEFIT", "Fit curve to line plot")
UNIMPL_CMD ("DATE", "Create time series data")
-UNIMPL_CMD ("DEFINE", "Syntax macros")
UNIMPL_CMD ("DETECTANOMALY", "Find unusual cases")
UNIMPL_CMD ("DISCRIMINANT", "Linear discriminant analysis")
UNIMPL_CMD ("EDIT", "obsolete")
language_control_sources = \
src/language/control/control-stack.c \
src/language/control/control-stack.h \
+ src/language/control/define.c \
src/language/control/do-if.c \
src/language/control/loop.c \
src/language/control/repeat.c \
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <limits.h>
+
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/macro.h"
+#include "language/lexer/scan.h"
+#include "language/lexer/token.h"
+
+#include "gl/xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Returns true if LEXER's current token is a macro identifier (T_MACRO_ID).
+   Otherwise hands the decision to lex_force_id() and returns its result. */
+static bool
+force_macro_id (struct lexer *lexer)
+{
+  if (lex_token (lexer) == T_MACRO_ID)
+    return true;
+
+  return lex_force_id (lexer);
+}
+
+/* Tries to match LEXER's current token against ID.
+
+   When ID does not start with '!', the match is delegated to lex_match_id().
+   Otherwise the current token must be a T_MACRO_ID whose text compares equal
+   to ID under ss_equals_case(); in that case the token is consumed.
+
+   Returns true if the token matched, false otherwise. */
+static bool
+match_macro_id (struct lexer *lexer, const char *id)
+{
+  if (id[0] != '!')
+    return lex_match_id (lexer, id);
+
+  bool matched = (lex_token (lexer) == T_MACRO_ID
+                  && ss_equals_case (lex_tokss (lexer), ss_cstr (id)));
+  if (matched)
+    lex_get (lexer);
+  return matched;
+}
+
+/* Parses a string token from LEXER whose contents must scan as exactly one
+   token, and stores that token in *TOKEN.
+
+   Returns true and consumes the string on success.  Returns false if the
+   current token is not a string (lex_force_string() failed), or if the string
+   holds zero or more than one token; in the latter case *TOKEN is
+   uninitialized, an error is reported, and the string is not consumed. */
+static bool
+parse_quoted_token (struct lexer *lexer, struct token *token)
+{
+  if (!lex_force_string (lexer))
+    return false;
+
+  struct substring s = lex_tokss (lexer);
+  struct string_lexer slex;
+  string_lexer_init (&slex, s.string, s.length, SEG_MODE_INTERACTIVE);
+
+  /* Give the lookahead token a defined value up front: when the first
+     string_lexer_next() call fails, the || below short-circuits and
+     'another_token' is never written, yet the error path still passes it to
+     token_uninit(). */
+  struct token another_token = { .type = T_STOP };
+  if (!string_lexer_next (&slex, token)
+      || string_lexer_next (&slex, &another_token))
+    {
+      token_uninit (token);
+      token_uninit (&another_token);
+      lex_error (lexer, _("String must contain exactly one token."));
+      return false;
+    }
+  lex_get (lexer);
+  return true;
+}
+
+int
+cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
+{ /* Parses a DEFINE command from LEXER: a macro name, a parenthesized
+     parameter list, and body lines up to !ENDDEFINE.  On success the new
+     macro is handed to lex_define_macro() and CMD_SUCCESS is returned.  On
+     a syntax error the partially built macro is destroyed and CMD_FAILURE
+     is returned. */
+ if (!force_macro_id (lexer))
+ return CMD_FAILURE;
+
+ /* Parse macro name.  Every other member of *m starts out zero. */
+ struct macro *m = xmalloc (sizeof *m);
+ *m = (struct macro) { .name = ss_xstrdup (lex_tokss (lexer)) };
+ lex_get (lexer);
+
+ if (!lex_force_match (lexer, T_LPAREN))
+ goto error;
+
+ size_t allocated_params = 0; /* Capacity of m->params. */
+ while (!lex_match (lexer, T_RPAREN))
+ {
+ if (m->n_params >= allocated_params)
+ m->params = x2nrealloc (m->params, &allocated_params,
+ sizeof *m->params);
+
+ size_t param_index = m->n_params++; /* Counted before it is filled in, so the error path's macro_destroy() visits it too. */
+ struct macro_param *p = &m->params[param_index];
+ *p = (struct macro_param) { .expand_arg = true }; /* Cleared again only by !NOEXPAND below. */
+
+ /* Parse parameter name: either !POSITIONAL, which gets a generated name,
+ or an identifier followed by '='. */
+ if (match_macro_id (lexer, "!POSITIONAL"))
+ {
+ if (param_index > 0 && !m->params[param_index - 1].positional)
+ {
+ lex_error (lexer, _("Positional parameters must precede "
+ "keyword parameters."));
+ goto error;
+ }
+
+ p->positional = true;
+ p->name = xasprintf ("!%zu", param_index + 1); /* "!1", "!2", ... */
+ }
+ else
+ {
+ if (!lex_force_id (lexer))
+ goto error;
+
+ p->positional = false;
+ p->name = xasprintf ("!%s", lex_tokcstr (lexer)); /* Stored with a leading '!'. */
+ lex_get (lexer);
+
+ if (!lex_force_match (lexer, T_EQUALS))
+ goto error;
+ }
+
+ /* Parse default value: the optional !DEFAULT(...) clause, whose tokens
+ are collected into p->def. */
+ if (match_macro_id (lexer, "!DEFAULT"))
+ {
+ if (!lex_force_match (lexer, T_LPAREN))
+ goto error;
+
+ /* XXX Should this handle balanced inner parentheses?  As written, the
+ first right parenthesis ends the default value. */
+ while (!lex_match (lexer, T_RPAREN))
+ {
+ if (lex_token (lexer) == T_ENDCMD)
+ {
+ lex_error_expecting (lexer, ")");
+ goto error;
+ }
+ const struct macro_token mt = {
+ .token = *lex_next (lexer, 0),
+ .representation = lex_next_representation (lexer, 0, 0),
+ };
+ macro_tokens_add (&p->def, &mt); /* Stores a copy, so the lexer's token can be consumed next. */
+ lex_get (lexer);
+ }
+ }
+
+ if (match_macro_id (lexer, "!NOEXPAND"))
+ p->expand_arg = false;
+
+ if (match_macro_id (lexer, "!TOKENS")) /* Exactly one of the four argument types is required. */
+ {
+ if (!lex_force_match (lexer, T_LPAREN)
+ || !lex_force_int_range (lexer, "!TOKENS", 1, INT_MAX))
+ goto error;
+ p->arg_type = ARG_N_TOKENS;
+ p->n_tokens = lex_integer (lexer);
+ lex_get (lexer);
+ if (!lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ else if (match_macro_id (lexer, "!CHAREND"))
+ {
+ p->arg_type = ARG_CHAREND;
+ p->charend = (struct token) { .type = T_STOP }; /* Set before parsing: macro_destroy() calls token_uninit() on it on the error path. */
+
+ if (!lex_force_match (lexer, T_LPAREN)
+ || !parse_quoted_token (lexer, &p->charend)
+ || !lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ else if (match_macro_id (lexer, "!ENCLOSE"))
+ {
+ p->arg_type = ARG_ENCLOSE;
+ p->enclose[0] = p->enclose[1] = (struct token) { .type = T_STOP }; /* Same reason as for charend above. */
+
+ if (!lex_force_match (lexer, T_LPAREN)
+ || !parse_quoted_token (lexer, &p->enclose[0])
+ || !lex_force_match (lexer, T_COMMA)
+ || !parse_quoted_token (lexer, &p->enclose[1])
+ || !lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ else if (match_macro_id (lexer, "!CMDEND"))
+ p->arg_type = ARG_CMDEND;
+ else
+ {
+ lex_error_expecting (lexer, "!TOKENS", "!CHAREND",
+ "!ENCLOSE", "!CMDEND");
+ goto error;
+ }
+
+ if (lex_token (lexer) != T_RPAREN && !lex_force_match (lexer, T_SLASH)) /* '/' separates parameters; ')' ends the list. */
+ goto error;
+ }
+
+ struct string body = DS_EMPTY_INITIALIZER; /* Body text, one '\n'-terminated line per string token. */
+ while (!match_macro_id (lexer, "!ENDDEFINE"))
+ {
+ if (lex_token (lexer) != T_STRING) /* NOTE(review): presumably the lexer delivers each body line as a string token -- confirm. */
+ {
+ lex_error (lexer, _("Expecting macro body or !ENDDEFINE"));
+ ds_destroy (&body);
+ goto error;
+ }
+
+ ds_put_substring (&body, lex_tokss (lexer));
+ ds_put_byte (&body, '\n');
+ lex_get (lexer);
+ }
+
+ macro_tokens_from_string (&m->body, body.ss, lex_get_syntax_mode (lexer)); /* Tokenize the collected body text. */
+ ds_destroy (&body);
+
+ lex_define_macro (lexer, m); /* The lexer takes ownership of m. */
+
+ return CMD_SUCCESS;
+
+error:
+ macro_destroy (m);
+ return CMD_FAILURE;
+}
struct hmap *dummies,
struct string *outputs, size_t n_outputs)
{
- struct segmenter segmenter;
-
- segmenter_init (&segmenter, mode);
-
+ struct segmenter segmenter = SEGMENTER_INIT (mode);
while (!ss_is_empty (s))
{
enum segment_type type;
src/language/lexer/include-path.h \
src/language/lexer/lexer.c \
src/language/lexer/lexer.h \
+ src/language/lexer/macro.c \
+ src/language/lexer/macro.h \
src/language/lexer/format-parser.c \
src/language/lexer/format-parser.h \
src/language/lexer/scan.c \
#include <uniwidth.h>
#include "language/command.h"
+#include "language/lexer/macro.h"
#include "language/lexer/scan.h"
#include "language/lexer/segment.h"
#include "language/lexer/token.h"
size_t token_len; /* Length of source for token in bytes. */
size_t line_pos; /* Start of line containing token_pos. */
int first_line; /* Line number at token_pos. */
+ bool from_macro;
};
/* A source of tokens, corresponding to a syntax file.
{
struct ll ll; /* In lexer's list of sources. */
struct lex_reader *reader;
+ struct lexer *lexer;
struct segmenter segmenter;
bool eof; /* True if T_STOP was read from 'reader'. */
struct lex_token *tokens; /* Lookahead tokens for parser. */
};
-static struct lex_source *lex_source_create (struct lex_reader *);
+static struct lex_source *lex_source_create (struct lexer *,
+ struct lex_reader *);
static void lex_source_destroy (struct lex_source *);
/* Lexer. */
struct lexer
{
struct ll_list sources; /* Contains "struct lex_source"s. */
+ struct macro_set *macros;
};
static struct lex_source *lex_source__ (const struct lexer *);
+static struct substring lex_source_get_syntax__ (const struct lex_source *,
+ int n0, int n1);
static const struct lex_token *lex_next__ (const struct lexer *, int n);
static void lex_source_push_endcmd__ (struct lex_source *);
static void lex_source_pop__ (struct lex_source *);
-static bool lex_source_get__ (const struct lex_source *);
+static bool lex_source_get (const struct lex_source *);
static void lex_source_error_valist (struct lex_source *, int n0, int n1,
const char *format, va_list)
PRINTF_FORMAT (4, 0);
struct lexer *
lex_create (void)
{
- struct lexer *lexer = xzalloc (sizeof *lexer);
- ll_init (&lexer->sources);
+ struct lexer *lexer = xmalloc (sizeof *lexer);
+ *lexer = (struct lexer) {
+ .sources = LL_INITIALIZER (lexer->sources),
+ .macros = macro_set_create (),
+ };
return lexer;
}
ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources)
lex_source_destroy (source);
+ macro_set_destroy (lexer->macros);
free (lexer);
}
}
+/* Registers M in LEXER's macro set by passing it to macro_set_add().  M
+   replaces any existing macro with the same name.  Ownership of M passes to
+   LEXER. */
+void
+lex_define_macro (struct lexer *lexer, struct macro *m)
+{
+  struct macro_set *set = lexer->macros;
+  macro_set_add (set, m);
+}
+
/* Inserts READER into LEXER so that the next token read by LEXER comes from
READER. Before the caller, LEXER must either be empty or at a T_ENDCMD
token. */
lex_include (struct lexer *lexer, struct lex_reader *reader)
{
assert (ll_is_empty (&lexer->sources) || lex_token (lexer) == T_ENDCMD);
- ll_push_head (&lexer->sources, &lex_source_create (reader)->ll);
+ ll_push_head (&lexer->sources, &lex_source_create (lexer, reader)->ll);
}
/* Appends READER to LEXER, so that it will be read after all other current
void
lex_append (struct lexer *lexer, struct lex_reader *reader)
{
- ll_push_tail (&lexer->sources, &lex_source_create (reader)->ll);
+ ll_push_tail (&lexer->sources, &lex_source_create (lexer, reader)->ll);
}
\f
/* Advancing. */
src->tokens = deque_expand (&src->deque, src->tokens, sizeof *src->tokens);
token = &src->tokens[deque_push_front (&src->deque)];
- token_init (&token->token);
+ token->token = (struct token) { .type = T_STOP };
return token;
}
lex_source_pop__ (src);
while (deque_is_empty (&src->deque))
- if (!lex_source_get__ (src))
+ if (!lex_source_get (src))
{
lex_source_destroy (src);
src = lex_source__ (lexer);
return lex_source_next__ (src, n);
else
{
- static const struct lex_token stop_token =
- { TOKEN_INITIALIZER (T_STOP, 0.0, ""), 0, 0, 0, 0 };
-
+ static const struct lex_token stop_token = { .token = { .type = T_STOP } };
return &stop_token;
}
}
+/* Returns the token in the slot that deque_front (&SRC->deque, 0) designates,
+   that is, the front element of SRC's token deque. */
+static const struct lex_token *
+lex_source_front (const struct lex_source *src)
+{
+  size_t front_idx = deque_front (&src->deque, 0);
+  return &src->tokens[front_idx];
+}
+
static const struct lex_token *
lex_source_next__ (const struct lex_source *src, int n)
{
{
if (!deque_is_empty (&src->deque))
{
- struct lex_token *front;
-
- front = &src->tokens[deque_front (&src->deque, 0)];
+ const struct lex_token *front = lex_source_front (src);
if (front->token.type == T_STOP || front->token.type == T_ENDCMD)
return front;
}
- lex_source_get__ (src);
+ lex_source_get (src);
}
return &src->tokens[deque_back (&src->deque, n)];
return lex_next (lexer, n)->string;
}
+/* Returns the source text spanning tokens N0 through N1 of LEXER's current
+   source, as computed by lex_source_get_syntax__(). */
+struct substring
+lex_next_representation (const struct lexer *lexer, int n0, int n1)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  return lex_source_get_syntax__ (src, n0, n1);
+}
+
static bool
lex_tokens_match (const struct token *actual, const struct token *expected)
{
return src == NULL ? NULL : src->reader->encoding;
}
-
/* Returns the syntax mode for the syntax file from which the current drawn is
drawn. Returns SEG_MODE_AUTO for a T_STOP token or if the command's source
does not have line numbers.
}
static struct substring
-lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+lex_tokens_get_syntax__ (const struct lex_source *src,
+ const struct lex_token *token0,
+ const struct lex_token *token1)
{
- const struct lex_token *token0 = lex_source_next__ (src, n0);
- const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1));
size_t start = token0->token_pos;
size_t end = token1->token_pos + token1->token_len;
return ss_buffer (&src->buffer[start - src->tail], end - start);
}
+/* Returns the syntax in SRC that runs from the start of token N0 to the end
+   of token MAX (N0, N1), by looking up both tokens with lex_source_next__()
+   and passing them to lex_tokens_get_syntax__(). */
+static struct substring
+lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+{
+  const struct lex_token *first = lex_source_next__ (src, n0);
+  const struct lex_token *last = lex_source_next__ (src, MAX (n0, n1));
+  return lex_tokens_get_syntax__ (src, first, last);
+}
+
static void
lex_ellipsize__ (struct substring in, char *out, size_t out_size)
{
token = lex_source_next__ (src, n0);
if (token->token.type == T_ENDCMD)
ds_put_cstr (&s, _("Syntax error at end of command"));
+ else if (token->from_macro)
+ {
+ /* XXX this isn't ideal, we should get the actual syntax */
+ char *syntax = token_to_string (&token->token);
+ if (syntax)
+ ds_put_format (&s, _("Syntax error at `%s'"), syntax);
+ else
+ ds_put_cstr (&s, _("Syntax error"));
+ free (syntax);
+ }
else
{
struct substring syntax = lex_source_get_syntax__ (src, n0, n1);
}
/* Attempts to append an additional token into SRC's deque, reading more from
- the underlying lex_reader if necessary. Returns true if successful, false
- if the deque already represents (a suffix of) the whole lex_reader's
- contents, */
+ the underlying lex_reader if necessary. Returns true if a new token was
+ added to SRC's deque, false otherwise. */
static bool
-lex_source_get__ (const struct lex_source *src_)
+lex_source_try_get (struct lex_source *src)
{
- struct lex_source *src = CONST_CAST (struct lex_source *, src_);
- if (src->eof)
- return false;
-
/* State maintained while scanning tokens. Usually we only need a single
state, but scanner_push() can return SCAN_SAVE to indicate that the state
needs to be saved and possibly restored later with SCAN_BACK. */
switch (token->token.type)
{
default:
- break;
+ return true;
case T_STOP:
token->token.type = T_ENDCMD;
src->eof = true;
- break;
+ return true;
case SCAN_BAD_HEX_LENGTH:
lex_get_error (src, _("String of hex digits has %d characters, which "
"is not a multiple of 2"),
(int) token->token.number);
- break;
+ return false;
case SCAN_BAD_HEX_DIGIT:
case SCAN_BAD_UNICODE_DIGIT:
lex_get_error (src, _("`%c' is not a valid hex digit"),
(int) token->token.number);
- break;
+ return false;
case SCAN_BAD_UNICODE_LENGTH:
lex_get_error (src, _("Unicode string contains %d bytes, which is "
"not in the valid range of 1 to 8 bytes"),
(int) token->token.number);
- break;
+ return false;
case SCAN_BAD_UNICODE_CODE_POINT:
lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
(int) token->token.number);
- break;
+ return false;
case SCAN_EXPECTED_QUOTE:
lex_get_error (src, _("Unterminated string constant"));
- break;
+ return false;
case SCAN_EXPECTED_EXPONENT:
lex_get_error (src, _("Missing exponent following `%s'"),
token->token.string.string);
- break;
+ return false;
case SCAN_UNEXPECTED_CHAR:
{
char c_name[16];
lex_get_error (src, _("Bad character %s in input"),
uc_name (token->token.number, c_name));
+ return false;
}
- break;
case SCAN_SKIP:
lex_source_pop_front (src);
- break;
+ return false;
+ }
+
+ NOT_REACHED ();
+}
+
+/* Calls lex_source_try_get() repeatedly until it reports that a token was
+   added to SRC's deque, then returns true.  Returns false, without calling
+   it again, as soon as SRC->eof is set. */
+static bool
+lex_source_get__ (struct lex_source *src)
+{
+  while (!src->eof)
+    if (lex_source_try_get (src))
+      return true;
+
+  return false;
+}
+
+static bool
+lex_source_get (const struct lex_source *src_)
+{ /* Adds tokens to SRC_'s deque, expanding a macro call if one starts at
+     the newly read token.  Returns false if lex_source_get__() could not
+     add a token, true otherwise.  When settings_get_mexpand() is false, or
+     the new token does not begin a macro call, the token or tokens read
+     are left in the deque unchanged. */
+ struct lex_source *src = CONST_CAST (struct lex_source *, src_);
+
+ size_t old_count = deque_count (&src->deque); /* Deque size on entry; tokens beyond this were read by this call. */
+ if (!lex_source_get__ (src))
+ return false;
+
+ if (!settings_get_mexpand ())
+ return true;
+
+ struct macro_expander *me;
+ int retval = macro_expander_create (src->lexer->macros,
+ &lex_source_front (src)->token,
+ &me);
+ while (!retval) /* 0 means the expander wants another token. */
+ {
+ if (!lex_source_get__ (src))
+ {
+ /* This should not be reachable because we always get a T_STOP at the
+ end of input and the macro_expander should always terminate
+ expansion on T_STOP.
+ NOTE(review): lex_source_try_get() rewrites T_STOP to T_ENDCMD before
+ setting eof, so confirm that the expander really stops on the token it
+ is given at end of input. */
+ NOT_REACHED ();
+ }
+
+ const struct lex_token *front = lex_source_front (src);
+ const struct macro_token mt = {
+ .token = front->token,
+ .representation = lex_tokens_get_syntax__ (src, front, front)
+ };
+ retval = macro_expander_add (me, &mt);
+ }
+ if (retval < 0) /* Not a macro call: keep the tokens as read. */
+ {
+ /* XXX handle case where there's a macro invocation starting from some
+ later token we've already obtained */
+ macro_expander_destroy (me);
+ return true;
+ }
+
+ /* XXX handle case where the macro invocation doesn't use all the tokens.
+ For now every token read by this call is discarded. */
+ while (deque_count (&src->deque) > old_count)
+ lex_source_pop_front (src);
+
+ struct macro_tokens expansion = { .n = 0 };
+ macro_expander_get_expansion (me, &expansion);
+ macro_expander_destroy (me);
+
+ for (size_t i = 0; i < expansion.n; i++)
+ {
+ *lex_push_token__ (src) = (struct lex_token) {
+ .token = expansion.mts[i].token,
+ .from_macro = true,
+ /* XXX the rest: position fields stay zero for expanded tokens. */
+ };
+
+ ss_dealloc (&expansion.mts[i].representation); /* XXX should feed into lexer */
}
+ free (expansion.mts);
return true;
}
}
static struct lex_source *
-lex_source_create (struct lex_reader *reader)
+lex_source_create (struct lexer *lexer, struct lex_reader *reader)
{
struct lex_source *src;
src = xzalloc (sizeof *src);
src->reader = reader;
segmenter_init (&src->segmenter, reader->syntax);
+ src->lexer = lexer;
src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens);
lex_source_push_endcmd__ (src);
#include "libpspp/prompt.h"
struct lexer;
+struct macro;
/* Handling of errors. */
enum lex_error_mode
struct lexer *lex_create (void);
void lex_destroy (struct lexer *);
+/* Macros. */
+void lex_define_macro (struct lexer *, struct macro *);
+
/* Files. */
void lex_include (struct lexer *, struct lex_reader *);
void lex_append (struct lexer *, struct lex_reader *);
double lex_next_tokval (const struct lexer *, int n);
struct substring lex_next_tokss (const struct lexer *, int n);
+/* Token representation. */
+struct substring lex_next_representation (const struct lexer *,
+ int n0, int n1);
+
/* Current position. */
int lex_get_first_line_number (const struct lexer *, int n);
int lex_get_last_line_number (const struct lexer *, int n);
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "language/lexer/macro.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+#include "data/settings.h"
+#include "language/lexer/segment.h"
+#include "language/lexer/scan.h"
+#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Initializes *DST as a copy of *SRC: the token is copied with token_copy()
+   and the representation is duplicated with ss_alloc_substring(). */
+void
+macro_token_copy (struct macro_token *dst, const struct macro_token *src)
+{
+  ss_alloc_substring (&dst->representation, src->representation);
+  token_copy (&dst->token, &src->token);
+}
+
+/* Releases what *MT owns: its representation string and its token.  MT
+   itself is not freed. */
+void
+macro_token_uninit (struct macro_token *mt)
+{
+  ss_dealloc (&mt->representation);
+  token_uninit (&mt->token);
+}
+
+/* Initializes *DST as a copy of *SRC, with a newly allocated array holding a
+   macro_token_copy() of each of SRC's tokens.  DST's capacity is exactly
+   SRC's token count. */
+void
+macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
+{
+  size_t n = src->n;
+  struct macro_token *copies = xmalloc (n * sizeof *copies);
+  for (size_t i = 0; i < n; i++)
+    macro_token_copy (&copies[i], &src->mts[i]);
+
+  *dst = (struct macro_tokens) {
+    .mts = copies,
+    .n = n,
+    .allocated = n,
+  };
+}
+
+/* Uninitializes each of the tokens in MTS and frees the token array.  MTS
+   itself is not freed. */
+void
+macro_tokens_uninit (struct macro_tokens *mts)
+{
+  size_t i = 0;
+  while (i < mts->n)
+    {
+      macro_token_uninit (&mts->mts[i]);
+      i++;
+    }
+  free (mts->mts);
+}
+
+/* Appends one slot to MTS, growing the array with x2nrealloc() when it is
+   full, and returns a pointer to the new slot.  The slot's contents are not
+   initialized; the caller must fill it in. */
+struct macro_token *
+macro_tokens_add_uninit (struct macro_tokens *mts)
+{
+  bool full = mts->n >= mts->allocated;
+  if (full)
+    mts->mts = x2nrealloc (mts->mts, &mts->allocated, sizeof *mts->mts);
+
+  size_t slot = mts->n;
+  mts->n = slot + 1;
+  return &mts->mts[slot];
+}
+
+/* Appends a copy of *MT, made with macro_token_copy(), to MTS.  The caller
+   keeps ownership of *MT. */
+void
+macro_tokens_add (struct macro_tokens *mts, const struct macro_token *mt)
+{
+  struct macro_token *slot = macro_tokens_add_uninit (mts);
+  macro_token_copy (slot, mt);
+}
+
+/* Scans SRC into tokens using segmenter MODE and appends each real token,
+   together with the span of SRC it was scanned from, to MTS.
+
+   Results for which is_scan_type() is true (skips and scan errors) add
+   nothing to MTS; scan errors are not yet reported. */
+void
+macro_tokens_from_string (struct macro_tokens *mts, const struct substring src,
+                          enum segmenter_mode mode)
+{
+  /* The pieces of scanning state that must be saved and restored together
+     when the scanner asks to back up. */
+  struct state
+    {
+      struct segmenter segmenter;
+      struct substring body;
+    };
+
+  struct state cur = {
+    .segmenter = SEGMENTER_INIT (mode),
+    .body = src,
+  };
+  struct state checkpoint = cur;
+
+  while (cur.body.length > 0)
+    {
+      /* The representation starts where this token's first segment starts;
+         its length is filled in once the token is complete. */
+      struct macro_token mt = {
+        .token = { .type = T_STOP },
+        .representation = { .string = cur.body.string },
+      };
+
+      struct scanner scanner;
+      scanner_init (&scanner, &mt.token);
+
+      /* Feed segments to the scanner until it has a whole token. */
+      bool token_complete = false;
+      while (!token_complete)
+        {
+          enum segment_type seg_type;
+          int seg_len = segmenter_push (&cur.segmenter, cur.body.string,
+                                        cur.body.length, true, &seg_type);
+          assert (seg_len >= 0);
+
+          struct substring segment = ss_head (cur.body, seg_len);
+          ss_advance (&cur.body, seg_len);
+
+          switch (scanner_push (&scanner, seg_type, segment, &mt.token))
+            {
+            case SCAN_SAVE:
+              checkpoint = cur;
+              break;
+
+            case SCAN_BACK:
+              cur = checkpoint;
+              token_complete = true;
+              break;
+
+            case SCAN_DONE:
+              token_complete = true;
+              break;
+
+            default:
+              break;
+            }
+        }
+
+      if (!is_scan_type (mt.token.type))
+        {
+          mt.representation.length = (cur.body.string
+                                      - mt.representation.string);
+          macro_tokens_add (mts, &mt);
+        }
+      /* XXX Otherwise, anything but SCAN_SKIP is an error that should be
+         reported. */
+
+      token_uninit (&mt.token);
+    }
+}
+
+/* Writes each token in MTS to STREAM, in order, using token_print(). */
+void
+macro_tokens_print (const struct macro_tokens *mts, FILE *stream)
+{
+  size_t i = 0;
+  while (i < mts->n)
+    {
+      const struct macro_token *mt = &mts->mts[i];
+      token_print (&mt->token, stream);
+      i++;
+    }
+}
+
+/* Frees macro M and everything it owns: its name, each parameter's name,
+   default tokens and delimiter tokens, the parameter array, and the body
+   tokens.  Does nothing if M is null. */
+void
+macro_destroy (struct macro *m)
+{
+  if (m == NULL)
+    return;
+
+  for (size_t i = 0; i < m->n_params; i++)
+    {
+      struct macro_param *param = &m->params[i];
+
+      free (param->name);
+      macro_tokens_uninit (&param->def);
+
+      /* Only !CHAREND and !ENCLOSE parameters carry delimiter tokens. */
+      if (param->arg_type == ARG_CHAREND)
+        token_uninit (&param->charend);
+      else if (param->arg_type == ARG_ENCLOSE)
+        {
+          token_uninit (&param->enclose[0]);
+          token_uninit (&param->enclose[1]);
+        }
+    }
+  free (m->params);
+
+  free (m->name);
+  macro_tokens_uninit (&m->body);
+  free (m);
+}
+\f
+/* Allocates and returns a new macro set whose hash map is initialized with
+   HMAP_INITIALIZER. */
+struct macro_set *
+macro_set_create (void)
+{
+  struct macro_set *set = xmalloc (sizeof *set);
+
+  /* The initializer refers to SET's own map, so build it only after SET has
+     been allocated. */
+  const struct macro_set initial = {
+    .macros = HMAP_INITIALIZER (set->macros),
+  };
+  *set = initial;
+
+  return set;
+}
+
+/* Destroys SET: removes every macro from its hash map and destroys it with
+   macro_destroy(), then destroys the map and frees SET.  Does nothing if SET
+   is null. */
+void
+macro_set_destroy (struct macro_set *set)
+{
+  if (!set)
+    return;
+
+  /* The _SAFE iterator is required because each macro is deleted from the
+     map, and freed, inside the loop body. */
+  struct macro *macro, *next;
+  HMAP_FOR_EACH_SAFE (macro, next, struct macro, hmap_node, &set->macros)
+    {
+      hmap_delete (&set->macros, &macro->hmap_node);
+      macro_destroy (macro);
+    }
+  hmap_destroy (&set->macros);
+  free (set);
+}
+
+/* Returns the hash of macro name NAME, computed by utf8_hash_case_string()
+   with basis 0. */
+static unsigned int
+hash_macro_name (const char *name)
+{
+  unsigned int hash = utf8_hash_case_string (name, 0);
+  return hash;
+}
+
+/* Searches SET for a macro whose name compares equal to NAME under
+   utf8_strcasecmp().  Returns the macro, or a null pointer if there is
+   none. */
+static struct macro *
+macro_set_find__ (struct macro_set *set, const char *name)
+{
+  unsigned int hash = hash_macro_name (name);
+
+  struct macro *candidate;
+  HMAP_FOR_EACH_WITH_HASH (candidate, struct macro, hmap_node,
+                           hash, &set->macros)
+    {
+      if (utf8_strcasecmp (candidate->name, name) == 0)
+        return candidate;
+    }
+
+  return NULL;
+}
+
+/* Read-only lookup: returns the macro in SET named NAME, or a null pointer if
+   SET has no such macro.  Uses macro_set_find__() for the search. */
+const struct macro *
+macro_set_find (const struct macro_set *set, const char *name)
+{
+  struct macro_set *set_rw = CONST_CAST (struct macro_set *, set);
+  return macro_set_find__ (set_rw, name);
+}
+
+/* Inserts M into SET.  If SET already holds a macro with the same name, that
+   macro is first removed and destroyed.  SET takes ownership of M. */
+void
+macro_set_add (struct macro_set *set, struct macro *m)
+{
+  struct macro *old = macro_set_find__ (set, m->name);
+  if (old != NULL)
+    {
+      hmap_delete (&set->macros, &old->hmap_node);
+      macro_destroy (old);
+    }
+
+  unsigned int hash = hash_macro_name (m->name);
+  hmap_insert (&set->macros, &m->hmap_node, hash);
+}
+\f
+enum me_state
+ {
+ /* Error state: macro_expander_add() returns -1 for any token offered in
+ this state.  It is the first enumerator, so its value is 0. */
+ ME_ERROR,
+
+ /* Accumulating tokens in me->args toward the end of any type of
+ argument. */
+ ME_ARG,
+
+ /* Expecting the opening delimiter of an ARG_ENCLOSE argument. */
+ ME_ENCLOSE,
+
+ /* Expecting a keyword for a keyword argument. */
+ ME_KEYWORD,
+
+ /* Expecting an equal sign for a keyword argument, after its keyword. */
+ ME_EQUALS,
+ };
+
+
+struct macro_expander
+ {
+ const struct macro_set *macros; /* Macro set passed to macro_expander_create(). */
+
+ enum me_state state; /* What the next token is expected to be. */
+ size_t n_tokens; /* Tokens counted as part of the call so far; starts at 1 for the macro name. */
+
+ const struct macro *macro; /* The macro being invoked. */
+ struct macro_tokens **args; /* One entry per parameter, null until collected or defaulted; null if the macro has no parameters. */
+ const struct macro_param *param; /* Parameter currently being collected; null if the macro has no parameters. */
+ };
+
+/* Completes argument collection for ME: every parameter that has no argument
+   yet receives a copy of its default tokens.  Returns ME's token count. */
+static int
+me_finished (struct macro_expander *me)
+{
+  const struct macro *macro = me->macro;
+
+  for (size_t i = 0; i < macro->n_params; i++)
+    {
+      if (me->args[i] != NULL)
+        continue;
+
+      me->args[i] = xmalloc (sizeof *me->args[i]);
+      macro_tokens_copy (me->args[i], &macro->params[i].def);
+    }
+
+  return me->n_tokens;
+}
+
+/* Moves ME on after the argument it has just finished collecting.
+
+   After a positional argument, advances to the next parameter and selects
+   the state needed to read it.  After a keyword argument, goes back to
+   expecting a keyword if any parameter still lacks an argument.
+
+   Returns 0 if ME needs more tokens, otherwise the result of me_finished(). */
+static int
+me_next_arg (struct macro_expander *me)
+{
+  if (!me->param)
+    {
+      assert (!me->macro->n_params);
+      return me_finished (me);
+    }
+  else if (me->param->positional)
+    {
+      me->param++;
+      if (me->param >= &me->macro->params[me->macro->n_params])
+        return me_finished (me);
+      else
+        {
+          /* A positional !ENCLOSE argument must begin with its opening
+             delimiter, so check for it in ME_ENCLOSE rather than letting
+             the delimiter be collected as part of the argument. */
+          me->state = (!me->param->positional ? ME_KEYWORD
+                       : me->param->arg_type == ARG_ENCLOSE ? ME_ENCLOSE
+                       : ME_ARG);
+          return 0;
+        }
+    }
+  else
+    {
+      for (size_t i = 0; i < me->macro->n_params; i++)
+        if (!me->args[i])
+          {
+            me->state = ME_KEYWORD;
+            return 0;
+          }
+      return me_finished (me);
+    }
+}
+
+static int
+me_error (struct macro_expander *me)
+{
+ me->state = ME_ERROR;
+ return -1;
+}
+
+/* Handles token MT while ME is collecting the argument for me->param.
+
+   Returns 0 if the argument needs more tokens, -1 (after reporting an error)
+   if MT is T_STOP, or the result of me_next_arg() once the argument is
+   complete. */
+static int
+me_add_arg (struct macro_expander *me, const struct macro_token *mt)
+{
+  const struct token *token = &mt->token;
+  if (token->type == T_STOP)
+    {
+      msg (SE, _("Unexpected end of file reading argument %s "
+                 "to macro %s."), me->param->name, me->macro->name);
+
+      return me_error (me);
+    }
+
+  me->n_tokens++;
+
+  /* Create the argument's token list on first use. */
+  const struct macro_param *p = me->param;
+  struct macro_tokens **argp = &me->args[p - me->macro->params];
+  if (!*argp)
+    *argp = xzalloc (sizeof **argp);
+  struct macro_tokens *arg = *argp;
+  if (p->arg_type == ARG_N_TOKENS)
+    {
+      /* Fixed-length argument: done after p->n_tokens tokens. */
+      macro_tokens_add (arg, mt);
+      if (arg->n >= p->n_tokens)
+        return me_next_arg (me);
+      return 0;
+    }
+  else if (p->arg_type == ARG_CMDEND)
+    {
+      /* Runs to the end of the command; the terminator is not stored. */
+      if (token->type == T_ENDCMD || token->type == T_STOP)
+        return me_next_arg (me);
+      macro_tokens_add (arg, mt);
+      return 0;
+    }
+  else
+    {
+      /* Only ARG_CHAREND and ARG_ENCLOSE reach this branch.  A !CHAREND
+         argument ends at its 'charend' token and an !ENCLOSE argument at
+         its closing delimiter; the terminator is not stored. */
+      const struct token *end
+        = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
+      if (token_equal (token, end))
+        return me_next_arg (me);
+      macro_tokens_add (arg, mt);
+      return 0;
+    }
+}
+
+/* Reports that ACTUAL was found where token EXPECTED was required while
+   reading the current argument, then puts ME into the error state.  An
+   ACTUAL with an empty representation is described as end of input.
+   Returns -1. */
+static int
+me_expected (struct macro_expander *me, const struct macro_token *actual,
+             const struct token *expected)
+{
+  struct substring found = actual->representation;
+  if (!found.length)
+    found = ss_cstr (_("<end of input>"));
+
+  char *wanted = token_to_string (expected);
+  msg (SE, _("Found `%.*s' while expecting `%s' reading argument %s "
+             "to macro %s."),
+       (int) found.length, found.string, wanted,
+       me->param->name, me->macro->name);
+  free (wanted);
+
+  return me_error (me);
+}
+
+/* Handles token MT while ME expects the opening delimiter of an ARG_ENCLOSE
+   argument.
+
+   If MT is that delimiter, switches ME to collecting the argument and
+   returns 0.  Otherwise reports the mismatch and returns -1. */
+static int
+me_enclose (struct macro_expander *me, const struct macro_token *mt)
+{
+  const struct token *token = &mt->token;
+  me->n_tokens++;
+
+  if (token_equal (&me->param->enclose[0], token))
+    {
+      /* Create the (so far empty) argument now.  If the closing delimiter
+         follows immediately, the argument must still count as supplied;
+         a null slot would be treated as "not given" and later filled in
+         with the parameter's default. */
+      struct macro_tokens **argp = &me->args[me->param - me->macro->params];
+      if (!*argp)
+        *argp = xzalloc (sizeof **argp);
+
+      me->state = ME_ARG;
+      return 0;
+    }
+
+  return me_expected (me, mt, &me->param->enclose[0]);
+}
+
+/* Returns the first parameter of M whose name compares equal to NAME under
+   utf8_strncasecmp(), or a null pointer if M has no such parameter. */
+static const struct macro_param *
+macro_find_parameter_by_name (const struct macro *m, struct substring name)
+{
+  size_t idx = 0;
+  while (idx < m->n_params)
+    {
+      const struct macro_param *candidate = &m->params[idx];
+      struct substring candidate_name = ss_cstr (candidate->name);
+      if (utf8_strncasecmp (candidate_name.string, candidate_name.length,
+                            name.string, name.length) == 0)
+        return candidate;
+      idx++;
+    }
+  return NULL;
+}
+
+/* Handles token MT while ME expects the keyword of a keyword argument.
+
+   If MT is an identifier naming one of the macro's parameters, makes that
+   parameter current and returns 0 to request the equals sign; naming a
+   parameter that already has an argument is an error (-1).  Any other token
+   ends the macro call: the result of me_finished() is returned and MT is not
+   counted. */
+static int
+me_keyword (struct macro_expander *me, const struct macro_token *mt)
+{
+  const struct macro_param *named
+    = (mt->token.type == T_ID
+       ? macro_find_parameter_by_name (me->macro, mt->token.string)
+       : NULL);
+  if (named == NULL)
+    return me_finished (me);
+
+  me->param = named;
+  if (me->args[named - me->macro->params] != NULL)
+    {
+      msg (SE,
+           _("Argument %s multiply specified in call to macro %s."),
+           named->name, me->macro->name);
+      return me_error (me);
+    }
+
+  me->n_tokens++;
+  me->state = ME_EQUALS;
+  return 0;
+}
+
+/* Handles token MT while ME expects the equals sign that follows the keyword
+   of a keyword argument.
+
+   On T_EQUALS, selects the state for reading the argument's value and
+   returns 0.  Otherwise reports the mismatch and returns -1. */
+static int
+me_equals (struct macro_expander *me, const struct macro_token *mt)
+{
+  const struct token *token = &mt->token;
+  me->n_tokens++;
+
+  if (token->type == T_EQUALS)
+    {
+      /* An !ENCLOSE argument must begin with its opening delimiter, so
+         check for it in ME_ENCLOSE rather than letting the delimiter be
+         collected as part of the argument. */
+      me->state = me->param->arg_type == ARG_ENCLOSE ? ME_ENCLOSE : ME_ARG;
+      return 0;
+    }
+
+  return me_expected (me, mt, &(struct token) { .type = T_EQUALS });
+}
+
+/* Checks whether TOKEN starts a call to one of the macros in MACROS and, if
+   so, creates a macro expander for the call and stores it in *MEP.
+
+   Returns -1, with *MEP set to null, if MACROS is empty, TOKEN is not a
+   T_ID or T_MACRO_ID, or no macro in MACROS has TOKEN's name.
+
+   Returns 1 if the macro has no parameters, so that TOKEN alone is the whole
+   call.
+
+   Returns 0 if the macro has parameters; the caller should supply the
+   following tokens with macro_expander_add().
+
+   When an expander is stored in *MEP, the caller must eventually release it
+   with macro_expander_destroy(). */
+int
+macro_expander_create (const struct macro_set *macros,
+                       const struct token *token,
+                       struct macro_expander **mep)
+{
+  *mep = NULL;
+  if (macro_set_is_empty (macros))
+    return -1;
+  if (token->type != T_ID && token->type != T_MACRO_ID)
+    return -1;
+
+  const struct macro *macro = macro_set_find (macros, token->string.string);
+  if (!macro)
+    return -1;
+
+  struct macro_expander *me = xmalloc (sizeof *me);
+  *me = (struct macro_expander) {
+    .macros = macros,
+    .n_tokens = 1,
+    .macro = macro,
+  };
+  *mep = me;
+
+  if (!macro->n_params)
+    return 1;
+  else
+    {
+      /* A positional !ENCLOSE argument must begin with its opening
+         delimiter, so check for it in ME_ENCLOSE rather than letting the
+         delimiter be collected as part of the argument. */
+      const struct macro_param *first = &macro->params[0];
+      me->state = (!first->positional ? ME_KEYWORD
+                   : first->arg_type == ARG_ENCLOSE ? ME_ENCLOSE
+                   : ME_ARG);
+      me->args = xcalloc (macro->n_params, sizeof *me->args);
+      me->param = macro->params;
+      return 0;
+    }
+}
+
+/* Frees ME along with every argument token list it has collected.  Does
+   nothing if ME is null. */
+void
+macro_expander_destroy (struct macro_expander *me)
+{
+  if (me == NULL)
+    return;
+
+  size_t n_params = me->macro->n_params;
+  for (size_t i = 0; i < n_params; i++)
+    {
+      struct macro_tokens *arg = me->args[i];
+      if (arg == NULL)
+        continue;
+
+      macro_tokens_uninit (arg);
+      free (arg);
+    }
+  free (me->args);
+  free (me);
+}
+
+/* Feeds MT, the next token of input, to ME, dispatching on ME's current
+   state.
+
+   A return value of -1 means that the tokens added so far do not invoke a
+   macro; the caller should consume the first token without expanding it.
+
+   A return value of 0 means that ME needs more tokens, either for macro
+   arguments or to decide whether this is a macro invocation at all; the
+   caller should call macro_expander_add() again with the next token.
+
+   A positive return value is the number of tokens that make up the macro
+   invocation.  It can be less than the number of tokens added, because it can
+   take some lookahead to tell that the invocation has ended.  The caller
+   should then call macro_expander_get_expansion() to obtain the expansion. */
+int
+macro_expander_add (struct macro_expander *me, const struct macro_token *mt)
+{
+  switch (me->state)
+    {
+    case ME_KEYWORD:
+      return me_keyword (me, mt);
+
+    case ME_EQUALS:
+      return me_equals (me, mt);
+
+    case ME_ENCLOSE:
+      return me_enclose (me, mt);
+
+    case ME_ARG:
+      return me_add_arg (me, mt);
+
+    case ME_ERROR:
+      return -1;
+
+    default:
+      NOT_REACHED ();
+    }
+}
+
+/* Each argument to a macro function is one of:
+
+ - A quoted string or other single literal token.
+
+ - An argument to the macro being expanded, e.g. !1 or a named argument.
+
+ - !*.
+
+ - A function invocation.
+
+ Each function invocation yields a character sequence to be turned into a
+ sequence of tokens. The case where that character sequence is a single
+ quoted string is an important special case.
+*/
+struct parse_macro_function_ctx
+ {
+ struct macro_token *input; /* Tokens to parse; a function call, if any, starts at input[0]. */
+ size_t n_input; /* Number of tokens in 'input'. */
+ int nesting_countdown; /* Handed on unchanged to nested contexts; presumably a recursion limit -- confirm. */
+ const struct macro_set *macros; /* Handed on unchanged to nested contexts. */
+ const struct macro_expander *me; /* Supplies the macro's parameters and the arguments collected for them. */
+ bool *expand; /* Handed on unchanged to nested contexts; meaning not visible here -- confirm. */
+ };
+
+static void
+macro_expand (const struct macro_tokens *,
+ int nesting_countdown, const struct macro_set *,
+ const struct macro_expander *, bool *expand, struct macro_tokens *exp);
+
+static bool
+expand_macro_function (struct parse_macro_function_ctx *ctx,
+ struct macro_token *output,
+ size_t *input_consumed);
+
+/* Parses one macro function argument that starts at CTX->input[I] and stores
+   it in *FARG.  Returns the number of input tokens consumed.
+
+   - A token that is not a T_MACRO_ID is copied as is.
+
+   - A T_MACRO_ID naming a parameter of the macro being expanded yields that
+     parameter's argument: the token itself if the argument is exactly one
+     token, otherwise a single T_MACRO_ID token whose string is the
+     representations of the argument's tokens joined by spaces.
+
+   - Any other T_MACRO_ID is offered to expand_macro_function(); if that
+     declines, the token is copied as is. */
+static size_t
+parse_function_arg (struct parse_macro_function_ctx *ctx,
+                    size_t i, struct macro_token *farg)
+{
+  struct macro_token *tokens = ctx->input;
+  const struct token *token = &tokens[i].token;
+  if (token->type != T_MACRO_ID)
+    {
+      macro_token_copy (farg, &tokens[i]);
+      return 1;
+    }
+
+  const struct macro_param *param
+    = macro_find_parameter_by_name (ctx->me->macro, token->string);
+  if (param != NULL)
+    {
+      size_t param_idx = param - ctx->me->macro->params;
+      const struct macro_tokens *marg = ctx->me->args[param_idx];
+      if (marg->n == 1)
+        {
+          macro_token_copy (farg, &marg->mts[0]);
+          return 1;
+        }
+
+      struct string joined = DS_EMPTY_INITIALIZER;
+      for (size_t j = 0; j < marg->n; j++)
+        {
+          if (j > 0)
+            ds_put_byte (&joined, ' ');
+          ds_put_substring (&joined, marg->mts[j].representation);
+        }
+
+      /* The token takes over JOINED's buffer, so the representation needs
+         a copy of its own. */
+      struct substring representation;
+      ss_alloc_substring (&representation, joined.ss);
+
+      *farg = (struct macro_token) {
+        .token = { .type = T_MACRO_ID, .string = joined.ss },
+        .representation = representation,
+      };
+      return 1;
+    }
+
+  struct parse_macro_function_ctx subctx = {
+    .input = &ctx->input[i],
+    .n_input = ctx->n_input - i,
+    .nesting_countdown = ctx->nesting_countdown,
+    .macros = ctx->macros,
+    .me = ctx->me,
+    .expand = ctx->expand,
+  };
+  size_t n_consumed;
+  if (expand_macro_function (&subctx, farg, &n_consumed))
+    return n_consumed;
+
+  macro_token_copy (farg, &tokens[i]);
+  return 1;
+}
+
+/* Tries to parse an invocation of the macro function named FUNCTION at the
+   start of CTX->input, that is, FUNCTION followed by a parenthesized,
+   comma-separated argument list.
+
+   If the first token is not a T_MACRO_ID that matches FUNCTION according to
+   ss_equals_case(), returns false silently so that the caller can try other
+   function names.
+
+   On success, returns true, stores the parsed arguments in *ARGS (which the
+   caller must free with macro_tokens_uninit()), and stores in *INPUT_CONSUMED
+   the number of tokens in the invocation, including the closing parenthesis.
+   The number of arguments is then between MIN_ARGS and MAX_ARGS, inclusive.
+
+   On a syntax error, prints a message on stdout, frees any arguments parsed
+   so far, and returns false.  *INPUT_CONSUMED is not meaningful then. */
+static bool
+parse_macro_function (struct parse_macro_function_ctx *ctx,
+                      struct macro_tokens *args,
+                      struct substring function,
+                      int min_args, int max_args,
+                      size_t *input_consumed)
+{
+  struct macro_token *tokens = ctx->input;
+  size_t n_tokens = ctx->n_input;
+
+  if (!n_tokens
+      || tokens[0].token.type != T_MACRO_ID
+      || !ss_equals_case (tokens[0].token.string, function))
+    return false;
+
+  if (n_tokens < 2 || tokens[1].token.type != T_LPAREN)
+    {
+      printf ("`(' expected following %s\n", function.string);
+      return false;
+    }
+
+  *args = (struct macro_tokens) { .n = 0 };
+
+  /* Token 0 is the function name and token 1 is the left parenthesis, so the
+     first argument, if any, starts at token 2. */
+  for (size_t i = 2;; )
+    {
+      if (i >= n_tokens)
+        goto unexpected_end;
+      if (tokens[i].token.type == T_RPAREN)
+        {
+          *input_consumed = i + 1;
+          if (args->n < min_args || args->n > max_args)
+            {
+              printf ("Wrong number of arguments to %s.\n", function.string);
+              goto error;
+            }
+          return true;
+        }
+
+      i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args));
+      if (i >= n_tokens)
+        goto unexpected_end;
+
+      /* A right parenthesis here is handled at the top of the next
+         iteration. */
+      if (tokens[i].token.type == T_COMMA)
+        i++;
+      else if (tokens[i].token.type != T_RPAREN)
+        {
+          printf ("Expecting `,' or `)' in %s invocation.\n",
+                  function.string);
+          goto error;
+        }
+    }
+
+unexpected_end:
+  printf ("Missing closing parenthesis in arguments to %s.\n",
+          function.string);
+  /* Fall through. */
+error:
+  macro_tokens_uninit (args);
+  return false;
+}
+
+/* Tries to evaluate one of the macro functions !LENGTH, !BLANKS, !CONCAT,
+   !QUOTE, or !UNQUOTE at the start of CTX->input.
+
+   On success, returns true, initializes *OUTPUT with the function's result
+   (which the caller must free with macro_token_uninit()), and stores in
+   *INPUT_CONSUMED the number of input tokens that made up the invocation.
+
+   Returns false if the input does not start with a valid invocation of one
+   of those functions, in which case *OUTPUT is not initialized. */
+static bool
+expand_macro_function (struct parse_macro_function_ctx *ctx,
+                       struct macro_token *output,
+                       size_t *input_consumed)
+{
+  struct macro_tokens args;
+
+  if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1,
+                            input_consumed))
+    {
+      /* The result is the length of the argument's text, as a number. */
+      size_t length = args.mts[0].representation.length;
+      *output = (struct macro_token) {
+        .token = { .type = T_POS_NUM, .number = length },
+        .representation = ss_cstr (xasprintf ("%zu", length)),
+      };
+    }
+  else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
+                                 input_consumed))
+    {
+      /* XXX this isn't right, it might be a character string containing a
+         positive integer, e.g. via !CONCAT. */
+      if (args.mts[0].token.type != T_POS_NUM)
+        {
+          printf ("argument to !BLANKS must be positive integer\n");
+          macro_tokens_uninit (&args);
+          return false;
+        }
+
+      struct string s = DS_EMPTY_INITIALIZER;
+      ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
+
+      struct substring s_copy;
+      ss_alloc_substring (&s_copy, s.ss);
+
+      /* The token takes over S's buffer; the representation is a copy. */
+      *output = (struct macro_token) {
+        .token = { .type = T_ID, .string = s.ss },
+        .representation = s_copy,
+      };
+    }
+  else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX,
+                                 input_consumed))
+    {
+      /* S must start out empty: the loop below only appends to it. */
+      struct string s = DS_EMPTY_INITIALIZER;
+      bool all_strings = true;
+      for (size_t i = 0; i < args.n; i++)
+        {
+          if (args.mts[i].token.type == T_STRING)
+            ds_put_substring (&s, args.mts[i].token.string);
+          else
+            {
+              all_strings = false;
+              ds_put_substring (&s, args.mts[i].representation);
+            }
+        }
+
+      /* Concatenating only quoted strings yields a quoted string; anything
+         else yields text for the caller to tokenize.  Either way the token
+         takes over S's buffer. */
+      if (all_strings)
+        {
+          *output = (struct macro_token) {
+            .token = { .type = T_STRING, .string = s.ss },
+          };
+          output->representation = ss_cstr (token_to_string (&output->token));
+        }
+      else
+        {
+          *output = (struct macro_token) {
+            .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss },
+          };
+          ss_alloc_substring (&output->representation, s.ss);
+        }
+    }
+  else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
+                                 input_consumed))
+    {
+      /* A string argument is passed through; anything else becomes a string
+         token holding the argument's text. */
+      if (args.mts[0].token.type == T_STRING)
+        macro_token_copy (output, &args.mts[0]);
+      else
+        {
+          *output = (struct macro_token) { .token = { .type = T_STRING } };
+          ss_alloc_substring (&output->token.string,
+                              args.mts[0].representation);
+          output->representation = ss_cstr (token_to_string (&output->token));
+        }
+    }
+  else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
+                                 input_consumed))
+    {
+      /* A string argument becomes a T_MACRO_ID token holding the string's
+         contents; anything else is passed through. */
+      if (args.mts[0].token.type == T_STRING)
+        {
+          *output = (struct macro_token) { .token = { .type = T_MACRO_ID } };
+          ss_alloc_substring (&output->token.string,
+                              args.mts[0].token.string);
+          output->representation = ss_cstr (token_to_string (&output->token));
+        }
+      else
+        macro_token_copy (output, &args.mts[0]);
+    }
+  else
+    return false;
+
+  macro_tokens_uninit (&args);
+  return true;
+}
+
+/* Expands the tokens in MTS and appends the result to EXP.
+
+   NESTING_COUNTDOWN is the number of further levels of macro call nesting
+   that are allowed; when it reaches zero, MTS is copied to EXP unexpanded.
+   MACROS is the set in which macro calls are looked up.  ME, if nonnull,
+   supplies the parameters and arguments of the macro whose body is being
+   expanded.  *EXPAND says whether macro calls are currently expanded; it is
+   updated when !ONEXPAND or !OFFEXPAND is encountered. */
+static void
+macro_expand (const struct macro_tokens *mts,
+              int nesting_countdown, const struct macro_set *macros,
+              const struct macro_expander *me, bool *expand,
+              struct macro_tokens *exp)
+{
+  if (nesting_countdown <= 0)
+    {
+      printf ("maximum nesting level exceeded\n");
+      for (size_t k = 0; k < mts->n; k++)
+        macro_tokens_add (exp, &mts->mts[k]);
+      return;
+    }
+
+  /* Fed to a sub-expander when it wants lookahead past the end of MTS. */
+  const struct macro_token stop = { .token = { .type = T_STOP } };
+
+  for (size_t pos = 0; pos < mts->n; pos++)
+    {
+      const struct macro_token *cur = &mts->mts[pos];
+      const struct token *tok = &cur->token;
+
+      /* A reference to one of ME's parameters is replaced by its argument. */
+      if (tok->type == T_MACRO_ID && me)
+        {
+          const struct macro_param *param
+            = macro_find_parameter_by_name (me->macro, tok->string);
+          if (param)
+            {
+              const struct macro_tokens *arg
+                = me->args[param - me->macro->params];
+              if (*expand && param->expand_arg)
+                macro_expand (arg, nesting_countdown, macros, NULL, expand,
+                              exp);
+              else
+                {
+                  for (size_t k = 0; k < arg->n; k++)
+                    macro_tokens_add (exp, &arg->mts[k]);
+                }
+              continue;
+            }
+        }
+
+      /* A call to another macro is replaced by that macro's expanded body. */
+      if (*expand)
+        {
+          struct macro_expander *subme;
+          int retval = macro_expander_create (macros, tok, &subme);
+          size_t lookahead = 1;
+          while (!retval)
+            {
+              size_t next = pos + lookahead;
+              retval = macro_expander_add (
+                subme, next < mts->n ? &mts->mts[next] : &stop);
+              lookahead++;
+            }
+
+          bool invoked = retval > 0;
+          if (invoked)
+            {
+              /* RETVAL tokens formed the call; the loop increment accounts
+                 for one of them. */
+              pos += retval - 1;
+              macro_expand (&subme->macro->body, nesting_countdown - 1,
+                            macros, subme, expand, exp);
+            }
+          macro_expander_destroy (subme);
+          if (invoked)
+            continue;
+        }
+
+      /* Only T_MACRO_ID tokens can be macro functions or directives. */
+      if (tok->type != T_MACRO_ID)
+        {
+          macro_tokens_add (exp, cur);
+          continue;
+        }
+
+      /* Maybe each arg should just be a string, either a quoted string or a
+         non-quoted string containing tokens. */
+      struct parse_macro_function_ctx ctx = {
+        .input = &mts->mts[pos],
+        .n_input = mts->n - pos,
+        .nesting_countdown = nesting_countdown,
+        .macros = macros,
+        .me = me,
+        .expand = expand,
+      };
+      struct macro_token function_output;
+      size_t function_consumed;
+      if (expand_macro_function (&ctx, &function_output, &function_consumed))
+        {
+          pos += function_consumed - 1;
+
+          /* T_MACRO_ID output is text that still has to be tokenized. */
+          if (function_output.token.type == T_MACRO_ID)
+            macro_tokens_from_string (exp, function_output.token.string,
+                                      SEG_MODE_INTERACTIVE /* XXX */);
+          else
+            macro_tokens_add (exp, &function_output);
+          macro_token_uninit (&function_output);
+
+          continue;
+        }
+
+      /* Directives change the expansion flag; anything else is copied. */
+      if (ss_equals_case (tok->string, ss_cstr ("!onexpand")))
+        *expand = true;
+      else if (ss_equals_case (tok->string, ss_cstr ("!offexpand")))
+        *expand = false;
+      else
+        macro_tokens_add (exp, cur);
+    }
+}
+
+/* Expands the body of the macro that ME invokes and appends the resulting
+   tokens to EXP.  The nesting limit comes from settings_get_mnest(). */
+void
+macro_expander_get_expansion (struct macro_expander *me, struct macro_tokens *exp)
+{
+#if 0
+  for (size_t i = 0; i < me->macro->n_params; i++)
+    {
+      printf ("%s:\n", me->macro->params[i].name);
+      macro_tokens_print (me->args[i], stdout);
+    }
+#endif
+
+  /* Expansion starts out enabled. */
+  bool expansion_enabled = true;
+  const struct macro_tokens *body = &me->macro->body;
+  macro_expand (body, settings_get_mnest (), me->macros, me,
+                &expansion_enabled, exp);
+
+#if 0
+  printf ("expansion:\n");
+  macro_tokens_print (exp, stdout);
+#endif
+}
+
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef MACRO_H
+#define MACRO_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "libpspp/hmap.h"
+#include "libpspp/str.h"
+#include "language/lexer/segment.h"
+#include "language/lexer/token.h"
+
+struct macro_expander;
+
+struct macro_token
+ {
+ struct token token; /* The token itself. */
+ struct substring representation; /* Text form of 'token'; presumably its
+                                      syntax as written (confirm against
+                                      macro.c). */
+ };
+
+void macro_token_copy (struct macro_token *, const struct macro_token *);
+void macro_token_uninit (struct macro_token *);
+
+struct macro_tokens
+ {
+ struct macro_token *mts; /* Array of tokens. */
+ size_t n; /* Number of tokens in use in 'mts'. */
+ size_t allocated; /* Presumably the number of elements allocated in
+                       'mts' (confirm against macro.c). */
+ };
+
+void macro_tokens_copy (struct macro_tokens *, const struct macro_tokens *);
+void macro_tokens_uninit (struct macro_tokens *);
+struct macro_token *macro_tokens_add_uninit (struct macro_tokens *);
+void macro_tokens_add (struct macro_tokens *, const struct macro_token *);
+
+void macro_tokens_from_string (struct macro_tokens *, const struct substring,
+ enum segmenter_mode);
+
+void macro_tokens_print (const struct macro_tokens *, FILE *);
+
+struct macro_param
+ {
+ bool positional; /* Is this a positional parameter? */
+ char *name; /* "!1" or "!name". */
+ struct macro_tokens def; /* Default expansion. */
+ bool expand_arg; /* Macro-expand the argument? */
+
+ enum
+ {
+ ARG_N_TOKENS,
+ ARG_CHAREND,
+ ARG_ENCLOSE,
+ ARG_CMDEND
+ }
+ arg_type; /* Presumably how the end of the argument is recognized and
+              which union member below applies (confirm against macro.c). */
+ union
+ {
+ int n_tokens; /* Presumably for ARG_N_TOKENS. */
+ struct token charend; /* Presumably for ARG_CHAREND. */
+ struct token enclose[2]; /* Presumably for ARG_ENCLOSE. */
+ };
+ };
+
+struct macro
+ {
+ struct hmap_node hmap_node; /* Indexed by 'name'. */
+ char *name; /* Name of the macro. */
+
+ struct macro_param *params; /* Array of parameters. */
+ size_t n_params; /* Number of elements in 'params'. */
+
+ struct macro_tokens body; /* Tokens that are expanded when the macro is
+                               invoked. */
+ };
+
+void macro_destroy (struct macro *);
+
+struct macro_set
+ {
+ struct hmap macros; /* Presumably contains "struct macro"s linked through
+                        their 'hmap_node' members (confirm in macro.c). */
+ };
+
+struct macro_set *macro_set_create (void);
+void macro_set_destroy (struct macro_set *);
+const struct macro *macro_set_find (const struct macro_set *,
+ const char *);
+void macro_set_add (struct macro_set *, struct macro *);
+
+/* Returns true if SET's hash map holds no macros, false otherwise. */
+static inline bool
+macro_set_is_empty (const struct macro_set *set)
+{
+  const struct hmap *map = &set->macros;
+  return hmap_is_empty (map);
+}
+\f
+/* Macro expansion. */
+
+int macro_expander_create (const struct macro_set *,
+ const struct token *,
+ struct macro_expander **);
+void macro_expander_destroy (struct macro_expander *);
+
+int macro_expander_add (struct macro_expander *, const struct macro_token *);
+
+void macro_expander_get_expansion (struct macro_expander *, struct macro_tokens *);
+
+#endif /* macro.h */
scanner_init (struct scanner *scanner, struct token *token)
{
scanner->state = S_START;
- token_init (token);
+ *token = (struct token) { .type = T_STOP };
}
/* Adds the segment with type TYPE and UTF-8 text S to SCANNER. TOKEN must be
#include "gl/c-ctype.h"
#include "gl/c-strcase.h"
+#include "gl/verify.h"
enum segmenter_state
{
S_TITLE_2
};
+/* S_SHBANG is the start state that SEGMENTER_INIT refers to as just 0. */
+verify (S_SHBANG == 0);
+
#define SS_START_OF_LINE (1u << 0)
#define SS_START_OF_COMMAND (1u << 1)
void
segmenter_init (struct segmenter *s, enum segmenter_mode mode)
{
- s->state = S_SHBANG;
- s->substate = 0;
- s->mode = mode;
+ *s = (struct segmenter) SEGMENTER_INIT (mode);
}
/* Returns the mode passed to segmenter_init() for S. */
unsigned char mode;
};
+#define SEGMENTER_INIT(MODE) { .mode = MODE }
+
void segmenter_init (struct segmenter *, enum segmenter_mode);
enum segmenter_mode segmenter_get_mode (const struct segmenter *);
#include "libpspp/cast.h"
#include "libpspp/misc.h"
-
#include "gl/ftoastr.h"
#include "gl/xalloc.h"
-/* Initializes TOKEN with an arbitrary type, number 0, and a null string. */
void
-token_init (struct token *token)
+token_copy (struct token *dst, const struct token *src)
{
- token->type = 0;
- token->number = 0.0;
- token->string = ss_empty ();
+ *dst = (struct token) {
+ .type = src->type,
+ .number = src->number,
+ };
+ ss_alloc_substring (&dst->string, src->string);
}
/* Frees the string that TOKEN contains. */
token_uninit (struct token *token)
{
if (token != NULL)
- ss_dealloc (&token->string);
+ {
+ ss_dealloc (&token->string);
+ *token = (struct token) { .type = T_STOP };
+ }
+}
+
+/* Returns true if A and B have the same type and, for token types that carry
+   a number or a string, the same number or string.  Returns false
+   otherwise. */
+bool
+token_equal (const struct token *a, const struct token *b)
+{
+  if (a->type != b->type)
+    return false;
+
+  /* Numeric tokens are equal when their numbers are. */
+  if (a->type == T_POS_NUM || a->type == T_NEG_NUM)
+    return a->number == b->number;
+
+  /* Tokens that carry text are compared with ss_equals(). */
+  if (a->type == T_ID
+      || a->type == T_MACRO_ID
+      || a->type == T_MACRO_PUNCT
+      || a->type == T_STRING)
+    return ss_equals (a->string, b->string);
+
+  /* For every other type, the type alone decides. */
+  return true;
}
static char *
return string_representation (token->string);
default:
- return xstrdup_if_nonnull (token_type_to_name (token->type));
+ return xstrdup_if_nonnull (token_type_to_string (token->type));
}
}
(int) token->string.length, token->string.string);
putc ('\n', stream);
}
+\f
+/* Initializes DST as a copy of SRC: a newly allocated array with room for
+   exactly SRC's tokens, each of them duplicated with token_copy(). */
+void
+tokens_copy (struct tokens *dst, const struct tokens *src)
+{
+  size_t count = src->n;
+
+  dst->tokens = xnmalloc (count, sizeof *dst->tokens);
+  dst->n = count;
+  dst->allocated = count;
+
+  for (size_t idx = 0; idx < src->n; idx++)
+    token_copy (&dst->tokens[idx], &src->tokens[idx]);
+}
+
+/* Frees every token in TOKENS and then the array that held them.  TOKENS
+   itself is not freed. */
+void
+tokens_uninit (struct tokens *tokens)
+{
+  size_t idx = 0;
+  while (idx < tokens->n)
+    {
+      token_uninit (&tokens->tokens[idx]);
+      idx++;
+    }
+
+  free (tokens->tokens);
+}
+
+/* Appends a copy of T to TOKENS, enlarging the array first if it is full. */
+void
+tokens_add (struct tokens *tokens, const struct token *t)
+{
+  /* Grow only when every allocated slot is in use.  The reversed comparison
+     (allocated >= n) always holds, so it would enlarge the array on every
+     call. */
+  if (tokens->n >= tokens->allocated)
+    tokens->tokens = x2nrealloc (tokens->tokens, &tokens->allocated,
+                                 sizeof *tokens->tokens);
+
+  token_copy (&tokens->tokens[tokens->n++], t);
+}
+
+/* Writes each token in TOKENS to STREAM, in order, using token_print(). */
+void
+tokens_print (const struct tokens *tokens, FILE *stream)
+{
+  size_t idx = 0;
+  while (idx < tokens->n)
+    {
+      token_print (&tokens->tokens[idx], stream);
+      idx++;
+    }
+}
#ifndef TOKEN_H
#define TOKEN_H 1
+#include <stdbool.h>
#include <stdio.h>
#include "libpspp/str.h"
#include "data/identifier.h"
struct substring string;
};
-#define TOKEN_INITIALIZER(TYPE, NUMBER, STRING) \
- { TYPE, NUMBER, SS_LITERAL_INITIALIZER (STRING) }
-
-void token_init (struct token *);
+void token_copy (struct token *, const struct token *);
void token_uninit (struct token *);
+bool token_equal (const struct token *, const struct token *);
+
char *token_to_string (const struct token *);
void token_print (const struct token *, FILE *);
+\f
+struct tokens
+ {
+ struct token *tokens; /* Array of tokens. */
+ size_t n; /* Number of tokens in use in 'tokens'. */
+ size_t allocated; /* Number of elements allocated in 'tokens'. */
+ };
+
+void tokens_copy (struct tokens *, const struct tokens *);
+void tokens_uninit (struct tokens *);
+void tokens_add (struct tokens *, const struct token *);
+
+void tokens_print (const struct tokens *, FILE *);
#endif /* token.h */
tests/data/sys-file.at \
tests/data/encrypted-file.at \
tests/language/command.at \
+ tests/language/control/define.at \
tests/language/control/do-if.at \
tests/language/control/do-repeat.at \
tests/language/control/loop.at \
--- /dev/null
+dnl PSPP - a program for statistical analysis.
+dnl Copyright (C) 2017 Free Software Foundation, Inc.
+dnl
+dnl This program is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation, either version 3 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
+dnl
+AT_BANNER([DEFINE])
+
+AT_SETUP([DEFINE])
+AT_DATA([define.sps], [dnl
+DEFINE !variables()
+ brand model license color
+!ENDDEFINE.
+])
+AT_CHECK([pspp define.sps])
+AT_CLEANUP
static void
check_segmentation (const char *input, size_t length, bool print_segments)
{
- struct segmenter s;
- segmenter_init (&s, mode);
+ struct segmenter s = SEGMENTER_INIT (mode);
size_t line_number = 1;
size_t line_offset = 0;