#include "language/lexer/macro.h"
+#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/str.h"
+#include "libpspp/string-array.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
ss_dealloc (&mt->representation);
}
+/* Appends the stored source text ("representation") of token MT to the end
+   of S.  MT itself is not modified. */
+void
+macro_token_to_representation (struct macro_token *mt, struct string *s)
+{
+  struct substring text = mt->representation;
+  ds_put_substring (s, text);
+}
+
void
macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
{
token_print (&mts->mts[i].token, stream);
}
+/* Spacing category of a token.  classify_token() maps a token type to one of
+   these and needs_space() uses a pair of them to decide whether a space must
+   be written between two adjacent tokens when they are rendered as text. */
+enum token_class
+ {
+ TC_ENDCMD, /* No space before or after (new-line after). */
+ TC_BINOP, /* Space on both sides. */
+ TC_COMMA, /* Space afterward. */
+ TC_ID, /* Don't need spaces except sequentially. */
+ TC_PUNCT, /* Don't need spaces except sequentially. */
+ };
+
+/* Returns true if a space must be written between a token of class PREV and
+   an immediately following token of class NEXT, false otherwise. */
+static bool
+needs_space (enum token_class prev, enum token_class next)
+{
+  /* The end of a command never has a space on either side.  (The new-line
+     that follows it is the caller's special case.) */
+  if (prev == TC_ENDCMD || next == TC_ENDCMD)
+    return false;
+
+  switch (prev)
+    {
+    case TC_BINOP:
+      /* Binary operators always have a space on both sides. */
+      return true;
+
+    case TC_COMMA:
+      /* A comma always has a space afterward. */
+      return true;
+
+    default:
+      /* PREV is TC_ID or TC_PUNCT.  A following binary operator still wants
+         its leading space; otherwise a space is needed only when there are
+         two of the same class in a row. */
+      return next == TC_BINOP || prev == next;
+    }
+}
+
+/* Maps token type TYPE to the spacing class that needs_space() works with.
+
+   The switch deliberately has no "default" case: every token type listed
+   here returns directly, and anything else falls out of the switch into
+   NOT_REACHED. */
+static enum token_class
+classify_token (enum token_type type)
+{
+  switch (type)
+    {
+    /* End of command. */
+    case T_ENDCMD:
+      return TC_ENDCMD;
+
+    /* Comma. */
+    case T_COMMA:
+      return TC_COMMA;
+
+    /* Identifiers, numbers, and strings. */
+    case T_ID:
+    case T_MACRO_ID:
+    case T_POS_NUM:
+    case T_NEG_NUM:
+    case T_STRING:
+      return TC_ID;
+
+    /* Grouping punctuation, plus T_STOP. */
+    case T_STOP:
+    case T_LPAREN:
+    case T_RPAREN:
+    case T_LBRACK:
+    case T_RBRACK:
+      return TC_PUNCT;
+
+    /* Operators and reserved words, all spaced like binary operators. */
+    case T_PLUS:
+    case T_DASH:
+    case T_ASTERISK:
+    case T_SLASH:
+    case T_EQUALS:
+    case T_AND:
+    case T_OR:
+    case T_NOT:
+    case T_EQ:
+    case T_GE:
+    case T_GT:
+    case T_LE:
+    case T_LT:
+    case T_NE:
+    case T_ALL:
+    case T_BY:
+    case T_TO:
+    case T_WITH:
+    case T_EXP:
+    case T_MACRO_PUNCT:
+      return TC_BINOP;
+    }
+
+  NOT_REACHED ();
+}
+
+/* Appends the text of every token in MTS to S, in order.
+
+   Between two adjacent tokens, a new-line is written if the earlier one is
+   T_ENDCMD; otherwise a single space is written when needs_space() asks for
+   one, and nothing at all when it does not.  Nothing is appended if MTS is
+   empty. */
+void
+macro_tokens_to_representation (struct macro_tokens *mts, struct string *s)
+{
+  for (size_t i = 0; i < mts->n; i++)
+    {
+      if (i > 0)
+        {
+          enum token_type before = mts->mts[i - 1].token.type;
+          enum token_type after = mts->mts[i].token.type;
+
+          if (before == T_ENDCMD)
+            ds_put_byte (s, '\n');
+          else
+            {
+              enum token_class before_class = classify_token (before);
+              enum token_class after_class = classify_token (after);
+              if (needs_space (before_class, after_class))
+                ds_put_byte (s, ' ');
+            }
+        }
+
+      macro_token_to_representation (&mts->mts[i], s);
+    }
+}
+
void
macro_destroy (struct macro *m)
{
return me_finished (me);
else
{
- me->state = me->param->positional ? ME_ARG : ME_KEYWORD;
+ me->state = (!me->param->positional ? ME_KEYWORD
+ : me->param->arg_type == ARG_ENCLOSE ? ME_ENCLOSE
+ : ME_ARG);
return 0;
}
}
static int
me_add_arg (struct macro_expander *me, const struct macro_token *mt)
{
+ const struct macro_param *p = me->param;
+
const struct token *token = &mt->token;
- if (token->type == T_STOP)
+ if ((token->type == T_ENDCMD || token->type == T_STOP)
+ && p->arg_type != ARG_CMDEND)
{
- msg (SE, _("Unexpected end of file reading argument %s "
+ msg (SE, _("Unexpected end of command reading argument %s "
"to macro %s."), me->param->name, me->macro->name);
return me_error (me);
me->n_tokens++;
- const struct macro_param *p = me->param;
struct macro_tokens **argp = &me->args[p - me->macro->params];
if (!*argp)
*argp = xzalloc (sizeof **argp);
else
{
const struct token *end
- = p->arg_type == ARG_CMDEND ? &p->charend : &p->enclose[1];
+ = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
if (token_equal (token, end))
return me_next_arg (me);
macro_tokens_add (arg, mt);
static const struct macro_param *
macro_find_parameter_by_name (const struct macro *m, struct substring name)
{
+ if (ss_first (name) == '!')
+ ss_advance (&name, 1);
+
for (size_t i = 0; i < m->n_params; i++)
{
const struct macro_param *p = &m->params[i];
- struct substring p_name = ss_cstr (p->name);
+ struct substring p_name = ss_cstr (p->name + 1);
if (!utf8_strncasecmp (p_name.string, p_name.length,
name.string, name.length))
return p;
return 1;
else
{
- me->state = macro->params[0].positional ? ME_ARG : ME_KEYWORD;
+ me->state = (!macro->params[0].positional ? ME_KEYWORD
+ : macro->params[0].arg_type == ARG_ENCLOSE ? ME_ENCLOSE
+ : ME_ARG);
me->args = xcalloc (macro->n_params, sizeof *me->args);
me->param = macro->params;
return 0;
static bool
expand_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_token *output,
- size_t *input_consumed);
+ struct string *output, size_t *input_consumed);
static size_t
parse_function_arg (struct parse_macro_function_ctx *ctx,
- size_t i, struct macro_token *farg)
+ size_t i, struct string *farg)
{
struct macro_token *tokens = ctx->input;
const struct token *token = &tokens[i].token;
{
size_t param_idx = param - ctx->me->macro->params;
const struct macro_tokens *marg = ctx->me->args[param_idx];
- if (marg->n == 1)
- macro_token_copy (farg, &marg->mts[0]);
- else
+ for (size_t i = 0; i < marg->n; i++)
{
- struct string s = DS_EMPTY_INITIALIZER;
- for (size_t i = 0; i < marg->n; i++)
- {
- if (i)
- ds_put_byte (&s, ' ');
- ds_put_substring (&s, marg->mts[i].representation);
- }
-
- struct substring s_copy;
- ss_alloc_substring (&s_copy, s.ss);
-
- *farg = (struct macro_token) {
- .token = { .type = T_MACRO_ID, .string = s.ss },
- .representation = s_copy,
- };
+ if (i)
+ ds_put_byte (farg, ' ');
+ ds_put_substring (farg, marg->mts[i].representation);
}
return 1;
}
return subinput_consumed;
}
- macro_token_copy (farg, &tokens[i]);
+ ds_put_substring (farg, tokens[i].representation);
return 1;
}
static bool
parse_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_tokens *args,
+ struct string_array *args,
struct substring function,
int min_args, int max_args,
size_t *input_consumed)
return false;
}
- *args = (struct macro_tokens) { .n = 0 };
+ string_array_init (args);
for (size_t i = 2;; )
{
return true;
}
- i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args));
+ struct string s = DS_EMPTY_INITIALIZER;
+ i += parse_function_arg (ctx, i, &s);
if (i >= n_tokens)
- goto unexpected_end;
+ {
+ ds_destroy (&s);
+ goto unexpected_end;
+ }
+ string_array_append_nocopy (args, ds_steal_cstr (&s));
if (tokens[i].token.type == T_COMMA)
i++;
function.string);
/* Fall through. */
error:
- macro_tokens_uninit (args);
+ string_array_destroy (args);
return false;
}
+/* Runs a string lexer over the null-terminated text S and reports whether it
+   yields exactly one token, of type T_STRING, and nothing after it.
+
+   If so, returns true and, when CONTENT is nonnull, appends that token's
+   string value to CONTENT.  Otherwise returns false and leaves CONTENT
+   untouched.
+
+   CONTENT may be NULL for callers that only want the yes/no answer (the
+   !QUOTE handling calls it that way). */
+static bool
+string_is_quoted_string (const char *s, struct string *content)
+{
+  struct string_lexer slex;
+  string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE /* XXX */);
+
+  struct token token1;
+  if (!string_lexer_next (&slex, &token1))
+    return false;
+
+  if (token1.type != T_STRING)
+    {
+      token_uninit (&token1);
+      return false;
+    }
+
+  /* Anything following the string means S is more than a lone string. */
+  struct token token2;
+  if (string_lexer_next (&slex, &token2))
+    {
+      token_uninit (&token1);
+      token_uninit (&token2);
+      return false;
+    }
+
+  /* Only write the content if the caller asked for it; appending through a
+     null CONTENT would dereference a null pointer. */
+  if (content != NULL)
+    ds_put_substring (content, token1.string);
+  token_uninit (&token1);
+  return true;
+}
+
+/* Tries to expand a macro function call at the start of CTX's input.
+
+ The functions tried, in order, are !LENGTH, !BLANKS, !CONCAT, !QUOTE,
+ !UNQUOTE, and the bare keyword !NULL. On a match the expansion text is
+ appended to OUTPUT (nothing is appended for !NULL) and true is returned;
+ *INPUT_CONSUMED is passed to parse_macro_function() for it to fill in,
+ and is set to 1 for !NULL. Returns false if nothing matches or if the
+ argument to !BLANKS is not a valid count. */
static bool
expand_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_token *output,
+ struct string *output,
size_t *input_consumed)
{
- struct macro_tokens args;
+ struct string_array args;
if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1,
input_consumed))
- {
- size_t length = args.mts[0].representation.length;
- *output = (struct macro_token) {
- .token = { .type = T_POS_NUM, .number = length },
- .representation = ss_cstr (xasprintf ("%zu", length)),
- };
- }
+ ds_put_format (output, "%zu", strlen (args.strings[0]));
else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
input_consumed))
{
- /* XXX this isn't right, it might be a character string containing a
- positive integer, e.g. via !CONCAT. */
- if (args.mts[0].token.type != T_POS_NUM)
+ /* Keep the value as a long until it has been range-checked: narrowing
+ to int first could wrap a huge count into a small or zero value
+ that then passes validation. Also reject an argument with no
+ digits at all (TAIL still at the start), which strtol() reports as
+ 0 without setting errno. */
+ char *tail;
+ errno = 0;
+ long n = strtol (args.strings[0], &tail, 10);
+ if (tail == args.strings[0] || *tail != '\0'
+ || n < 0 || n > INT_MAX || errno == ERANGE)
{
- printf ("argument to !BLANKS must be positive integer\n");
- macro_tokens_uninit (&args);
+ printf ("argument to !BLANKS must be non-negative integer (not \"%s\")\n", args.strings[0]);
+ string_array_destroy (&args);
return false;
}
- struct string s = DS_EMPTY_INITIALIZER;
- ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
-
- struct substring s_copy;
- ss_alloc_substring (&s_copy, s.ss);
-
- *output = (struct macro_token) {
- .token = { .type = T_ID, .string = s.ss },
- .representation = s_copy,
- };
+ ds_put_byte_multiple (output, ' ', n);
}
else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX,
input_consumed))
{
- struct string s;
- bool all_strings = true;
for (size_t i = 0; i < args.n; i++)
- {
- if (args.mts[i].token.type == T_STRING)
- ds_put_substring (&s, args.mts[i].token.string);
- else
- {
- all_strings = false;
- ds_put_substring (&s, args.mts[i].representation);
- }
- }
-
- if (all_strings)
- {
- *output = (struct macro_token) {
- .token = { .type = T_STRING, .string = s.ss },
- };
- output->representation = ss_cstr (token_to_string (&output->token));
- }
- else
- {
- *output = (struct macro_token) {
- .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss },
- };
- ss_alloc_substring (&output->representation, s.ss);
- }
+ if (!string_is_quoted_string (args.strings[i], output))
+ ds_put_cstr (output, args.strings[i]);
}
else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
input_consumed))
{
- if (args.mts[0].token.type == T_STRING)
- macro_token_copy (output, &args.mts[0]);
+ /* Only the yes/no answer is wanted here, but
+ string_is_quoted_string() appends the string's content to its
+ second argument, so hand it a scratch buffer rather than a null
+ pointer and discard the result. */
+ struct string unquoted = DS_EMPTY_INITIALIZER;
+ bool already_quoted = string_is_quoted_string (args.strings[0],
+ &unquoted);
+ ds_destroy (&unquoted);
+ if (already_quoted)
+ ds_put_cstr (output, args.strings[0]);
else
{
- *output = (struct macro_token) { .token = { .type = T_STRING } };
- ss_alloc_substring (&output->token.string, args.mts[0].representation);
- output->representation = ss_cstr (token_to_string (&output->token));
+ /* Wrap the text in single quotes, doubling embedded ones. */
+ ds_extend (output, strlen (args.strings[0]) + 2);
+ ds_put_byte (output, '\'');
+ for (const char *p = args.strings[0]; *p; p++)
+ {
+ if (*p == '\'')
+ ds_put_byte (output, '\'');
+ ds_put_byte (output, *p);
+ }
+ ds_put_byte (output, '\'');
}
}
else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
input_consumed))
{
- if (args.mts[0].token.type == T_STRING)
- {
- *output = (struct macro_token) { .token = { .type = T_MACRO_ID } };
- ss_alloc_substring (&output->token.string, args.mts[0].token.string);
- output->representation = ss_cstr (token_to_string (&output->token));
- }
- else
- macro_token_copy (output, &args.mts[0]);
+ if (!string_is_quoted_string (args.strings[0], output))
+ ds_put_cstr (output, args.strings[0]);
+ }
+ else if (ctx->n_input > 0
+ && ctx->input[0].token.type == T_MACRO_ID
+ && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!null")))
+ {
+ /* !NULL takes no arguments and expands to nothing. ARGS holds
+ nothing to release on this path, so return directly. */
+ *input_consumed = 1;
+ return true;
}
else
return false;
- macro_tokens_uninit (&args);
+ string_array_destroy (&args);
return true;
}
const struct macro_expander *me, bool *expand,
struct macro_tokens *exp)
{
+ /* Macro expansion:
+
+ - Macro names in macro bodies are not expanded by default. !EVAL()
+ expands them.
+
+ - Macro names in arguments to macro invocations (outside of macro bodies)
+ are expanded by default, unless !NOEXPAND. */
if (nesting_countdown <= 0)
{
printf ("maximum nesting level exceeded\n");
int retval = macro_expander_create (macros, token, &subme);
for (size_t j = 1; !retval; j++)
{
- const struct macro_token stop = { .token = { .type = T_STOP } };
+ const struct macro_token endcmd = { .token = { .type = T_ENDCMD } };
retval = macro_expander_add (
- subme, i + j < mts->n ? &mts->mts[i + j] : &stop);
+ subme, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
}
if (retval > 0)
{
.me = me,
.expand = expand,
};
- struct macro_token function_output;
+ struct string function_output = DS_EMPTY_INITIALIZER;
size_t function_consumed;
if (expand_macro_function (&ctx, &function_output, &function_consumed))
{
i += function_consumed - 1;
- if (function_output.token.type == T_MACRO_ID)
- macro_tokens_from_string (exp, function_output.token.string,
- SEG_MODE_INTERACTIVE /* XXX */);
- else
- macro_tokens_add (exp, &function_output);
- macro_token_uninit (&function_output);
+ macro_tokens_from_string (exp, function_output.ss,
+ SEG_MODE_INTERACTIVE /* XXX */);
+ ds_destroy (&function_output);
continue;
}