#include "language/lexer/macro.h"
+#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/str.h"
+#include "libpspp/string-array.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
ss_dealloc (&mt->representation);
}
+/* Appends MT's representation (the substring stored in MT->representation)
+   to S. */
+void
+macro_token_to_representation (struct macro_token *mt, struct string *s)
+{
+  const struct substring repr = mt->representation;
+  ds_put_substring (s, repr);
+}
+
void
macro_tokens_copy (struct macro_tokens *dst, const struct macro_tokens *src)
{
{
if (token->type != SCAN_SKIP)
{
+ printf ("error\n");
/* XXX report error */
}
}
token_print (&mts->mts[i].token, stream);
}
+/* Coarse spacing categories for tokens.  classify_token() maps each token
+   type to one of these, and needs_space() decides from the classes of two
+   adjacent tokens whether a space is written between them. */
+enum token_class
+ {
+ TC_ENDCMD, /* No space before or after (new-line after). */
+ TC_BINOP, /* Space on both sides. */
+ TC_COMMA, /* Space afterward. */
+ TC_ID, /* No space needed, except between two TC_ID tokens in a row. */
+ TC_PUNCT, /* No space needed, except between two TC_PUNCT tokens in a row. */
+ };
+
+/* Returns true if a space must be written between two adjacent tokens whose
+   classes are PREV and NEXT, in that order, and false otherwise.
+
+   The new-line that belongs after an end-of-command token is not produced
+   here; this function only reports that no space goes on either side of
+   one. */
+static bool
+needs_space (enum token_class prev, enum token_class next)
+{
+  /* Nothing is inserted in front of the end of a command. */
+  if (next == TC_ENDCMD)
+    return false;
+
+  switch (prev)
+    {
+    case TC_ENDCMD:
+      /* Nor is a space inserted after one. */
+      return false;
+
+    case TC_BINOP:
+    case TC_COMMA:
+      /* A binary operator or a comma is always followed by a space. */
+      return true;
+
+    default:
+      /* PREV is TC_ID or TC_PUNCT.  A binary operator always has a space in
+         front of it; otherwise a space is needed only between two tokens of
+         the same class. */
+      return next == TC_BINOP || prev == next;
+    }
+}
+
+/* Maps token TYPE to the spacing class that needs_space() works with.  Every
+   case in the switch returns; a TYPE that matches no case falls out of the
+   switch into NOT_REACHED (). */
+static enum token_class
+classify_token (enum token_type type)
+{
+  switch (type)
+    {
+    /* End of command. */
+    case T_ENDCMD:
+      return TC_ENDCMD;
+
+    /* Comma. */
+    case T_COMMA:
+      return TC_COMMA;
+
+    /* Identifiers, numbers, and strings. */
+    case T_ID:
+    case T_MACRO_ID:
+    case T_POS_NUM:
+    case T_NEG_NUM:
+    case T_STRING:
+      return TC_ID;
+
+    /* Parentheses and brackets, plus T_STOP. */
+    case T_LPAREN:
+    case T_RPAREN:
+    case T_LBRACK:
+    case T_RBRACK:
+    case T_STOP:
+      return TC_PUNCT;
+
+    /* Everything that is spaced like a binary operator. */
+    case T_PLUS:
+    case T_DASH:
+    case T_ASTERISK:
+    case T_SLASH:
+    case T_EQUALS:
+    case T_EXP:
+    case T_AND:
+    case T_OR:
+    case T_NOT:
+    case T_EQ:
+    case T_NE:
+    case T_GE:
+    case T_GT:
+    case T_LE:
+    case T_LT:
+    case T_ALL:
+    case T_BY:
+    case T_TO:
+    case T_WITH:
+    case T_MACRO_PUNCT:
+      return TC_BINOP;
+    }
+
+  NOT_REACHED ();
+}
+
+/* Appends to S the representations of all the tokens in MTS, in order.
+   Between each pair of adjacent tokens it writes a new-line if the earlier
+   token is T_ENDCMD, and otherwise a single space if needs_space() asks for
+   one given the two tokens' classes.  Appends nothing if MTS has no
+   tokens. */
+void
+macro_tokens_to_representation (struct macro_tokens *mts, struct string *s)
+{
+  for (size_t i = 0; i < mts->n; i++)
+    {
+      /* Every token but the first may need a separator in front of it. */
+      if (i > 0)
+        {
+          enum token_type before = mts->mts[i - 1].token.type;
+          enum token_type after = mts->mts[i].token.type;
+
+          if (before == T_ENDCMD)
+            ds_put_byte (s, '\n');
+          else
+            {
+              enum token_class before_class = classify_token (before);
+              enum token_class after_class = classify_token (after);
+              if (needs_space (before_class, after_class))
+                ds_put_byte (s, ' ');
+            }
+        }
+
+      macro_token_to_representation (&mts->mts[i], s);
+    }
+}
+
void
macro_destroy (struct macro *m)
{
return me_finished (me);
else
{
- me->state = me->param->positional ? ME_ARG : ME_KEYWORD;
+ me->state = (!me->param->positional ? ME_KEYWORD
+ : me->param->arg_type == ARG_ENCLOSE ? ME_ENCLOSE
+ : ME_ARG);
return 0;
}
}
static int
me_add_arg (struct macro_expander *me, const struct macro_token *mt)
{
+ const struct macro_param *p = me->param;
+
const struct token *token = &mt->token;
- if (token->type == T_STOP)
+ if ((token->type == T_ENDCMD || token->type == T_STOP)
+ && p->arg_type != ARG_CMDEND)
{
- msg (SE, _("Unexpected end of file reading argument %s "
+ msg (SE, _("Unexpected end of command reading argument %s "
"to macro %s."), me->param->name, me->macro->name);
return me_error (me);
me->n_tokens++;
- const struct macro_param *p = me->param;
struct macro_tokens **argp = &me->args[p - me->macro->params];
if (!*argp)
*argp = xzalloc (sizeof **argp);
else
{
const struct token *end
- = p->arg_type == ARG_CMDEND ? &p->charend : &p->enclose[1];
+ = p->arg_type == ARG_CHAREND ? &p->charend : &p->enclose[1];
if (token_equal (token, end))
return me_next_arg (me);
macro_tokens_add (arg, mt);
static const struct macro_param *
macro_find_parameter_by_name (const struct macro *m, struct substring name)
{
+ if (ss_first (name) == '!')
+ ss_advance (&name, 1);
+
for (size_t i = 0; i < m->n_params; i++)
{
const struct macro_param *p = &m->params[i];
- struct substring p_name = ss_cstr (p->name);
+ struct substring p_name = ss_cstr (p->name + 1);
if (!utf8_strncasecmp (p_name.string, p_name.length,
name.string, name.length))
return p;
return 1;
else
{
- me->state = macro->params[0].positional ? ME_ARG : ME_KEYWORD;
+ me->state = (!macro->params[0].positional ? ME_KEYWORD
+ : macro->params[0].arg_type == ARG_ENCLOSE ? ME_ENCLOSE
+ : ME_ARG);
me->args = xcalloc (macro->n_params, sizeof *me->args);
me->param = macro->params;
return 0;
static bool
expand_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_token *output,
- size_t *input_consumed);
+ struct string *output, size_t *input_consumed);
+
+/* Checks whether the two tokens at offsets OFS and OFS + 1 in the N-element
+   array MTS spell "!*": a T_MACRO_ID token whose string is exactly "!"
+   followed by a T_ASTERISK token.  Returns false if fewer than two tokens
+   remain at OFS. */
+static bool
+is_bang_star (const struct macro_token *mts, size_t n, size_t ofs)
+{
+  if (ofs + 1 >= n)
+    return false;
+
+  const struct token *bang = &mts[ofs].token;
+  const struct token *star = &mts[ofs + 1].token;
+  return (bang->type == T_MACRO_ID
+          && ss_equals (bang->string, ss_cstr ("!"))
+          && star->type == T_ASTERISK);
+}
static size_t
parse_function_arg (struct parse_macro_function_ctx *ctx,
- size_t i, struct macro_token *farg)
+ size_t i, struct string *farg)
{
struct macro_token *tokens = ctx->input;
const struct token *token = &tokens[i].token;
{
size_t param_idx = param - ctx->me->macro->params;
const struct macro_tokens *marg = ctx->me->args[param_idx];
- if (marg->n == 1)
- macro_token_copy (farg, &marg->mts[0]);
- else
+ for (size_t i = 0; i < marg->n; i++)
{
- struct string s = DS_EMPTY_INITIALIZER;
- for (size_t i = 0; i < marg->n; i++)
- {
- if (i)
- ds_put_byte (&s, ' ');
- ds_put_substring (&s, marg->mts[i].representation);
- }
+ if (i)
+ ds_put_byte (farg, ' ');
+ ds_put_substring (farg, marg->mts[i].representation);
+ }
+ return 1;
+ }
- struct substring s_copy;
- ss_alloc_substring (&s_copy, s.ss);
+ if (is_bang_star (ctx->input, ctx->n_input, i))
+ {
+ for (size_t i = 0; i < ctx->me->macro->n_params; i++)
+ {
+ if (!ctx->me->macro->params[i].positional)
+ break;
- *farg = (struct macro_token) {
- .token = { .type = T_MACRO_ID, .string = s.ss },
- .representation = s_copy,
- };
+ const struct macro_tokens *marg = ctx->me->args[i];
+ for (size_t j = 0; j < marg->n; j++)
+ {
+ if (i || j)
+ ds_put_byte (farg, ' ');
+ ds_put_substring (farg, marg->mts[j].representation);
+ }
}
- return 1;
+ return 2;
}
struct parse_macro_function_ctx subctx = {
return subinput_consumed;
}
- macro_token_copy (farg, &tokens[i]);
+ ds_put_substring (farg, tokens[i].representation);
return 1;
}
static bool
parse_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_tokens *args,
+ struct string_array *args,
struct substring function,
int min_args, int max_args,
size_t *input_consumed)
return false;
}
- *args = (struct macro_tokens) { .n = 0 };
+ string_array_init (args);
for (size_t i = 2;; )
{
return true;
}
- i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args));
+ struct string s = DS_EMPTY_INITIALIZER;
+ i += parse_function_arg (ctx, i, &s);
if (i >= n_tokens)
- goto unexpected_end;
+ {
+ ds_destroy (&s);
+ goto unexpected_end;
+ }
+ string_array_append_nocopy (args, ds_steal_cstr (&s));
if (tokens[i].token.type == T_COMMA)
i++;
function.string);
/* Fall through. */
error:
- macro_tokens_uninit (args);
+ string_array_destroy (args);
return false;
}
+/* Tries to interpret S as exactly one string token.
+
+   S is fed to a string lexer.  If the first token the lexer yields has type
+   T_STRING and the lexer yields no second token, appends that token's string
+   to CONTENT and returns true.  Otherwise returns false without touching
+   CONTENT.
+
+   CONTENT may be null when the caller only wants to know whether S is a
+   single string token; in that case nothing is appended. */
+static bool
+unquote_string (const char *s, struct string *content)
+{
+  struct string_lexer slex;
+  string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE /* XXX */);
+
+  struct token token1;
+  if (!string_lexer_next (&slex, &token1))
+    return false;
+
+  if (token1.type != T_STRING)
+    {
+      token_uninit (&token1);
+      return false;
+    }
+
+  /* A second token means that S is more than just one string. */
+  struct token token2;
+  if (string_lexer_next (&slex, &token2))
+    {
+      token_uninit (&token1);
+      token_uninit (&token2);
+      return false;
+    }
+
+  if (content)
+    ds_put_substring (content, token1.string);
+  token_uninit (&token1);
+  return true;
+}
+
+/* Initializes TMP as an empty string, then tries to unquote S into it with
+   unquote_string().  Returns TMP's contents as a C string if that succeeds,
+   or S itself if it does not.  TMP is initialized either way, so the caller
+   is expected to ds_destroy() it once done with the returned pointer. */
+static const char *
+unquote_string_in_place (const char *s, struct string *tmp)
+{
+  ds_init_empty (tmp);
+  if (!unquote_string (s, tmp))
+    return s;
+  return ds_cstr (tmp);
+}
+
+/* Parses S as a decimal integer.
+
+   Returns true if S consists of an integer (as accepted by strtol() in base
+   10), optionally followed by characters in CC_SPACES, and the value fits in
+   an int.  Returns false if S contains no digits at all, has other trailing
+   characters, or is out of range.
+
+   *NP is always written: it receives the parsed value clamped to the range
+   [INT_MIN, INT_MAX] (0 if nothing could be parsed). */
+static bool
+parse_integer (const char *s, int *np)
+{
+  errno = 0;
+
+  char *tail;
+  long int n = strtol (s, &tail, 10);
+  *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
+
+  /* strtol() leaves TAIL equal to S when it performs no conversion, which
+     would otherwise let an empty or all-blank S pass as 0. */
+  if (tail == s)
+    return false;
+
+  tail += strspn (tail, CC_SPACES);
+  return *tail == '\0' && errno != ERANGE && n == *np;
+}
+
+/* Tries to expand a macro function call at the start of CTX's input.
+
+   Each supported function name is tried in turn with
+   parse_macro_function(), which collects the arguments as strings in ARGS.
+   For the first one that matches, the function's result is appended to
+   OUTPUT as text and true is returned.  "!null" takes no arguments: it
+   appends nothing, sets *INPUT_CONSUMED to 1, and returns true.
+
+   Returns false if no function matches or if an argument fails validation
+   (!BLANKS and !SUBSTR check their numeric arguments).  INPUT_CONSUMED is
+   handed to parse_macro_function(); presumably it stores there the number
+   of input tokens that the call occupied. */
static bool
expand_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_token *output,
+ struct string *output,
size_t *input_consumed)
{
- struct macro_tokens args;
+ struct string_array args;
if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1,
input_consumed))
- {
- size_t length = args.mts[0].representation.length;
- *output = (struct macro_token) {
- .token = { .type = T_POS_NUM, .number = length },
- .representation = ss_cstr (xasprintf ("%zu", length)),
- };
- }
+ ds_put_format (output, "%zu", strlen (args.strings[0]));
else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
input_consumed))
{
- /* XXX this isn't right, it might be a character string containing a
- positive integer, e.g. via !CONCAT. */
- if (args.mts[0].token.type != T_POS_NUM)
+ int n;
+ /* A negative count would turn into a huge size when passed on below. */
+ if (!parse_integer (args.strings[0], &n) || n < 0)
{
- printf ("argument to !BLANKS must be positive integer\n");
- macro_tokens_uninit (&args);
+ printf ("argument to !BLANKS must be non-negative integer (not \"%s\")\n", args.strings[0]);
+ string_array_destroy (&args);
return false;
}
- struct string s = DS_EMPTY_INITIALIZER;
- ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
-
- struct substring s_copy;
- ss_alloc_substring (&s_copy, s.ss);
-
- *output = (struct macro_token) {
- .token = { .type = T_ID, .string = s.ss },
- .representation = s_copy,
- };
+ ds_put_byte_multiple (output, ' ', n);
}
else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX,
input_consumed))
{
- struct string s;
- bool all_strings = true;
for (size_t i = 0; i < args.n; i++)
+ if (!unquote_string (args.strings[i], output))
+ ds_put_cstr (output, args.strings[i]);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!head"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */);
+ if (mts.n > 0)
+ ds_put_substring (output, mts.mts[0].representation);
+ macro_tokens_uninit (&mts);
+ ds_destroy (&tmp);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!index"), 2, 2,
+ input_consumed))
+ {
+ const char *haystack = args.strings[0];
+ const char *needle = strstr (haystack, args.strings[1]);
+ /* 1-based byte offset of the match, or 0 if there is none.  Kept in a
+ size_t so that it matches the %zu conversion. */
+ size_t position = needle ? (size_t) (needle - haystack) + 1 : 0;
+ ds_put_format (output, "%zu", position);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
+ input_consumed))
+ {
+ /* Only whether the argument is already a quoted string matters here, so
+ unquote into a scratch string and throw the result away. */
+ struct string scratch = DS_EMPTY_INITIALIZER;
+ bool already_quoted = unquote_string (args.strings[0], &scratch);
+ ds_destroy (&scratch);
+ if (already_quoted)
+ ds_put_cstr (output, args.strings[0]);
+ else
{
- if (args.mts[i].token.type == T_STRING)
- ds_put_substring (&s, args.mts[i].token.string);
- else
+ ds_extend (output, strlen (args.strings[0]) + 2);
+ ds_put_byte (output, '\'');
+ for (const char *p = args.strings[0]; *p; p++)
{
- all_strings = false;
- ds_put_substring (&s, args.mts[i].representation);
+ if (*p == '\'')
+ ds_put_byte (output, '\'');
+ ds_put_byte (output, *p);
}
+ ds_put_byte (output, '\'');
}
-
- if (all_strings)
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!substr"), 2, 3,
+ input_consumed))
+ {
+ int start;
+ if (!parse_integer (args.strings[1], &start) || start < 1)
{
- *output = (struct macro_token) {
- .token = { .type = T_STRING, .string = s.ss },
- };
- output->representation = ss_cstr (token_to_string (&output->token));
+ printf ("second argument to !SUBSTR must be positive integer (not \"%s\")\n", args.strings[1]);
+ string_array_destroy (&args);
+ return false;
}
- else
+
+ int count = INT_MAX;
+ if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
{
- *output = (struct macro_token) {
- .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss },
- };
- ss_alloc_substring (&output->representation, s.ss);
+ printf ("third argument to !SUBSTR must be non-negative integer (not \"%s\")\n", args.strings[2]);
+ string_array_destroy (&args);
+ return false;
}
+
+ struct substring s = ss_cstr (args.strings[0]);
+ ds_put_substring (output, ss_substr (s, start - 1, count));
}
- else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!tail"), 1, 1,
input_consumed))
{
- if (args.mts[0].token.type == T_STRING)
- macro_token_copy (output, &args.mts[0]);
- else
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */);
+ if (mts.n > 1)
{
- *output = (struct macro_token) { .token = { .type = T_STRING } };
- ss_alloc_substring (&output->token.string, args.mts[0].representation);
- output->representation = ss_cstr (token_to_string (&output->token));
+ struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
+ macro_tokens_to_representation (&tail, output);
}
+ macro_tokens_uninit (&mts);
+ ds_destroy (&tmp);
}
else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
input_consumed))
{
- if (args.mts[0].token.type == T_STRING)
- {
- *output = (struct macro_token) { .token = { .type = T_MACRO_ID } };
- ss_alloc_substring (&output->token.string, args.mts[0].token.string);
- output->representation = ss_cstr (token_to_string (&output->token));
- }
- else
- macro_token_copy (output, &args.mts[0]);
+ if (!unquote_string (args.strings[0], output))
+ ds_put_cstr (output, args.strings[0]);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!upcase"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+ char *upper = utf8_to_upper (s);
+ ds_put_cstr (output, upper);
+ free (upper);
+ ds_destroy (&tmp);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!eval"), 1, 1,
+ input_consumed))
+ {
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (args.strings[0]),
+ SEG_MODE_INTERACTIVE /* XXX */);
+ struct macro_tokens exp = { .n = 0 };
+ macro_expand (&mts, ctx->nesting_countdown - 1, ctx->macros,
+ ctx->me, ctx->expand, &exp);
+ macro_tokens_to_representation (&exp, output);
+ macro_tokens_uninit (&exp);
+ macro_tokens_uninit (&mts);
+ }
+ else if (ctx->n_input > 0
+ && ctx->input[0].token.type == T_MACRO_ID
+ && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!null")))
+ {
+ /* No arguments were parsed on this path, so ARGS must not be destroyed. */
+ *input_consumed = 1;
+ return true;
}
else
return false;
- macro_tokens_uninit (&args);
+ string_array_destroy (&args);
return true;
}
macro_tokens_add (exp, &arg->mts[i]);
continue;
}
+
+ if (is_bang_star (mts->mts, mts->n, i))
+ {
+ for (size_t j = 0; j < me->macro->n_params; j++)
+ {
+ const struct macro_param *param = &me->macro->params[j];
+ if (!param->positional)
+ break;
+
+ const struct macro_tokens *arg = me->args[j];
+ if (*expand && param->expand_arg)
+ macro_expand (arg, nesting_countdown, macros, NULL, expand, exp);
+ else
+ for (size_t k = 0; k < arg->n; k++)
+ macro_tokens_add (exp, &arg->mts[k]);
+ }
+ i++;
+ continue;
+ }
}
if (*expand)
int retval = macro_expander_create (macros, token, &subme);
for (size_t j = 1; !retval; j++)
{
- const struct macro_token stop = { .token = { .type = T_STOP } };
+ const struct macro_token endcmd = { .token = { .type = T_ENDCMD } };
retval = macro_expander_add (
- subme, i + j < mts->n ? &mts->mts[i + j] : &stop);
+ subme, i + j < mts->n ? &mts->mts[i + j] : &endcmd);
}
if (retval > 0)
{
.me = me,
.expand = expand,
};
- struct macro_token function_output;
+ struct string function_output = DS_EMPTY_INITIALIZER;
size_t function_consumed;
if (expand_macro_function (&ctx, &function_output, &function_consumed))
{
i += function_consumed - 1;
- if (function_output.token.type == T_MACRO_ID)
- macro_tokens_from_string (exp, function_output.token.string,
- SEG_MODE_INTERACTIVE /* XXX */);
- else
- macro_tokens_add (exp, &function_output);
- macro_token_uninit (&function_output);
+ macro_tokens_from_string (exp, function_output.ss,
+ SEG_MODE_INTERACTIVE /* XXX */);
+ ds_destroy (&function_output);
continue;
}