#include "language/lexer/segment.h"
#include "language/lexer/scan.h"
#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/str.h"
{
if (token->type != SCAN_SKIP)
{
+ printf ("error\n");
/* XXX report error */
}
}
*/
struct parse_macro_function_ctx
{
- struct macro_token *input;
+ const struct macro_token *input;
size_t n_input;
int nesting_countdown;
const struct macro_set *macros;
expand_macro_function (struct parse_macro_function_ctx *ctx,
struct string *output, size_t *input_consumed);
+/* Reports whether the two-token sequence !* begins at index OFS within MTS,
+   which holds N tokens: a T_MACRO_ID token spelled "!" immediately followed
+   by a T_ASTERISK token.  Returns false if fewer than two tokens remain at
+   OFS. */
+static bool
+is_bang_star (const struct macro_token *mts, size_t n, size_t ofs)
+{
+  /* Both mts[ofs] and mts[ofs + 1] must exist. */
+  if (ofs + 1 >= n)
+    return false;
+
+  const struct token *bang = &mts[ofs].token;
+  if (bang->type != T_MACRO_ID || !ss_equals (bang->string, ss_cstr ("!")))
+    return false;
+
+  return mts[ofs + 1].token.type == T_ASTERISK;
+}
+
static size_t
parse_function_arg (struct parse_macro_function_ctx *ctx,
size_t i, struct string *farg)
{
- struct macro_token *tokens = ctx->input;
+ const struct macro_token *tokens = ctx->input;
const struct token *token = &tokens[i].token;
if (token->type == T_MACRO_ID)
{
return 1;
}
+ if (is_bang_star (ctx->input, ctx->n_input, i))
+ {
+ for (size_t i = 0; i < ctx->me->macro->n_params; i++)
+ {
+ if (!ctx->me->macro->params[i].positional)
+ break;
+
+ const struct macro_tokens *marg = ctx->me->args[i];
+ for (size_t j = 0; j < marg->n; j++)
+ {
+ if (i || j)
+ ds_put_byte (farg, ' ');
+ ds_put_substring (farg, marg->mts[j].representation);
+ }
+ }
+ return 2;
+ }
+
struct parse_macro_function_ctx subctx = {
.input = &ctx->input[i],
.n_input = ctx->n_input - i,
int min_args, int max_args,
size_t *input_consumed)
{
- struct macro_token *tokens = ctx->input;
+ const struct macro_token *tokens = ctx->input;
size_t n_tokens = ctx->n_input;
if (!n_tokens
}
static bool
-string_is_quoted_string (const char *s, struct string *content)
+unquote_string (const char *s, struct string *content)
{
struct string_lexer slex;
string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE /* XXX */);
return true;
}
+/* Initializes TMP as an empty string and tries to unquote S into it with
+   unquote_string().  Returns TMP's contents if that succeeds, otherwise S
+   itself.  Either way TMP has been initialized, so the caller should
+   ds_destroy() it once it is done with the returned string. */
+static const char *
+unquote_string_in_place (const char *s, struct string *tmp)
+{
+  ds_init_empty (tmp);
+  if (unquote_string (s, tmp))
+    return ds_cstr (tmp);
+  return s;
+}
+
+/* Parses S as a decimal integer, optionally followed by white space, and
+   stores the value in *NP, clamped to the range of int.  Returns true only
+   if S contained at least one digit, nothing but white space follows the
+   number, and the value fits in an int without clamping.  *NP is written
+   even when the function returns false. */
+static bool
+parse_integer (const char *s, int *np)
+{
+  errno = 0;
+
+  char *tail;
+  long int n = strtol (s, &tail, 10);
+  *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
+
+  /* strtol() leaves TAIL equal to S when it converts nothing, e.g. for an
+     empty or all-blank string.  That must not be accepted as 0. */
+  if (tail == s)
+    return false;
+
+  tail += strspn (tail, CC_SPACES);
+  return *tail == '\0' && errno != ERANGE && n == *np;
+}
+
static bool
expand_macro_function (struct parse_macro_function_ctx *ctx,
struct string *output,
else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
input_consumed))
{
- char *tail;
- errno = 0;
- int n = strtol (args.strings[0], &tail, 10);
- if (*tail != '\0' || n < 0 || errno == ERANGE)
+      int n;
+      /* parse_integer() accepts negative values, so reject them here to
+         match the "non-negative" error message below. */
+      if (!parse_integer (args.strings[0], &n) || n < 0)
{
printf ("argument to !BLANKS must be non-negative integer (not \"%s\")\n", args.strings[0]);
string_array_destroy (&args);
input_consumed))
{
for (size_t i = 0; i < args.n; i++)
- if (!string_is_quoted_string (args.strings[i], output))
+ if (!unquote_string (args.strings[i], output))
ds_put_cstr (output, args.strings[i]);
}
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!head"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */);
+ if (mts.n > 0)
+ ds_put_substring (output, mts.mts[0].representation);
+ macro_tokens_uninit (&mts);
+ ds_destroy (&tmp);
+ }
+  else if (parse_macro_function (ctx, &args, ss_cstr ("!index"), 2, 2,
+                                 input_consumed))
+    {
+      /* Output the 1-based byte offset of the first occurrence of the second
+         argument within the first, or 0 if there is none.  The position is
+         held in a size_t so that it matches the %zu conversion; the raw
+         pointer difference has type ptrdiff_t. */
+      const char *haystack = args.strings[0];
+      const char *needle = strstr (haystack, args.strings[1]);
+      size_t position = needle ? (size_t) (needle - haystack) + 1 : 0;
+      ds_put_format (output, "%zu", position);
+    }
else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
input_consumed))
{
- if (string_is_quoted_string (args.strings[0], NULL))
+ if (unquote_string (args.strings[0], NULL))
ds_put_cstr (output, args.strings[0]);
else
{
ds_put_byte (output, '\'');
}
}
+  else if (parse_macro_function (ctx, &args, ss_cstr ("!substr"), 2, 3,
+                                 input_consumed))
+    {
+      /* The second argument is a 1-based start position; the optional third
+         argument limits the length of the result. */
+      int start;
+      if (!parse_integer (args.strings[1], &start) || start < 1)
+        {
+          printf ("second argument to !SUBSTR must be positive integer (not \"%s\")\n", args.strings[1]);
+          string_array_destroy (&args);
+          return false;
+        }
+
+      /* With no third argument, take everything from START onward. */
+      int count = INT_MAX;
+      if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
+        {
+          /* Report the offending third argument, not the second one. */
+          printf ("third argument to !SUBSTR must be non-negative integer (not \"%s\")\n", args.strings[2]);
+          string_array_destroy (&args);
+          return false;
+        }
+
+      struct substring s = ss_cstr (args.strings[0]);
+      ds_put_substring (output, ss_substr (s, start - 1, count));
+    }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!tail"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */);
+ if (mts.n > 1)
+ {
+ struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
+ macro_tokens_to_representation (&tail, output);
+ }
+ macro_tokens_uninit (&mts);
+ ds_destroy (&tmp);
+ }
else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
input_consumed))
{
- if (!string_is_quoted_string (args.strings[0], output))
+ if (!unquote_string (args.strings[0], output))
ds_put_cstr (output, args.strings[0]);
}
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!upcase"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+ char *upper = utf8_to_upper (s);
+ ds_put_cstr (output, upper);
+ free (upper);
+ ds_destroy (&tmp);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!eval"), 1, 1,
+ input_consumed))
+ {
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (args.strings[0]),
+ SEG_MODE_INTERACTIVE /* XXX */);
+ struct macro_tokens exp = { .n = 0 };
+ macro_expand (&mts, ctx->nesting_countdown - 1, ctx->macros,
+ ctx->me, ctx->expand, &exp);
+ macro_tokens_to_representation (&exp, output);
+ macro_tokens_uninit (&exp);
+ macro_tokens_uninit (&mts);
+ }
else if (ctx->n_input > 0
&& ctx->input[0].token.type == T_MACRO_ID
&& ss_equals_case (ctx->input[0].token.string, ss_cstr ("!null")))
return true;
}
+/* State shared by the macro_evaluate_*() expression evaluator functions.
+   macro_evaluate_expression() fills it in from its own arguments, and
+   macro_evaluate_literal() copies every member into the
+   parse_macro_function_ctx that it hands to expand_macro_function(). */
+struct expr_context
+ {
+ int nesting_countdown;
+ const struct macro_set *macros;
+ const struct macro_expander *me;
+ bool *expand;
+ };
+
+/* Declared ahead of its definition because macro_evaluate_literal() calls it
+   to evaluate a parenthesized subexpression. */
+static char *macro_evaluate_or (const struct expr_context *ctx,
+ const struct macro_token **tokens,
+ const struct macro_token *end);
+
+/* Evaluates a single operand of a macro expression that starts at *TOKENS
+   and ends just before END.  The operand is one of:
+
+     - A parenthesized subexpression, evaluated with macro_evaluate_or().
+
+     - A macro function call accepted by expand_macro_function(), whose
+       output becomes the value.
+
+     - Any other single token, whose representation becomes the value.
+
+   On success, advances *TOKENS past the operand and returns its value as a
+   string that the caller must free().  On failure (no tokens left, a failed
+   subexpression, or a missing ')'), returns NULL and leaves *TOKENS
+   unchanged. */
+static char *
+macro_evaluate_literal (const struct expr_context *ctx,
+                        const struct macro_token **tokens,
+                        const struct macro_token *end)
+{
+  const struct macro_token *p = *tokens;
+  if (p >= end)
+    return NULL;
+  if (p->token.type == T_LPAREN)
+    {
+      p++;
+      char *value = macro_evaluate_or (ctx, &p, end);
+      if (!value)
+        return NULL;
+      if (p >= end || p->token.type != T_RPAREN)
+        {
+          free (value);
+          printf ("expecting ')' in macro expression\n");
+          return NULL;
+        }
+      p++;
+      *tokens = p;
+      return value;
+    }
+
+  struct parse_macro_function_ctx fctx = {
+    .input = p,
+    .n_input = end - p,
+    .nesting_countdown = ctx->nesting_countdown,
+    .macros = ctx->macros,
+    .me = ctx->me,
+    .expand = ctx->expand,
+  };
+  struct string function_output = DS_EMPTY_INITIALIZER;
+  size_t function_consumed;
+  if (expand_macro_function (&fctx, &function_output, &function_consumed))
+    {
+      *tokens = p + function_consumed;
+      return ds_steal_cstr (&function_output);
+    }
+
+  /* No macro function was expanded.  Release anything that may have been
+     written to the buffer so that it cannot leak. */
+  ds_destroy (&function_output);
+
+  /* Fall back to treating the token as a literal. */
+  *tokens = p + 1;
+  return ss_xstrdup (p->representation);
+}
+
+/* Evaluates an operand that may be followed by one relational operator
+   (=, EQ, NE, LT, GT, LE, GE) and a second operand, starting at *TOKENS and
+   ending just before END.
+
+   Without a relational operator, the first operand's value is returned as
+   is.  With one, both operands are unquoted where possible and compared
+   with strcmp(), and the result is "1" or "0".
+
+   On success, advances *TOKENS past everything consumed and returns a
+   string that the caller must free().  Returns NULL, leaving *TOKENS
+   unchanged, if either operand cannot be evaluated. */
+static char *
+macro_evaluate_logical (const struct expr_context *ctx,
+                        const struct macro_token **tokens,
+                        const struct macro_token *end)
+{
+  const struct macro_token *p = *tokens;
+  char *left = macro_evaluate_literal (ctx, &p, end);
+  if (!left)
+    return NULL;
+
+  enum token_type op = p < end ? p->token.type : T_STOP;
+  bool is_relational = (op == T_EQUALS || op == T_EQ || op == T_NE
+                        || op == T_LT || op == T_GT
+                        || op == T_LE || op == T_GE);
+  if (!is_relational)
+    {
+      *tokens = p;
+      return left;
+    }
+  p++;
+
+  char *right = macro_evaluate_literal (ctx, &p, end);
+  if (!right)
+    {
+      free (left);
+      return NULL;
+    }
+
+  struct string left_buf, right_buf;
+  const char *left_text = unquote_string_in_place (left, &left_buf);
+  const char *right_text = unquote_string_in_place (right, &right_buf);
+  int cmp = strcmp/*XXX*/ (left_text, right_text);
+  ds_destroy (&left_buf);
+  ds_destroy (&right_buf);
+
+  free (left);
+  free (right);
+
+  bool b;
+  if (op == T_EQUALS || op == T_EQ)
+    b = cmp == 0;
+  else if (op == T_NE)
+    b = cmp != 0;
+  else if (op == T_LT)
+    b = cmp < 0;
+  else if (op == T_GT)
+    b = cmp > 0;
+  else if (op == T_LE)
+    b = cmp <= 0;
+  else
+    b = cmp >= 0;               /* op == T_GE */
+
+  *tokens = p;
+  return xstrdup (b ? "1" : "0");
+}
+
+/* Evaluates zero or more NOT operators followed by a relational expression,
+   starting at *TOKENS and ending just before END.
+
+   With no NOT operators, the operand's value is returned as is.  Otherwise
+   the operand is treated as false if it is exactly "0" and as true if it is
+   anything else, each NOT inverts it, and the result is "1" or "0".
+
+   Returns a string that the caller must free(), or NULL if the operand
+   cannot be evaluated.  *TOKENS is updated in both cases; on failure it is
+   left just past any NOT operators that were consumed. */
+static char *
+macro_evaluate_not (const struct expr_context *ctx,
+                    const struct macro_token **tokens,
+                    const struct macro_token *end)
+{
+  const struct macro_token *p = *tokens;
+
+  unsigned int negations = 0;
+  while (p < end && p->token.type == T_NOT)
+    {
+      p++;
+      negations++;
+    }
+
+  char *operand = macro_evaluate_logical (ctx, &p, end);
+  if (!operand || !negations)
+    {
+      *tokens = p;
+      return operand;
+    }
+
+  /* strcmp() only promises the sign of its result, so reduce it to a truth
+     value before applying the negations.  XORing the raw result with the
+     parity bit would report "true" for any nonzero result other than 1. */
+  bool truth = strcmp (operand, "0") != 0;
+  bool b = (negations & 1) ? !truth : truth;
+  free (operand);
+  *tokens = p;
+  return xstrdup (b ? "1" : "0");
+}
+
+/* Evaluates one or more NOT-level expressions joined by AND, starting at
+   *TOKENS and ending just before END.
+
+   With no AND operator, the single operand's value is returned as is.
+   Otherwise each operand counts as false if it is exactly "0" and as true
+   if it is anything else, and the result is "1" or "0".  Every operand is
+   evaluated; there is no short-circuiting.
+
+   On success, advances *TOKENS past everything consumed and returns a
+   string that the caller must free().  Returns NULL, leaving *TOKENS
+   unchanged, if any operand cannot be evaluated. */
+static char *
+macro_evaluate_and (const struct expr_context *ctx,
+                    const struct macro_token **tokens,
+                    const struct macro_token *end)
+{
+  const struct macro_token *p = *tokens;
+  char *acc = macro_evaluate_not (ctx, &p, end);
+  if (!acc)
+    return NULL;
+
+  for (; p < end && p->token.type == T_AND; )
+    {
+      p++;
+      char *operand = macro_evaluate_not (ctx, &p, end);
+      if (!operand)
+        {
+          free (acc);
+          return NULL;
+        }
+
+      bool acc_true = strcmp (acc, "0") != 0;
+      bool operand_true = strcmp (operand, "0") != 0;
+      free (acc);
+      free (operand);
+      acc = xstrdup (acc_true && operand_true ? "1" : "0");
+    }
+  *tokens = p;
+  return acc;
+}
+
+/* Evaluates one or more AND-level expressions joined by OR, starting at
+   *TOKENS and ending just before END.
+
+   With no OR operator, the single operand's value is returned as is.
+   Otherwise each operand counts as false if it is exactly "0" and as true
+   if it is anything else, and the result is "1" or "0".  Every operand is
+   evaluated; there is no short-circuiting.
+
+   On success, advances *TOKENS past everything consumed and returns a
+   string that the caller must free().  Returns NULL, leaving *TOKENS
+   unchanged, if any operand cannot be evaluated. */
+static char *
+macro_evaluate_or (const struct expr_context *ctx,
+                   const struct macro_token **tokens,
+                   const struct macro_token *end)
+{
+  const struct macro_token *p = *tokens;
+  char *acc = macro_evaluate_and (ctx, &p, end);
+  if (!acc)
+    return NULL;
+
+  for (; p < end && p->token.type == T_OR; )
+    {
+      p++;
+      char *operand = macro_evaluate_and (ctx, &p, end);
+      if (!operand)
+        {
+          free (acc);
+          return NULL;
+        }
+
+      bool acc_true = strcmp (acc, "0") != 0;
+      bool operand_true = strcmp (operand, "0") != 0;
+      free (acc);
+      free (operand);
+      acc = xstrdup (acc_true || operand_true ? "1" : "0");
+    }
+  *tokens = p;
+  return acc;
+}
+
+/* Entry point of the expression evaluator.  Evaluates the expression formed
+   by the N_TOKENS tokens starting at *TOKENS, bundling the remaining
+   arguments into an expr_context for the macro_evaluate_*() functions.
+   Returns whatever macro_evaluate_or() returns: the expression's value as a
+   string for the caller to free(), with *TOKENS advanced past the tokens
+   consumed, or NULL on failure. */
+static char *
+macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens,
+                           int nesting_countdown, const struct macro_set *macros,
+                           const struct macro_expander *me, bool *expand)
+{
+  const struct macro_token *end = *tokens + n_tokens;
+
+  struct expr_context ctx;
+  ctx.nesting_countdown = nesting_countdown;
+  ctx.macros = macros;
+  ctx.me = me;
+  ctx.expand = expand;
+
+  return macro_evaluate_or (&ctx, tokens, end);
+}
+
+/* Scans the tokens from P up to (but not including) END for the token that
+   terminates the current clause of an !IF construct: the first !ELSE or
+   !IFEND that is not inside a nested !IF...!IFEND pair.  Only T_MACRO_ID
+   tokens are considered, and names are matched case-insensitively.
+
+   Returns a pointer to that token, or NULL if there is none.  The caller
+   must inspect the token to tell !ELSE from !IFEND. */
+static const struct macro_token *
+find_ifend_clause (const struct macro_token *p, const struct macro_token *end)
+{
+  size_t depth = 0;
+  while (p < end)
+    {
+      if (p->token.type == T_MACRO_ID)
+        {
+          struct substring id = p->token.string;
+          if (ss_equals_case (id, ss_cstr ("!IF")))
+            depth++;
+          else if (ss_equals_case (id, ss_cstr ("!IFEND")))
+            {
+              if (depth == 0)
+                return p;
+              depth--;
+            }
+          else if (depth == 0 && ss_equals_case (id, ss_cstr ("!ELSE")))
+            return p;
+        }
+      p++;
+    }
+  return NULL;
+}
+
+/* If the N_TOKENS tokens at TOKENS begin with an !IF construct of the form
+
+     !IF expression !THEN tokens [!ELSE tokens] !IFEND
+
+   evaluates the expression and, if the selected clause is nonempty, expands
+   that clause's tokens into EXP with macro_expand().  The condition is false
+   if the expression evaluates to exactly "0" and true otherwise.  Returns
+   the number of tokens in the whole construct, through !IFEND.
+
+   Returns 0 without calling macro_expand() if TOKENS does not begin with an
+   !IF macro identifier, if the expression cannot be evaluated, or if !THEN
+   or the terminating !ELSE/!IFEND is missing (in the latter cases a message
+   is printed).
+
+   NESTING_COUNTDOWN, MACROS, ME, and EXPAND are passed along to the
+   expression evaluator and to macro_expand(). */
+static size_t
+macro_expand_if (const struct macro_token *tokens, size_t n_tokens,
+                 int nesting_countdown, const struct macro_set *macros,
+                 const struct macro_expander *me, bool *expand,
+                 struct macro_tokens *exp)
+{
+  const struct macro_token *p = tokens;
+  const struct macro_token *end = tokens + n_tokens;
+
+  /* Require a macro identifier, as for !THEN, !ELSE, and !IFEND, so that
+     some other kind of token whose string happens to be "!IF" cannot start
+     an !IF construct. */
+  if (p >= end
+      || p->token.type != T_MACRO_ID
+      || !ss_equals_case (p->token.string, ss_cstr ("!IF")))
+    return 0;
+
+  p++;
+  char *result = macro_evaluate_expression (&p, end - p,
+                                            nesting_countdown, macros, me, expand);
+  if (!result)
+    return 0;
+  bool b = strcmp (result, "0");
+  free (result);
+
+  if (p >= end
+      || p->token.type != T_MACRO_ID
+      || !ss_equals_case (p->token.string, ss_cstr ("!THEN")))
+    {
+      printf ("!THEN expected\n");
+      return 0;
+    }
+
+  /* Locate the end of the !THEN clause, which is either !ELSE or !IFEND. */
+  const struct macro_token *start_then = p + 1;
+  const struct macro_token *end_then = find_ifend_clause (start_then, end);
+  if (!end_then)
+    {
+      printf ("!ELSE or !IFEND expected\n");
+      return 0;
+    }
+
+  /* If there is an !ELSE clause, it must be terminated by !IFEND; a second
+     !ELSE is an error. */
+  const struct macro_token *start_else, *end_if;
+  if (ss_equals_case (end_then->token.string, ss_cstr ("!ELSE")))
+    {
+      start_else = end_then + 1;
+      end_if = find_ifend_clause (start_else, end);
+      if (!end_if
+          || !ss_equals_case (end_if->token.string, ss_cstr ("!IFEND")))
+        {
+          printf ("!IFEND expected\n");
+          return 0;
+        }
+    }
+  else
+    {
+      start_else = NULL;
+      end_if = end_then;
+    }
+
+  /* Select the clause to expand, if any. */
+  const struct macro_token *start;
+  size_t n;
+  if (b)
+    {
+      start = start_then;
+      n = end_then - start_then;
+    }
+  else if (start_else)
+    {
+      start = start_else;
+      n = end_if - start_else;
+    }
+  else
+    {
+      start = NULL;
+      n = 0;
+    }
+
+  if (n)
+    {
+      /* macro_expand() takes its input as a const struct macro_tokens, so
+         casting away const here only serves to build that wrapper. */
+      struct macro_tokens mts = {
+        .mts = CONST_CAST (struct macro_token *, start),
+        .n = n,
+      };
+      macro_expand (&mts, nesting_countdown, macros, me, expand, exp);
+    }
+  return (end_if + 1) - tokens;
+}
+
static void
macro_expand (const struct macro_tokens *mts,
int nesting_countdown, const struct macro_set *macros,
const struct macro_expander *me, bool *expand,
struct macro_tokens *exp)
{
- /* Macro expansion:
-
- - Macro names in macro bodies are not expanded by default. !EVAL()
- expands them.
-
- - Macro names in arguments to macro invocations (outside of macro bodies)
- are expanded by default, unless !NOEXPAND. */
if (nesting_countdown <= 0)
{
printf ("maximum nesting level exceeded\n");
macro_tokens_add (exp, &arg->mts[i]);
continue;
}
+
+ if (is_bang_star (mts->mts, mts->n, i))
+ {
+ for (size_t j = 0; j < me->macro->n_params; j++)
+ {
+ const struct macro_param *param = &me->macro->params[j];
+ if (!param->positional)
+ break;
+
+ const struct macro_tokens *arg = me->args[j];
+ if (*expand && param->expand_arg)
+ macro_expand (arg, nesting_countdown, macros, NULL, expand, exp);
+ else
+ for (size_t k = 0; k < arg->n; k++)
+ macro_tokens_add (exp, &arg->mts[k]);
+ }
+ i++;
+ continue;
+ }
+
+ size_t n = macro_expand_if (&mts->mts[i], mts->n - i,
+ nesting_countdown, macros, me, expand,
+ exp);
+ if (n > 0)
+ {
+ i += n - 1;
+ continue;
+ }
}
if (*expand)
continue;
}
- /* Maybe each arg should just be a string, either a quoted string or a
- non-quoted string containing tokens. */
struct parse_macro_function_ctx ctx = {
.input = &mts->mts[i],
.n_input = mts->n - i,