#include "language/lexer/macro.h"
+#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/str.h"
+#include "libpspp/string-array.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
static bool
expand_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_token *output,
- size_t *input_consumed);
+ struct string *output, size_t *input_consumed);
static size_t
parse_function_arg (struct parse_macro_function_ctx *ctx,
- size_t i, struct macro_token *farg)
+ size_t i, struct string *farg)
{
struct macro_token *tokens = ctx->input;
const struct token *token = &tokens[i].token;
{
size_t param_idx = param - ctx->me->macro->params;
const struct macro_tokens *marg = ctx->me->args[param_idx];
- if (marg->n == 1)
- macro_token_copy (farg, &marg->mts[0]);
- else
+ for (size_t i = 0; i < marg->n; i++)
{
- struct string s = DS_EMPTY_INITIALIZER;
- for (size_t i = 0; i < marg->n; i++)
- {
- if (i)
- ds_put_byte (&s, ' ');
- ds_put_substring (&s, marg->mts[i].representation);
- }
-
- struct substring s_copy;
- ss_alloc_substring (&s_copy, s.ss);
-
- *farg = (struct macro_token) {
- .token = { .type = T_MACRO_ID, .string = s.ss },
- .representation = s_copy,
- };
+ if (i)
+ ds_put_byte (farg, ' ');
+ ds_put_substring (farg, marg->mts[i].representation);
}
return 1;
}
return subinput_consumed;
}
- macro_token_copy (farg, &tokens[i]);
+ ds_put_substring (farg, tokens[i].representation);
return 1;
}
static bool
parse_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_tokens *args,
+ struct string_array *args,
struct substring function,
int min_args, int max_args,
size_t *input_consumed)
return false;
}
- *args = (struct macro_tokens) { .n = 0 };
+ string_array_init (args);
for (size_t i = 2;; )
{
return true;
}
- i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args));
+ struct string s = DS_EMPTY_INITIALIZER;
+ i += parse_function_arg (ctx, i, &s);
if (i >= n_tokens)
- goto unexpected_end;
+ {
+ ds_destroy (&s);
+ goto unexpected_end;
+ }
+ string_array_append_nocopy (args, ds_steal_cstr (&s));
if (tokens[i].token.type == T_COMMA)
i++;
function.string);
/* Fall through. */
error:
- macro_tokens_uninit (args);
+ string_array_destroy (args);
return false;
}
+/* Tests whether S consists of exactly one quoted string token and, if so,
+   extracts its value.
+
+   S is tokenized with a string lexer.  If the first token has type T_STRING
+   and the lexer reports no further token after it, the token's string value
+   is appended to CONTENT and the function returns true.  In every other case
+   it returns false and CONTENT is left untouched.
+
+   CONTENT may be null, in which case nothing is appended and the return value
+   alone tells the caller whether S is a single quoted string. */
+static bool
+unquote_string (const char *s, struct string *content)
+{
+  struct string_lexer slex;
+  string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE /* XXX */);
+
+  struct token token1;
+  if (!string_lexer_next (&slex, &token1))
+    return false;
+
+  if (token1.type != T_STRING)
+    {
+      token_uninit (&token1);
+      return false;
+    }
+
+  /* Anything after the string means S is more than a lone quoted string. */
+  struct token token2;
+  if (string_lexer_next (&slex, &token2))
+    {
+      token_uninit (&token1);
+      token_uninit (&token2);
+      return false;
+    }
+
+  /* A null CONTENT means "test only"; appending through it would dereference
+     a null pointer. */
+  if (content)
+    ds_put_substring (content, token1.string);
+  token_uninit (&token1);
+  return true;
+}
+
+/* Returns the unquoted value of S if unquote_string() accepts S as a single
+   quoted string, and otherwise returns S itself.
+
+   TMP is always initialized here.  In the unquoted case the returned pointer
+   refers to TMP's contents, so the caller must keep TMP alive while using the
+   result and ds_destroy() it afterward. */
+static const char *
+unquote_string_in_place (const char *s, struct string *tmp)
+{
+  ds_init_empty (tmp);
+  if (!unquote_string (s, tmp))
+    return s;
+  return ds_cstr (tmp);
+}
+
+/* Parses S as a base-10 integer, optionally followed by white space (the
+   characters in CC_SPACES).
+
+   Stores the parsed value in *NP, clamped to the range of int.  Returns true
+   only if S contained at least one digit, nothing other than white space
+   followed the number, and the value fit in an int without clamping.  On a
+   false return *NP has still been written and should not be relied upon. */
+static bool
+parse_integer (const char *s, int *np)
+{
+  errno = 0;
+
+  char *tail;
+  long int n = strtol (s, &tail, 10);
+  *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n;
+
+  /* strtol leaves TAIL equal to S when it performs no conversion.  Without
+     this check an empty or all-blank S would be accepted as 0. */
+  if (tail == s)
+    return false;
+
+  tail += strspn (tail, CC_SPACES);
+  return *tail == '\0' && errno != ERANGE && n == *np;
+}
+
+/* Tries to expand one macro function call taken from CTX.
+
+ Each supported function (!LENGTH, !BLANKS, !CONCAT, !HEAD, !INDEX, !QUOTE,
+ !SUBSTR, !TAIL, !UNQUOTE, !UPCASE) is attempted in turn through
+ parse_macro_function(), which receives INPUT_CONSUMED and fills ARGS with
+ the call's arguments as strings.  The bare keyword !NULL is recognized
+ separately: it sets *INPUT_CONSUMED to 1 and appends nothing.
+
+ On success the expansion text is appended to OUTPUT and true is returned.
+ Returns false if nothing matches, or if an argument to !BLANKS or !SUBSTR
+ is not an acceptable integer, in which case a message is printed to
+ stdout first. */
static bool
expand_macro_function (struct parse_macro_function_ctx *ctx,
- struct macro_token *output,
+ struct string *output,
size_t *input_consumed)
{
- struct macro_tokens args;
+ struct string_array args;
if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1,
input_consumed))
- {
- size_t length = args.mts[0].representation.length;
- *output = (struct macro_token) {
- .token = { .type = T_POS_NUM, .number = length },
- .representation = ss_cstr (xasprintf ("%zu", length)),
- };
- }
+ ds_put_format (output, "%zu", strlen (args.strings[0]));
else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1,
input_consumed))
{
- /* XXX this isn't right, it might be a character string containing a
- positive integer, e.g. via !CONCAT. */
- if (args.mts[0].token.type != T_POS_NUM)
+ int n;
+ /* parse_integer() accepts negative values; a negative count must be
+ rejected here, before it is converted to a byte count. */
+ if (!parse_integer (args.strings[0], &n) || n < 0)
{
- printf ("argument to !BLANKS must be positive integer\n");
- macro_tokens_uninit (&args);
+ printf ("argument to !BLANKS must be non-negative integer (not \"%s\")\n", args.strings[0]);
+ string_array_destroy (&args);
return false;
}
- struct string s = DS_EMPTY_INITIALIZER;
- ds_put_byte_multiple (&s, ' ', args.mts[0].token.number);
-
- struct substring s_copy;
- ss_alloc_substring (&s_copy, s.ss);
-
- *output = (struct macro_token) {
- .token = { .type = T_ID, .string = s.ss },
- .representation = s_copy,
- };
+ ds_put_byte_multiple (output, ' ', n);
}
else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX,
input_consumed))
{
- struct string s = DS_EMPTY_INITIALIZER;
for (size_t i = 0; i < args.n; i++)
- {
- if (args.mts[i].token.type == T_STRING)
- ds_put_substring (&s, args.mts[i].token.string);
- else
- ds_put_substring (&s, args.mts[i].representation);
- }
-
- *output = (struct macro_token) {
- .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss },
- };
- ss_alloc_substring (&output->representation, s.ss);
+ if (!unquote_string (args.strings[i], output))
+ ds_put_cstr (output, args.strings[i]);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!head"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */);
+ if (mts.n > 0)
+ ds_put_substring (output, mts.mts[0].representation);
+ macro_tokens_uninit (&mts);
+ ds_destroy (&tmp);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!index"), 2, 2,
+ input_consumed))
+ {
+ const char *haystack = args.strings[0];
+ const char *needle = strstr (haystack, args.strings[1]);
+ /* 1-based position of the first match, or 0 if there is none.  The
+ pointer difference is converted to size_t so that the argument type
+ matches the %zu conversion. */
+ size_t pos = needle ? (size_t) (needle - haystack) + 1 : 0;
+ ds_put_format (output, "%zu", pos);
}
else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1,
input_consumed))
{
- if (args.mts[0].token.type == T_STRING)
- macro_token_copy (output, &args.mts[0]);
+ /* Probe whether the argument is already a quoted string.  The unquoted
+ text lands in a scratch buffer that is discarded right away, so no
+ null pointer is ever handed to unquote_string(). */
+ struct string scratch = DS_EMPTY_INITIALIZER;
+ bool already_quoted = unquote_string (args.strings[0], &scratch);
+ ds_destroy (&scratch);
+ if (already_quoted)
+ ds_put_cstr (output, args.strings[0]);
else
{
- *output = (struct macro_token) { .token = { .type = T_STRING } };
- ss_alloc_substring (&output->token.string, args.mts[0].representation);
- output->representation = ss_cstr (token_to_string (&output->token));
+ ds_extend (output, strlen (args.strings[0]) + 2);
+ ds_put_byte (output, '\'');
+ for (const char *p = args.strings[0]; *p; p++)
+ {
+ if (*p == '\'')
+ ds_put_byte (output, '\'');
+ ds_put_byte (output, *p);
+ }
+ ds_put_byte (output, '\'');
}
}
- else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!substr"), 2, 3,
input_consumed))
{
- if (args.mts[0].token.type == T_STRING)
+ int start;
+ if (!parse_integer (args.strings[1], &start) || start < 1)
{
- *output = (struct macro_token) { .token = { .type = T_MACRO_ID } };
- ss_alloc_substring (&output->token.string, args.mts[0].token.string);
- output->representation = ss_cstr (token_to_string (&output->token));
+ printf ("second argument to !SUBSTR must be positive integer (not \"%s\")\n", args.strings[1]);
+ string_array_destroy (&args);
+ return false;
}
- else
- macro_token_copy (output, &args.mts[0]);
+
+ int count = INT_MAX;
+ if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0))
+ {
+ /* Report the offending third argument, not the second. */
+ printf ("third argument to !SUBSTR must be non-negative integer (not \"%s\")\n", args.strings[2]);
+ string_array_destroy (&args);
+ return false;
+ }
+
+ struct substring s = ss_cstr (args.strings[0]);
+ ds_put_substring (output, ss_substr (s, start - 1, count));
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!tail"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+
+ struct macro_tokens mts = { .n = 0 };
+ macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */);
+ if (mts.n > 1)
+ {
+ struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
+ macro_tokens_to_representation (&tail, output);
+ }
+ macro_tokens_uninit (&mts);
+ ds_destroy (&tmp);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1,
+ input_consumed))
+ {
+ if (!unquote_string (args.strings[0], output))
+ ds_put_cstr (output, args.strings[0]);
+ }
+ else if (parse_macro_function (ctx, &args, ss_cstr ("!upcase"), 1, 1,
+ input_consumed))
+ {
+ struct string tmp;
+ const char *s = unquote_string_in_place (args.strings[0], &tmp);
+ char *upper = utf8_to_upper (s);
+ ds_put_cstr (output, upper);
+ free (upper);
+ ds_destroy (&tmp);
}
else if (ctx->n_input > 0
&& ctx->input[0].token.type == T_MACRO_ID
&& ss_equals_case (ctx->input[0].token.string, ss_cstr ("!null")))
{
*input_consumed = 1;
- *output = (struct macro_token) {
- .token = { .type = T_MACRO_ID /* XXX*/ },
- };
- ss_alloc_substring (&output->token.string, ss_cstr (""));
return true;
}
else
return false;
- macro_tokens_uninit (&args);
+ string_array_destroy (&args);
return true;
}
.me = me,
.expand = expand,
};
- struct macro_token function_output;
+ struct string function_output = DS_EMPTY_INITIALIZER;
size_t function_consumed;
if (expand_macro_function (&ctx, &function_output, &function_consumed))
{
i += function_consumed - 1;
- if (function_output.token.type == T_MACRO_ID)
- macro_tokens_from_string (exp, function_output.token.string,
- SEG_MODE_INTERACTIVE /* XXX */);
- else
- macro_tokens_add (exp, &function_output);
- macro_token_uninit (&function_output);
+ macro_tokens_from_string (exp, function_output.ss,
+ SEG_MODE_INTERACTIVE /* XXX */);
+ ds_destroy (&function_output);
continue;
}