X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=inline;f=src%2Flanguage%2Flexer%2Fmacro.c;h=fdf76bacea12b2a07d5d3e6ead9811e7cdb84922;hb=25e92a9025f146d5880059f7e863d9c7944fafe6;hp=584ee7f2fd3ed29e3d4880761c7176814e55c62c;hpb=69bf3f901b0a949cfa957950f55df78d0c86a765;p=pspp diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index 584ee7f2fd..fdf76bacea 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -18,6 +18,7 @@ #include "language/lexer/macro.h" +#include #include #include @@ -25,9 +26,11 @@ #include "language/lexer/segment.h" #include "language/lexer/scan.h" #include "libpspp/assertion.h" +#include "libpspp/cast.h" #include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/str.h" +#include "libpspp/string-array.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -140,6 +143,7 @@ macro_tokens_from_string (struct macro_tokens *mts, const struct substring src, { if (token->type != SCAN_SKIP) { + printf ("error\n"); /* XXX report error */ } } @@ -701,7 +705,7 @@ macro_expander_add (struct macro_expander *me, const struct macro_token *mt) */ struct parse_macro_function_ctx { - struct macro_token *input; + const struct macro_token *input; size_t n_input; int nesting_countdown; const struct macro_set *macros; @@ -716,14 +720,24 @@ macro_expand (const struct macro_tokens *, static bool expand_macro_function (struct parse_macro_function_ctx *ctx, - struct macro_token *output, - size_t *input_consumed); + struct string *output, size_t *input_consumed); + +/* Returns true if the pair of tokens starting at offset OFS within MTS are !*, + false otherwise. */ +static bool +is_bang_star (const struct macro_token *mts, size_t n, size_t ofs) +{ + return (ofs + 1 < n + && mts[ofs].token.type == T_MACRO_ID + && ss_equals (mts[ofs].token.string, ss_cstr ("!")) + && mts[ofs + 1].token.type == T_ASTERISK); +} static size_t parse_function_arg (struct parse_macro_function_ctx *ctx, - size_t i, struct macro_token *farg) + size_t i, struct string *farg) { - struct macro_token *tokens = ctx->input; + const struct macro_token *tokens = ctx->input; const struct token *token = &tokens[i].token; if (token->type == T_MACRO_ID) { @@ -733,27 +747,31 @@ parse_function_arg (struct parse_macro_function_ctx *ctx, { size_t param_idx = param - ctx->me->macro->params; const struct macro_tokens *marg = ctx->me->args[param_idx]; - if (marg->n == 1) - macro_token_copy (farg, &marg->mts[0]); - else + for (size_t i = 0; i < marg->n; i++) { - struct string s = DS_EMPTY_INITIALIZER; - for (size_t i = 0; i < marg->n; i++) - { - if (i) - ds_put_byte (&s, ' '); - ds_put_substring (&s, marg->mts[i].representation); - } + if (i) + ds_put_byte (farg, ' '); + ds_put_substring (farg, marg->mts[i].representation); + } + return 1; + } - struct substring s_copy; - ss_alloc_substring (&s_copy, s.ss); + if (is_bang_star (ctx->input, ctx->n_input, i)) + { + for (size_t i = 0; i < ctx->me->macro->n_params; i++) + { + if (!ctx->me->macro->params[i].positional) + break; - *farg = (struct macro_token) { - .token = { .type = T_MACRO_ID, .string = s.ss }, - .representation = s_copy, - }; + const struct macro_tokens *marg = ctx->me->args[i]; + for (size_t j = 0; j < marg->n; j++) + { + if (i || j) + ds_put_byte (farg, ' '); + ds_put_substring (farg, marg->mts[j].representation); + } } - return 1; + return 2; } struct parse_macro_function_ctx subctx = { @@ -769,18 +787,18 @@ parse_function_arg (struct parse_macro_function_ctx *ctx, return subinput_consumed; } - macro_token_copy (farg, &tokens[i]); + ds_put_substring (farg, tokens[i].representation); return 1; } static bool parse_macro_function (struct parse_macro_function_ctx *ctx, - struct macro_tokens *args, + struct string_array *args, struct substring function, int min_args, int max_args, size_t *input_consumed) { - struct macro_token *tokens = ctx->input; + const struct macro_token *tokens = ctx->input; size_t n_tokens = ctx->n_input; if (!n_tokens @@ -794,7 +812,7 @@ parse_macro_function (struct parse_macro_function_ctx *ctx, return false; } - *args = (struct macro_tokens) { .n = 0 }; + string_array_init (args); for (size_t i = 2;; ) { @@ -811,9 +829,14 @@ parse_macro_function (struct parse_macro_function_ctx *ctx, return true; } - i += parse_function_arg (ctx, i, macro_tokens_add_uninit (args)); + struct string s = DS_EMPTY_INITIALIZER; + i += parse_function_arg (ctx, i, &s); if (i >= n_tokens) - goto unexpected_end; + { + ds_destroy (&s); + goto unexpected_end; + } + string_array_append_nocopy (args, ds_steal_cstr (&s)); if (tokens[i].token.type == T_COMMA) i++; @@ -829,124 +852,521 @@ unexpected_end: function.string); /* Fall through. */ error: - macro_tokens_uninit (args); + string_array_destroy (args); return false; } +static bool +unquote_string (const char *s, struct string *content) +{ + struct string_lexer slex; + string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE /* XXX */); + + struct token token1; + if (!string_lexer_next (&slex, &token1)) + return false; + + if (token1.type != T_STRING) + { + token_uninit (&token1); + return false; + } + + struct token token2; + if (string_lexer_next (&slex, &token2)) + { + token_uninit (&token1); + token_uninit (&token2); + return false; + } + + ds_put_substring (content, token1.string); + token_uninit (&token1); + return true; +} + +static const char * +unquote_string_in_place (const char *s, struct string *tmp) +{ + ds_init_empty (tmp); + return unquote_string (s, tmp) ? ds_cstr (tmp) : s; +} + +static bool +parse_integer (const char *s, int *np) +{ + errno = 0; + + char *tail; + long int n = strtol (s, &tail, 10); + *np = n < INT_MIN ? INT_MIN : n > INT_MAX ? INT_MAX : n; + tail += strspn (tail, CC_SPACES); + return *tail == '\0' && errno != ERANGE && n == *np; +} + static bool expand_macro_function (struct parse_macro_function_ctx *ctx, - struct macro_token *output, + struct string *output, size_t *input_consumed) { - struct macro_tokens args; + struct string_array args; if (parse_macro_function (ctx, &args, ss_cstr ("!length"), 1, 1, input_consumed)) - { - size_t length = args.mts[0].representation.length; - *output = (struct macro_token) { - .token = { .type = T_POS_NUM, .number = length }, - .representation = ss_cstr (xasprintf ("%zu", length)), - }; - } + ds_put_format (output, "%zu", strlen (args.strings[0])); else if (parse_macro_function (ctx, &args, ss_cstr ("!blanks"), 1, 1, input_consumed)) { - /* XXX this isn't right, it might be a character string containing a - positive integer, e.g. via !CONCAT. */ - if (args.mts[0].token.type != T_POS_NUM) + int n; + if (!parse_integer (args.strings[0], &n)) { - printf ("argument to !BLANKS must be positive integer\n"); - macro_tokens_uninit (&args); + printf ("argument to !BLANKS must be non-negative integer (not \"%s\")\n", args.strings[0]); + string_array_destroy (&args); return false; } - struct string s = DS_EMPTY_INITIALIZER; - ds_put_byte_multiple (&s, ' ', args.mts[0].token.number); - - struct substring s_copy; - ss_alloc_substring (&s_copy, s.ss); - - *output = (struct macro_token) { - .token = { .type = T_ID, .string = s.ss }, - .representation = s_copy, - }; + ds_put_byte_multiple (output, ' ', n); } else if (parse_macro_function (ctx, &args, ss_cstr ("!concat"), 1, INT_MAX, input_consumed)) { - struct string s; - bool all_strings = true; for (size_t i = 0; i < args.n; i++) + if (!unquote_string (args.strings[i], output)) + ds_put_cstr (output, args.strings[i]); + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!head"), 1, 1, + input_consumed)) + { + struct string tmp; + const char *s = unquote_string_in_place (args.strings[0], &tmp); + + struct macro_tokens mts = { .n = 0 }; + macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */); + if (mts.n > 0) + ds_put_substring (output, mts.mts[0].representation); + macro_tokens_uninit (&mts); + ds_destroy (&tmp); + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!index"), 2, 2, + input_consumed)) + { + const char *haystack = args.strings[0]; + const char *needle = strstr (haystack, args.strings[1]); + ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0); + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1, + input_consumed)) + { + if (unquote_string (args.strings[0], NULL)) + ds_put_cstr (output, args.strings[0]); + else { - if (args.mts[i].token.type == T_STRING) - ds_put_substring (&s, args.mts[i].token.string); - else + ds_extend (output, strlen (args.strings[0]) + 2); + ds_put_byte (output, '\''); + for (const char *p = args.strings[0]; *p; p++) { - all_strings = false; - ds_put_substring (&s, args.mts[i].representation); + if (*p == '\'') + ds_put_byte (output, '\''); + ds_put_byte (output, *p); } + ds_put_byte (output, '\''); } - - if (all_strings) + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!substr"), 2, 3, + input_consumed)) + { + int start; + if (!parse_integer (args.strings[1], &start) || start < 1) { - *output = (struct macro_token) { - .token = { .type = T_STRING, .string = s.ss }, - }; - output->representation = ss_cstr (token_to_string (&output->token)); + printf ("second argument to !SUBSTR must be positive integer (not \"%s\")\n", args.strings[1]); + string_array_destroy (&args); + return false; } - else + + int count = INT_MAX; + if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0)) { - *output = (struct macro_token) { - .token = { .type = T_MACRO_ID /*XXX*/, .string = s.ss }, - }; - ss_alloc_substring (&output->representation, s.ss); + printf ("third argument to !SUBSTR must be non-negative integer (not \"%s\")\n", args.strings[1]); + string_array_destroy (&args); + return false; } + + struct substring s = ss_cstr (args.strings[0]); + ds_put_substring (output, ss_substr (s, start - 1, count)); } - else if (parse_macro_function (ctx, &args, ss_cstr ("!quote"), 1, 1, + else if (parse_macro_function (ctx, &args, ss_cstr ("!tail"), 1, 1, input_consumed)) { - if (args.mts[0].token.type == T_STRING) - macro_token_copy (output, &args.mts[0]); - else + struct string tmp; + const char *s = unquote_string_in_place (args.strings[0], &tmp); + + struct macro_tokens mts = { .n = 0 }; + macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */); + if (mts.n > 1) { - *output = (struct macro_token) { .token = { .type = T_STRING } }; - ss_alloc_substring (&output->token.string, args.mts[0].representation); - output->representation = ss_cstr (token_to_string (&output->token)); + struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 }; + macro_tokens_to_representation (&tail, output); } + macro_tokens_uninit (&mts); + ds_destroy (&tmp); } else if (parse_macro_function (ctx, &args, ss_cstr ("!unquote"), 1, 1, input_consumed)) { - if (args.mts[0].token.type == T_STRING) - { - *output = (struct macro_token) { .token = { .type = T_MACRO_ID } }; - ss_alloc_substring (&output->token.string, args.mts[0].token.string); - output->representation = ss_cstr (token_to_string (&output->token)); - } - else - macro_token_copy (output, &args.mts[0]); + if (!unquote_string (args.strings[0], output)) + ds_put_cstr (output, args.strings[0]); + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!upcase"), 1, 1, + input_consumed)) + { + struct string tmp; + const char *s = unquote_string_in_place (args.strings[0], &tmp); + char *upper = utf8_to_upper (s); + ds_put_cstr (output, upper); + free (upper); + ds_destroy (&tmp); + } + else if (parse_macro_function (ctx, &args, ss_cstr ("!eval"), 1, 1, + input_consumed)) + { + struct macro_tokens mts = { .n = 0 }; + macro_tokens_from_string (&mts, ss_cstr (args.strings[0]), + SEG_MODE_INTERACTIVE /* XXX */); + struct macro_tokens exp = { .n = 0 }; + macro_expand (&mts, ctx->nesting_countdown - 1, ctx->macros, + ctx->me, ctx->expand, &exp); + macro_tokens_to_representation (&exp, output); + macro_tokens_uninit (&exp); + macro_tokens_uninit (&mts); + } + else if (ctx->n_input > 0 + && ctx->input[0].token.type == T_MACRO_ID + && ss_equals_case (ctx->input[0].token.string, ss_cstr ("!null"))) + { + *input_consumed = 1; + return true; } else return false; - macro_tokens_uninit (&args); + string_array_destroy (&args); return true; } +struct expr_context + { + int nesting_countdown; + const struct macro_set *macros; + const struct macro_expander *me; + bool *expand; + }; + +static char *macro_evaluate_or (const struct expr_context *ctx, + const struct macro_token **tokens, + const struct macro_token *end); + +static char * +macro_evaluate_literal (const struct expr_context *ctx, + const struct macro_token **tokens, + const struct macro_token *end) +{ + const struct macro_token *p = *tokens; + if (p >= end) + return NULL; + if (p->token.type == T_LPAREN) + { + p++; + char *value = macro_evaluate_or (ctx, &p, end); + if (!value) + return NULL; + if (p >= end || p->token.type != T_RPAREN) + { + free (value); + printf ("expecting ')' in macro expression\n"); + return NULL; + } + p++; + *tokens = p; + return value; + } + + struct parse_macro_function_ctx fctx = { + .input = p, + .n_input = end - p, + .nesting_countdown = ctx->nesting_countdown, + .macros = ctx->macros, + .me = ctx->me, + .expand = ctx->expand, + }; + struct string function_output = DS_EMPTY_INITIALIZER; + size_t function_consumed; + if (expand_macro_function (&fctx, &function_output, &function_consumed)) + { + *tokens = p + function_consumed; + return ds_steal_cstr (&function_output); + } + + *tokens = p + 1; + return ss_xstrdup (p->representation); +} + +static char * +macro_evaluate_logical (const struct expr_context *ctx, + const struct macro_token **tokens, + const struct macro_token *end) +{ + const struct macro_token *p = *tokens; + char *lhs = macro_evaluate_literal (ctx, &p, end); + if (!lhs) + return NULL; + + enum token_type op = p < end ? p->token.type : T_STOP; + if (op != T_EQUALS && op != T_EQ && op != T_NE && op != T_LT + && op != T_GT && op != T_LE && op != T_GE) + { + *tokens = p; + return lhs; + } + p++; + + char *rhs = macro_evaluate_literal (ctx, &p, end); + if (!rhs) + { + free (lhs); + return NULL; + } + + struct string lhs_tmp, rhs_tmp; + int cmp = strcmp/*XXX*/ (unquote_string_in_place (lhs, &lhs_tmp), + unquote_string_in_place (rhs, &rhs_tmp)); + ds_destroy (&lhs_tmp); + ds_destroy (&rhs_tmp); + + free (lhs); + free (rhs); + + bool b = (op == T_EQUALS || op == T_EQ ? !cmp + : op == T_NE ? cmp + : op == T_LT ? cmp < 0 + : op == T_GT ? cmp > 0 + : op == T_LE ? cmp <= 0 + :/*op == T_GE*/cmp >= 0); + + *tokens = p; + return xstrdup (b ? "1" : "0"); +} + +static char * +macro_evaluate_not (const struct expr_context *ctx, + const struct macro_token **tokens, + const struct macro_token *end) +{ + const struct macro_token *p = *tokens; + + unsigned int negations = 0; + while (p < end && p->token.type == T_NOT) + { + p++; + negations++; + } + + char *operand = macro_evaluate_logical (ctx, &p, end); + if (!operand || !negations) + { + *tokens = p; + return operand; + } + + bool b = strcmp (operand, "0") ^ (negations & 1); + free (operand); + *tokens = p; + return xstrdup (b ? "1" : "0"); +} + +static char * +macro_evaluate_and (const struct expr_context *ctx, + const struct macro_token **tokens, + const struct macro_token *end) +{ + const struct macro_token *p = *tokens; + char *lhs = macro_evaluate_not (ctx, &p, end); + if (!lhs) + return NULL; + + while (p < end && p->token.type == T_AND) + { + p++; + char *rhs = macro_evaluate_not (ctx, &p, end); + if (!rhs) + { + free (lhs); + return NULL; + } + + bool b = strcmp (lhs, "0") && strcmp (rhs, "0"); + free (lhs); + free (rhs); + lhs = xstrdup (b ? "1" : "0"); + } + *tokens = p; + return lhs; +} + +static char * +macro_evaluate_or (const struct expr_context *ctx, + const struct macro_token **tokens, + const struct macro_token *end) +{ + const struct macro_token *p = *tokens; + char *lhs = macro_evaluate_and (ctx, &p, end); + if (!lhs) + return NULL; + + while (p < end && p->token.type == T_OR) + { + p++; + char *rhs = macro_evaluate_and (ctx, &p, end); + if (!rhs) + { + free (lhs); + return NULL; + } + + bool b = strcmp (lhs, "0") || strcmp (rhs, "0"); + free (lhs); + free (rhs); + lhs = xstrdup (b ? "1" : "0"); + } + *tokens = p; + return lhs; +} + +static char * +macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens, + int nesting_countdown, const struct macro_set *macros, + const struct macro_expander *me, bool *expand) +{ + const struct expr_context ctx = { + .nesting_countdown = nesting_countdown, + .macros = macros, + .me = me, + .expand = expand, + }; + return macro_evaluate_or (&ctx, tokens, *tokens + n_tokens); +} + +static const struct macro_token * +find_ifend_clause (const struct macro_token *p, const struct macro_token *end) +{ + size_t nesting = 0; + for (; p < end; p++) + { + if (p->token.type != T_MACRO_ID) + continue; + + if (ss_equals_case (p->token.string, ss_cstr ("!IF"))) + nesting++; + else if (ss_equals_case (p->token.string, ss_cstr ("!IFEND"))) + { + if (!nesting) + return p; + nesting--; + } + else if (ss_equals_case (p->token.string, ss_cstr ("!ELSE")) && !nesting) + return p; + } + return NULL; +} + +static size_t +macro_expand_if (const struct macro_token *tokens, size_t n_tokens, + int nesting_countdown, const struct macro_set *macros, + const struct macro_expander *me, bool *expand, + struct macro_tokens *exp) +{ + const struct macro_token *p = tokens; + const struct macro_token *end = tokens + n_tokens; + + if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!IF"))) + return 0; + + p++; + char *result = macro_evaluate_expression (&p, end - p, + nesting_countdown, macros, me, expand); + if (!result) + return 0; + bool b = strcmp (result, "0"); + free (result); + + if (p >= end + || p->token.type != T_MACRO_ID + || !ss_equals_case (p->token.string, ss_cstr ("!THEN"))) + { + printf ("!THEN expected\n"); + return 0; + } + + const struct macro_token *start_then = p + 1; + const struct macro_token *end_then = find_ifend_clause (start_then, end); + if (!end_then) + { + printf ("!ELSE or !IFEND expected\n"); + return 0; + } + + const struct macro_token *start_else, *end_if; + if (ss_equals_case (end_then->token.string, ss_cstr ("!ELSE"))) + { + start_else = end_then + 1; + end_if = find_ifend_clause (start_else, end); + if (!end_if + || !ss_equals_case (end_if->token.string, ss_cstr ("!IFEND"))) + { + printf ("!IFEND expected\n"); + return 0; + } + } + else + { + start_else = NULL; + end_if = end_then; + } + + const struct macro_token *start; + size_t n; + if (b) + { + start = start_then; + n = end_then - start_then; + } + else if (start_else) + { + start = start_else; + n = end_if - start_else; + } + else + { + start = NULL; + n = 0; + } + + if (n) + { + struct macro_tokens mts = { + .mts = CONST_CAST (struct macro_token *, start), + .n = n, + }; + macro_expand (&mts, nesting_countdown, macros, me, expand, exp); + } + return (end_if + 1) - tokens; +} + static void macro_expand (const struct macro_tokens *mts, int nesting_countdown, const struct macro_set *macros, const struct macro_expander *me, bool *expand, struct macro_tokens *exp) { - /* Macro expansion: - - - Macro names in macro bodies are not expanded by default. !EVAL() - expands them. - - - Macro names in arguments to macro invocations (outside of macro bodies) - are expanded by default, unless !NOEXPAND. */ if (nesting_countdown <= 0) { printf ("maximum nesting level exceeded\n"); @@ -974,6 +1394,34 @@ macro_expand (const struct macro_tokens *mts, macro_tokens_add (exp, &arg->mts[i]); continue; } + + if (is_bang_star (mts->mts, mts->n, i)) + { + for (size_t j = 0; j < me->macro->n_params; j++) + { + const struct macro_param *param = &me->macro->params[j]; + if (!param->positional) + break; + + const struct macro_tokens *arg = me->args[j]; + if (*expand && param->expand_arg) + macro_expand (arg, nesting_countdown, macros, NULL, expand, exp); + else + for (size_t k = 0; k < arg->n; k++) + macro_tokens_add (exp, &arg->mts[k]); + } + i++; + continue; + } + + size_t n = macro_expand_if (&mts->mts[i], mts->n - i, + nesting_countdown, macros, me, expand, + exp); + if (n > 0) + { + i += n - 1; + continue; + } } if (*expand) @@ -1004,8 +1452,6 @@ macro_expand (const struct macro_tokens *mts, continue; } - /* Maybe each arg should just be a string, either a quoted string or a - non-quoted string containing tokens. */ struct parse_macro_function_ctx ctx = { .input = &mts->mts[i], .n_input = mts->n - i, @@ -1014,18 +1460,15 @@ macro_expand (const struct macro_tokens *mts, .me = me, .expand = expand, }; - struct macro_token function_output; + struct string function_output = DS_EMPTY_INITIALIZER; size_t function_consumed; if (expand_macro_function (&ctx, &function_output, &function_consumed)) { i += function_consumed - 1; - if (function_output.token.type == T_MACRO_ID) - macro_tokens_from_string (exp, function_output.token.string, - SEG_MODE_INTERACTIVE /* XXX */); - else - macro_tokens_add (exp, &function_output); - macro_token_uninit (&function_output); + macro_tokens_from_string (exp, function_output.ss, + SEG_MODE_INTERACTIVE /* XXX */); + ds_destroy (&function_output); continue; }