From 8fe18b90fa9f506e72f9943a8427795503bf28b4 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 4 Jul 2021 17:14:23 -0700 Subject: [PATCH] improve function parsing --- src/language/lexer/macro.c | 417 ++++++++++++++++++------------- tests/language/control/define.at | 118 +++++++-- 2 files changed, 340 insertions(+), 195 deletions(-) diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index 9ae83d1437..c97bc61068 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -978,72 +978,44 @@ parse_function_arg (const struct macro_expander *me, return 1; } -static bool -parse_macro_function (const struct macro_expander *me, - const struct macro_token *tokens, size_t n_tokens, - struct string_array *args, - struct substring function, - int min_args, int max_args, - size_t *input_consumed) +static size_t +parse_function_args (const struct macro_expander *me, + const struct macro_token *mts, size_t n, + const char *function, + struct string_array *args) { - if (!n_tokens - || tokens[0].token.type != T_MACRO_ID - || !ss_equals_case (tokens[0].token.string, function)) /* XXX abbrevs allowed */ - return false; - - if (n_tokens < 2 || tokens[1].token.type != T_LPAREN) + if (n < 2 || mts[1].token.type != T_LPAREN) { - macro_error (me->stack, n_tokens > 1 ? &tokens[1] : NULL, - _("`(' expected following %s."), function.string); - return false; + macro_error (me->stack, n > 1 ? 
&mts[1] : NULL, + _("`(' expected following %s."), function); + return 0; } - string_array_init (args); - - for (size_t i = 2;; ) + for (size_t i = 2; i < n; ) { - if (i >= n_tokens) - goto unexpected_end; - if (tokens[i].token.type == T_RPAREN) - { - *input_consumed = i + 1; - if (args->n < min_args || args->n > max_args) - { - macro_error (me->stack, &tokens[i], - _("Wrong number of arguments to macro function %s."), - function.string); - goto error; - } - return true; - } + if (mts[i].token.type == T_RPAREN) + return i + 1; struct string s = DS_EMPTY_INITIALIZER; - i += parse_function_arg (me, tokens + i, n_tokens - i, &s); - if (i >= n_tokens) - { - ds_destroy (&s); - goto unexpected_end; - } + i += parse_function_arg (me, mts + i, n - i, &s); string_array_append_nocopy (args, ds_steal_cstr (&s)); - if (tokens[i].token.type == T_COMMA) + if (i >= n) + break; + else if (mts[i].token.type == T_COMMA) i++; - else if (tokens[i].token.type != T_RPAREN) + else if (mts[i].token.type != T_RPAREN) { - macro_error (me->stack, &tokens[i], + macro_error (me->stack, &mts[i], _("`,' or `)' expected in call to macro function %s."), - function.string); - goto error; + function); + return 0; } } -unexpected_end: macro_error (me->stack, NULL, _("Missing `)' in call to macro function %s."), - function.string); - /* Fall through. 
*/ -error: - string_array_destroy (args); - return false; + function); + return 0; } static bool @@ -1101,57 +1073,148 @@ expand_macro_function (const struct macro_expander *me, const struct macro_token *input, size_t n_input, struct string *output, size_t *input_consumed) { - struct string_array args; - if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!LENGTH"), 1, 1, - input_consumed)) - ds_put_format (output, "%zu", strlen (args.strings[0])); - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!BLANKS"), 1, 1, - input_consumed)) - { - int n; - if (!parse_integer (args.strings[0], &n)) + if (!n_input || input[0].token.type != T_MACRO_ID) + return false; + + struct macro_function + { + const char *name; + int min_args; + int max_args; + }; + enum macro_function_id + { + MF_BLANKS, + MF_CONCAT, + MF_EVAL, + MF_HEAD, + MF_INDEX, + MF_LENGTH, + MF_NULL, + MF_QUOTE, + MF_SUBSTR, + MF_TAIL, + MF_UNQUOTE, + MF_UPCASE, + }; + static const struct macro_function mfs[] = { + [MF_BLANKS] = { "!BLANKS", 1, 1 }, + [MF_CONCAT] = { "!CONCAT", 1, INT_MAX }, + [MF_EVAL] = { "!EVAL", 1, 1 }, + [MF_HEAD] = { "!HEAD", 1, 1 }, + [MF_INDEX] = { "!INDEX", 2, 2 }, + [MF_LENGTH] = { "!LENGTH", 1, 1 }, + [MF_NULL] = { "!NULL", 0, 0 }, + [MF_QUOTE] = { "!QUOTE", 1, 1 }, + [MF_SUBSTR] = { "!SUBSTR", 2, 3 }, + [MF_TAIL] = { "!TAIL", 1, 1 }, + [MF_UNQUOTE] = { "!UNQUOTE", 1, 1 }, + [MF_UPCASE] = { "!UPCASE", 1, 1 }, + }; + + /* Is this a macro function? */ + const struct macro_function *mf; + for (mf = mfs; ; mf++) + { + if (mf >= mfs + sizeof mfs / sizeof *mfs) { - macro_error (me->stack, NULL, - _("Argument to !BLANKS must be non-negative integer " - "(not \"%s\")."), args.strings[0]); - string_array_destroy (&args); + /* Not a macro function. 
*/ return false; } - ds_put_byte_multiple (output, ' ', n); + if (lex_id_match_n (ss_cstr (mf->name), input[0].token.string, 4)) + break; } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!CONCAT"), 1, INT_MAX, - input_consumed)) + + enum macro_function_id id = mf - mfs; + if (id == MF_NULL) { - for (size_t i = 0; i < args.n; i++) - if (!unquote_string (args.strings[i], me->segmenter_mode, output)) - ds_put_cstr (output, args.strings[i]); + *input_consumed = 1; + return true; } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!HEAD"), 1, 1, - input_consumed)) - { - struct string tmp; - const char *s = unquote_string_in_place (args.strings[0], - me->segmenter_mode, &tmp); - struct macro_tokens mts = { .n = 0 }; - macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, - me->stack); - if (mts.n > 0) - ds_put_substring (output, mts.mts[0].representation); - macro_tokens_uninit (&mts); - ds_destroy (&tmp); - } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!INDEX"), 2, 2, - input_consumed)) - { - const char *haystack = args.strings[0]; - const char *needle = strstr (haystack, args.strings[1]); - ds_put_format (output, "%zu", needle ? 
needle - haystack + 1 : 0); + struct string_array args = STRING_ARRAY_INITIALIZER; + *input_consumed = parse_function_args (me, input, n_input, mf->name, &args); + if (!*input_consumed) + return false; + + if (args.n < mf->min_args || args.n > mf->max_args) + { + if (mf->min_args == 1 && mf->max_args == 1) + macro_error (me->stack, NULL, + _("Macro function %s takes one argument (not %zu)."), + mf->name, args.n); + else if (mf->min_args == 2 && mf->max_args == 2) + macro_error (me->stack, NULL, + _("Macro function %s takes two arguments (not %zu)."), + mf->name, args.n); + else if (mf->min_args == 2 && mf->max_args == 3) + macro_error (me->stack, NULL, + _("Macro function %s takes two or three arguments " + "(not %zu)."), + mf->name, args.n); + else if (mf->min_args == 1 && mf->max_args == INT_MAX) + macro_error (me->stack, NULL, + _("Macro function %s needs at least one argument."), + mf->name); + else + NOT_REACHED (); + return false; } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!QUOTE"), 1, 1, - input_consumed)) + + switch (id) { + case MF_LENGTH: + ds_put_format (output, "%zu", strlen (args.strings[0])); + break; + + case MF_BLANKS: + { + int n; + if (!parse_integer (args.strings[0], &n)) + { + macro_error (me->stack, NULL, + _("Argument to !BLANKS must be non-negative integer " + "(not \"%s\")."), args.strings[0]); + string_array_destroy (&args); + return false; + } + + ds_put_byte_multiple (output, ' ', n); + } + break; + + case MF_CONCAT: + for (size_t i = 0; i < args.n; i++) + if (!unquote_string (args.strings[i], me->segmenter_mode, output)) + ds_put_cstr (output, args.strings[i]); + break; + + case MF_HEAD: + { + struct string tmp; + const char *s = unquote_string_in_place (args.strings[0], + me->segmenter_mode, &tmp); + + struct macro_tokens mts = { .n = 0 }; + macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, + me->stack); + if (mts.n > 0) + ds_put_substring (output, mts.mts[0].representation); + 
macro_tokens_uninit (&mts); + ds_destroy (&tmp); + } + break; + + case MF_INDEX: + { + const char *haystack = args.strings[0]; + const char *needle = strstr (haystack, args.strings[1]); + ds_put_format (output, "%zu", needle ? needle - haystack + 1 : 0); + } + break; + + case MF_QUOTE: if (unquote_string (args.strings[0], me->segmenter_mode, NULL)) ds_put_cstr (output, args.strings[0]); else @@ -1166,99 +1229,97 @@ expand_macro_function (const struct macro_expander *me, } ds_put_byte (output, '\''); } - } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!SUBSTR"), 2, 3, - input_consumed)) - { - int start; - if (!parse_integer (args.strings[1], &start) || start < 1) - { - macro_error (me->stack, NULL, - _("Second argument of !SUBSTR must be " - "positive integer (not \"%s\")."), - args.strings[1]); - string_array_destroy (&args); - return false; - } + break; - int count = INT_MAX; - if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0)) - { - macro_error (me->stack, NULL, - _("Third argument of !SUBSTR must be " - "non-negative integer (not \"%s\")."), - args.strings[2]); - string_array_destroy (&args); - return false; - } + case MF_SUBSTR: + { + int start; + if (!parse_integer (args.strings[1], &start) || start < 1) + { + macro_error (me->stack, NULL, + _("Second argument of !SUBSTR must be " + "positive integer (not \"%s\")."), + args.strings[1]); + string_array_destroy (&args); + return false; + } - struct substring s = ss_cstr (args.strings[0]); - ds_put_substring (output, ss_substr (s, start - 1, count)); - } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!TAIL"), 1, 1, - input_consumed)) - { - struct string tmp; - const char *s = unquote_string_in_place (args.strings[0], - me->segmenter_mode, &tmp); + int count = INT_MAX; + if (args.n > 2 && (!parse_integer (args.strings[2], &count) || count < 0)) + { + macro_error (me->stack, NULL, + _("Third argument of !SUBSTR must be " + "non-negative integer 
(not \"%s\")."), + args.strings[2]); + string_array_destroy (&args); + return false; + } - struct macro_tokens mts = { .n = 0 }; - macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, - me->stack); - if (mts.n > 1) - { - struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 }; - macro_tokens_to_representation (&tail, output, NULL, NULL); - } - macro_tokens_uninit (&mts); - ds_destroy (&tmp); - } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!UNQUOTE"), 1, 1, - input_consumed)) - { + struct substring s = ss_cstr (args.strings[0]); + ds_put_substring (output, ss_substr (s, start - 1, count)); + } + break; + + case MF_TAIL: + { + struct string tmp; + const char *s = unquote_string_in_place (args.strings[0], + me->segmenter_mode, &tmp); + + struct macro_tokens mts = { .n = 0 }; + macro_tokens_from_string__ (&mts, ss_cstr (s), me->segmenter_mode, + me->stack); + if (mts.n > 1) + { + struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 }; + macro_tokens_to_representation (&tail, output, NULL, NULL); + } + macro_tokens_uninit (&mts); + ds_destroy (&tmp); + } + break; + + case MF_UNQUOTE: if (!unquote_string (args.strings[0], me->segmenter_mode, output)) ds_put_cstr (output, args.strings[0]); + break; + + case MF_UPCASE: + { + struct string tmp; + const char *s = unquote_string_in_place (args.strings[0], + me->segmenter_mode, &tmp); + char *upper = utf8_to_upper (s); + ds_put_cstr (output, upper); + free (upper); + ds_destroy (&tmp); + } + break; + + case MF_EVAL: + { + struct macro_tokens mts = { .n = 0 }; + macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]), + me->segmenter_mode, me->stack); + struct macro_tokens exp = { .n = 0 }; + struct macro_expansion_stack stack = { + .name = "!EVAL", + .next = me->stack + }; + struct macro_expander subme = *me; + subme.break_ = NULL; + subme.stack = &stack; + + macro_expand (&mts, &subme, &exp); + macro_tokens_to_representation (&exp, output, NULL, NULL); + 
macro_tokens_uninit (&exp); + macro_tokens_uninit (&mts); + } + break; + + default: + NOT_REACHED (); } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!UPCASE"), 1, 1, - input_consumed)) - { - struct string tmp; - const char *s = unquote_string_in_place (args.strings[0], - me->segmenter_mode, &tmp); - char *upper = utf8_to_upper (s); - ds_put_cstr (output, upper); - free (upper); - ds_destroy (&tmp); - } - else if (parse_macro_function (me, input, n_input, &args, ss_cstr ("!EVAL"), 1, 1, - input_consumed)) - { - struct macro_tokens mts = { .n = 0 }; - macro_tokens_from_string__ (&mts, ss_cstr (args.strings[0]), - me->segmenter_mode, me->stack); - struct macro_tokens exp = { .n = 0 }; - struct macro_expansion_stack stack = { - .name = "!EVAL", - .next = me->stack - }; - struct macro_expander subme = *me; - subme.break_ = NULL; - subme.stack = &stack; - - macro_expand (&mts, &subme, &exp); - macro_tokens_to_representation (&exp, output, NULL, NULL); - macro_tokens_uninit (&exp); - macro_tokens_uninit (&mts); - } - else if (n_input > 0 - && input[0].token.type == T_MACRO_ID - && ss_equals_case (input[0].token.string, ss_cstr ("!NULL"))) - { - *input_consumed = 1; - return true; - } - else - return false; string_array_destroy (&args); return true; diff --git a/tests/language/control/define.at b/tests/language/control/define.at index 1e18b81bd7..aa1998d82e 100644 --- a/tests/language/control/define.at +++ b/tests/language/control/define.at @@ -1254,45 +1254,129 @@ AT_SETUP([generic macro function syntax errors]) AT_DATA([define.sps], [dnl DEFINE !a() !SUBSTR !ENDDEFINE. DEFINE !b() !SUBSTR x !ENDDEFINE. -DEFINE !c() !SUBSTR(1,2,3,4) !ENDDEFINE. -DEFINE !d() !SUBSTR(1x) !ENDDEFINE. -DEFINE !e() !SUBSTR(1 !ENDDEFINE. +DEFINE !c() !SUBSTR(1x) !ENDDEFINE. +DEFINE !d() !SUBSTR(1 !ENDDEFINE. +DEFINE !narg_blanks() !BLANKS() !ENDDEFINE. +DEFINE !narg_concat() !CONCAT() !ENDDEFINE. +DEFINE !narg_eval() !EVAL() !ENDDEFINE. 
+DEFINE !narg_head() !HEAD() !ENDDEFINE. +DEFINE !narg_index() !INDEX() !ENDDEFINE. +DEFINE !narg_length() !LENGTH() !ENDDEFINE. +DEFINE !narg_null() !NULL() !ENDDEFINE. +DEFINE !narg_quote() !QUOTE() !ENDDEFINE. +DEFINE !narg_substr() !SUBSTR() !ENDDEFINE. +DEFINE !narg_tail() !TAIL() !ENDDEFINE. +DEFINE !narg_unquote() !UNQUOTE() !ENDDEFINE. +DEFINE !narg_upcase() !UPCASE() !ENDDEFINE. dnl ) DEBUG EXPAND. !a. !b. !c. !d. -!e. +!narg_blanks. +!narg_concat. +!narg_eval. +!narg_head. +!narg_index. +!narg_length. +!narg_null. +!narg_quote. +!narg_substr. +!narg_tail. +!narg_unquote. +!narg_upcase. ]) AT_CHECK([pspp --testing-mode define.sps], [1], [dnl define.sps:1: In the expansion of `!a', -define.sps:7: error: DEBUG EXPAND: `(' expected following !SUBSTR. +define.sps:18: error: DEBUG EXPAND: `@{:@' expected following !SUBSTR. !SUBSTR define.sps:2: At `x' in the expansion of `!b', -define.sps:8: error: DEBUG EXPAND: `(' expected following !SUBSTR. +define.sps:19: error: DEBUG EXPAND: `@{:@' expected following !SUBSTR. !SUBSTR x -define.sps:3: At `)' in the expansion of `!c', -define.sps:9: error: DEBUG EXPAND: Wrong number of arguments to macro -function !SUBSTR. - -!SUBSTR(1, 2, 3, 4) - -define.sps:4: At `x' in the expansion of `!d', -define.sps:10: error: DEBUG EXPAND: `,' or `)' expected in call to macro +define.sps:3: At `x' in the expansion of `!c', +define.sps:20: error: DEBUG EXPAND: `,' or `@:}@' expected in call to macro function !SUBSTR. !SUBSTR(1 x) -define.sps:5: In the expansion of `!e', -define.sps:11: error: DEBUG EXPAND: Missing `)' in call to macro function ! +define.sps:4: In the expansion of `!d', +define.sps:21: error: DEBUG EXPAND: Missing `@:}@' in call to macro function ! SUBSTR. -!SUBSTR(1 +!SUBSTR@{:@1 + +define.sps:5: In the expansion of `!narg_blanks', +define.sps:22: error: DEBUG EXPAND: Macro function !BLANKS takes one argument +(not 0). 
+ +!BLANKS( ) + +define.sps:6: In the expansion of `!narg_concat', +define.sps:23: error: DEBUG EXPAND: Macro function !CONCAT needs at least one +argument. + +!CONCAT( ) + +define.sps:7: In the expansion of `!narg_eval', +define.sps:24: error: DEBUG EXPAND: Macro function !EVAL takes one argument +(not 0). + +!EVAL( ) + +define.sps:8: In the expansion of `!narg_head', +define.sps:25: error: DEBUG EXPAND: Macro function !HEAD takes one argument +(not 0). + +!HEAD( ) + +define.sps:9: In the expansion of `!narg_index', +define.sps:26: error: DEBUG EXPAND: Macro function !INDEX takes two arguments +(not 0). + +!INDEX( ) + +define.sps:10: In the expansion of `!narg_length', +define.sps:27: error: DEBUG EXPAND: Macro function !LENGTH takes one argument +(not 0). + +!LENGTH( ) + +( ) + +define.sps:12: In the expansion of `!narg_quote', +define.sps:29: error: DEBUG EXPAND: Macro function !QUOTE takes one argument +(not 0). + +!QUOTE( ) + +define.sps:13: In the expansion of `!narg_substr', +define.sps:30: error: DEBUG EXPAND: Macro function !SUBSTR takes two or three +arguments (not 0). + +!SUBSTR( ) + +define.sps:14: In the expansion of `!narg_tail', +define.sps:31: error: DEBUG EXPAND: Macro function !TAIL takes one argument +(not 0). + +!TAIL( ) + +define.sps:15: In the expansion of `!narg_unquote', +define.sps:32: error: DEBUG EXPAND: Macro function !UNQUOTE takes one argument +(not 0). + +!UNQUOTE( ) + +define.sps:16: In the expansion of `!narg_upcase', +define.sps:33: error: DEBUG EXPAND: Macro function !UPCASE takes one argument +(not 0). + +!UPCASE( ) ]) AT_CLEANUP -- 2.30.2