From bb7605897eddfc1a416aa5cb2b6a3de13ee624de Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 2 Sep 2021 09:23:11 -0700 Subject: [PATCH] DEFINE: Properly support redefining a macro. Redefining a macro didn't work in simple cases because the macro name was being expanded in the DEFINE command. Thanks to Frans Houweling for reporting the bug. --- src/language/control/define.c | 27 ++++++++++++-------- src/language/lexer/scan.c | 1 + src/language/lexer/segment.c | 42 +++++++++++++++++++++----------- src/language/lexer/segment.h | 1 + tests/language/control/define.at | 12 +++++++++ tests/language/lexer/scan.at | 28 ++++++++++----------- tests/language/lexer/segment.at | 28 ++++++++++----------- 7 files changed, 87 insertions(+), 52 deletions(-) diff --git a/src/language/control/define.c b/src/language/control/define.c index 23749eb69e..4c12ac2493 100644 --- a/src/language/control/define.c +++ b/src/language/control/define.c @@ -30,12 +30,6 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -static bool -force_macro_id (struct lexer *lexer) -{ - return lex_token (lexer) == T_MACRO_ID || lex_force_id (lexer); -} - static bool match_macro_id (struct lexer *lexer, const char *keyword) { @@ -96,13 +90,26 @@ dup_arg_type (struct lexer *lexer, bool *saw_arg_type) int cmd_define (struct lexer *lexer, struct dataset *ds UNUSED) { - if (!force_macro_id (lexer)) - return CMD_FAILURE; + /* Parse macro name. + + The macro name is a T_STRING token, even though it's an identifier, + because that's the way that the segmenter prevents it from getting + macro-expanded. */ + if (lex_token (lexer) != T_STRING) + { + lex_error (lexer, _("expecting identifier")); + return CMD_FAILURE; + } + const char *name = lex_tokcstr (lexer); + if (!id_is_plausible (name + (name[0] == '!'), false)) + { + lex_error (lexer, _("expecting identifier")); + return CMD_FAILURE; + } - /* Parse macro name. */ struct macro *m = xmalloc (sizeof *m); *m = (struct macro) { - .name = ss_xstrdup (lex_tokss (lexer)), + .name = xstrdup (name), .location = xmalloc (sizeof *m->location), }; *m->location = (struct msg_location) { diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index 3150470caa..b6baf5ce55 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -328,6 +328,7 @@ token_from_segment (enum segment_type type, struct substring s, case SEG_INLINE_DATA: case SEG_DOCUMENT: case SEG_MACRO_BODY: + case SEG_MACRO_NAME: *token = (struct token) { .type = T_STRING }; ss_alloc_substring (&token->string, s); return TOKENIZE_TOKEN; diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index 1cf7ac6f6d..519f6ec9f2 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -48,6 +48,7 @@ enum segmenter_state S_DEFINE_2, S_DEFINE_3, S_DEFINE_4, + S_DEFINE_5, S_BEGIN_DATA_1, S_BEGIN_DATA_2, S_BEGIN_DATA_3, @@ -680,6 +681,7 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n, case SEG_DO_REPEAT_COMMAND: case SEG_INLINE_DATA: case SEG_MACRO_ID: + case SEG_MACRO_NAME: case SEG_MACRO_BODY: case SEG_START_DOCUMENT: case SEG_DOCUMENT: @@ -1525,6 +1527,10 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, - The DEFINE keyword. + - An identifier. We transform this into SEG_MACRO_NAME instead of + SEG_IDENTIFIER or SEG_MACRO_NAME because this identifier must never be + macro-expanded. + - Anything but "(". - "(" followed by a sequence of tokens possibly including balanced parentheses @@ -1537,15 +1543,21 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, line, even. */ static int -segmenter_parse_define_1__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) +segmenter_parse_define_1_2__ (struct segmenter *s, + const char *input, size_t n, bool eof, + enum segment_type *type) { int ofs = segmenter_subparse (s, input, n, eof, type); if (ofs < 0) return -1; - if (*type == SEG_SEPARATE_COMMANDS + if (s->state == S_DEFINE_1 + && (*type == SEG_IDENTIFIER || *type == SEG_MACRO_ID)) + { + *type = SEG_MACRO_NAME; + s->state = S_DEFINE_2; + } + else if (*type == SEG_SEPARATE_COMMANDS || *type == SEG_END_COMMAND || *type == SEG_START_COMMAND) { @@ -1556,7 +1568,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } else if (*type == SEG_PUNCT && input[0] == '(') { - s->state = S_DEFINE_2; + s->state = S_DEFINE_3; s->nest = 1; return ofs; } @@ -1565,7 +1577,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } static int -segmenter_parse_define_2__ (struct segmenter *s, +segmenter_parse_define_3__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1593,7 +1605,7 @@ segmenter_parse_define_2__ (struct segmenter *s, s->nest--; if (!s->nest) { - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; s->substate = 0; } return ofs; @@ -1639,7 +1651,7 @@ find_enddefine (struct substring input) /* We are in the body of a macro definition, looking for additional lines of the body or !ENDDEFINE. */ static int -segmenter_parse_define_3__ (struct segmenter *s, +segmenter_parse_define_4__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1666,7 +1678,7 @@ segmenter_parse_define_3__ (struct segmenter *s, report it as spaces because it's not significant. */ *type = (s->substate == 0 && is_all_spaces (input, ofs) ? SEG_SPACES : SEG_MACRO_BODY); - s->state = S_DEFINE_4; + s->state = S_DEFINE_5; s->substate = 1; return ofs; } @@ -1698,7 +1710,7 @@ segmenter_parse_define_3__ (struct segmenter *s, } static int -segmenter_parse_define_4__ (struct segmenter *s, +segmenter_parse_define_5__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1706,7 +1718,7 @@ segmenter_parse_define_4__ (struct segmenter *s, if (ofs < 0) return -1; - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; return ofs; } @@ -1945,13 +1957,14 @@ segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof, return segmenter_parse_do_repeat_3__ (s, input, n, eof, type); case S_DEFINE_1: - return segmenter_parse_define_1__ (s, input, n, eof, type); case S_DEFINE_2: - return segmenter_parse_define_2__ (s, input, n, eof, type); + return segmenter_parse_define_1_2__ (s, input, n, eof, type); case S_DEFINE_3: return segmenter_parse_define_3__ (s, input, n, eof, type); case S_DEFINE_4: return segmenter_parse_define_4__ (s, input, n, eof, type); + case S_DEFINE_5: + return segmenter_parse_define_5__ (s, input, n, eof, type); case S_BEGIN_DATA_1: return segmenter_parse_begin_data_1__ (s, input, n, eof, type); @@ -2005,9 +2018,10 @@ segmenter_get_prompt (const struct segmenter *s) case S_DEFINE_1: case S_DEFINE_2: - return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_3: + return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_4: + case S_DEFINE_5: return PROMPT_DEFINE; case S_BEGIN_DATA_1: diff --git a/src/language/lexer/segment.h b/src/language/lexer/segment.h index 5d550f531f..199e390f7e 100644 --- a/src/language/lexer/segment.h +++ b/src/language/lexer/segment.h @@ -79,6 +79,7 @@ enum segmenter_mode SEG_TYPE(INLINE_DATA) \ \ SEG_TYPE(MACRO_ID) \ + SEG_TYPE(MACRO_NAME) \ SEG_TYPE(MACRO_BODY) \ \ SEG_TYPE(START_DOCUMENT) \ diff --git a/tests/language/control/define.at b/tests/language/control/define.at index 204ed2efd3..8ca40cbaba 100644 --- a/tests/language/control/define.at +++ b/tests/language/control/define.at @@ -41,6 +41,18 @@ m(n, o). ]) AT_CLEANUP +AT_SETUP([redefining a macro]) +AT_DATA([define.sps], [dnl +DEFINE !macro() 0 !ENDDEFINE. +DEFINE !macro() 1 !ENDDEFINE. +DEBUG EXPAND. +!macro. +]) +AT_CHECK([pspp --testing-mode define.sps], [0], [dnl +1 +]) +AT_CLEANUP + AT_SETUP([macro expansion - one !TOKENS(1) positional argument]) AT_KEYWORDS([TOKENS]) AT_DATA([define.sps], [dnl diff --git a/tests/language/lexer/scan.at b/tests/language/lexer/scan.at index 187c22efee..90dea5d346 100644 --- a/tests/language/lexer/scan.at +++ b/tests/language/lexer/scan.at @@ -638,7 +638,7 @@ var1 var2 var3 ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN RPAREN STRING "var1 var2 var3" @@ -657,7 +657,7 @@ define !macro1() var1 var2 var3 ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN RPAREN STRING " var1 var2 var3" @@ -676,7 +676,7 @@ var1 var2 var3!enddefine. ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN RPAREN STRING "var1 var2 var3" @@ -694,7 +694,7 @@ define !macro1()var1 var2 var3!enddefine. ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN RPAREN STRING "var1 var2 var3" @@ -713,7 +713,7 @@ define !macro1() ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN RPAREN MACRO_ID "!enddefine" @@ -733,7 +733,7 @@ define !macro1() ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN RPAREN STRING "" @@ -753,7 +753,7 @@ define !macro1(a(), b(), c()) ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN ID "a" LPAREN @@ -786,7 +786,7 @@ define !macro1( ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN ID "a" LPAREN @@ -819,7 +819,7 @@ content 2 ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN ID "x" COMMA @@ -844,7 +844,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" ENDCMD ID "data" ID "list" @@ -866,7 +866,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" ID "x" ENDCMD ID "data" @@ -889,7 +889,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN ENDCMD ID "x" @@ -915,7 +915,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" ENDCMD ID "data" ID "list" @@ -937,7 +937,7 @@ content line 2 ]) AT_DATA([expout-base], [dnl ID "define" -MACRO_ID "!macro1" +STRING "!macro1" LPAREN RPAREN STRING "content line 1" diff --git a/tests/language/lexer/segment.at b/tests/language/lexer/segment.at index 8936ae98df..78ad1e99eb 100644 --- a/tests/language/lexer/segment.at +++ b/tests/language/lexer/segment.at @@ -1098,7 +1098,7 @@ var1 var2 var3 "!enddefine" ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( punct ) spaces @@ -1124,7 +1124,7 @@ define !macro1() var1 var2 var3 /* !enddefine ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( punct ) macro_body _var1_var2_var3_/*_!enddefine @@ -1147,7 +1147,7 @@ var1 var2 var3!enddefine. ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( punct ) spaces @@ -1170,7 +1170,7 @@ define !macro1()var1 var2 var3!enddefine. ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( punct ) macro_body var1_var2_var3 @@ -1191,7 +1191,7 @@ define !macro1() ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( punct ) spaces @@ -1216,7 +1216,7 @@ define !macro1() ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( punct ) spaces @@ -1245,7 +1245,7 @@ define !macro1(a(), b(), c()) ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( identifier a punct ( @@ -1283,7 +1283,7 @@ define !macro1( ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( newline \n (later) @@ -1332,7 +1332,7 @@ content 2 ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 newline \n (later) punct ( @@ -1370,7 +1370,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 end_command . newline \n (first) @@ -1396,7 +1396,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 newline \n (later) identifier x @@ -1425,7 +1425,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( end_command . newline \n (first) @@ -1457,7 +1457,7 @@ data list /x 1. ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 end_command . newline \n (first) @@ -1483,7 +1483,7 @@ content line 2 ]) AT_DATA([expout-base], [dnl identifier define space -macro_id !macro1 +macro_name !macro1 punct ( punct ) spaces -- 2.30.2