From 320622191b3de640da6ba0e347a94d28493711ae Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 26 Jun 2021 14:53:23 -0700 Subject: [PATCH] TITLE and SUBTITLE: Don't treat an unquoted argument as a quoted string. This will allow the argument to be processed through the macro processor. --- src/language/lexer/segment.c | 177 ++++++++++++++---------------- src/language/utilities/set.c | 28 ++++- src/language/utilities/title.c | 34 +++++- src/output/driver.c | 12 ++ src/output/driver.h | 2 + tests/language/lexer/scan.at | 43 +------- tests/language/lexer/segment.at | 50 +-------- tests/language/utilities/title.at | 18 +++ 8 files changed, 173 insertions(+), 191 deletions(-) diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index a4fea0b213..35240b4c64 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -38,7 +38,9 @@ enum segmenter_state S_DOCUMENT_1, S_DOCUMENT_2, S_DOCUMENT_3, - S_FILE_LABEL, + S_FILE_LABEL_1, + S_FILE_LABEL_2, + S_FILE_LABEL_3, S_DO_REPEAT_1, S_DO_REPEAT_2, S_DO_REPEAT_3, @@ -50,8 +52,6 @@ enum segmenter_state S_BEGIN_DATA_2, S_BEGIN_DATA_3, S_BEGIN_DATA_4, - S_TITLE_1, - S_TITLE_2 }; #define SS_START_OF_LINE (1u << 0) @@ -759,18 +759,6 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n, *type = SEG_START_DOCUMENT; return 0; } - else if (lex_id_match (ss_cstr ("TITLE"), word) - || lex_id_match (ss_cstr ("SUBTITLE"), word)) - { - int result = segmenter_unquoted (input, n, eof, ofs); - if (result < 0) - return -1; - else if (result) - { - s->state = S_TITLE_1; - return ofs; - } - } else if (lex_id_match_n (ss_cstr ("DEFINE"), word, 6)) { s->state = S_DEFINE_1; @@ -784,7 +772,7 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n, return -1; else if (lex_id_match (ss_cstr ("LABEL"), ss_cstr (id))) { - s->state = S_FILE_LABEL; + s->state = S_FILE_LABEL_1; s->substate = 0; return ofs; } @@ -1257,9 +1245,9 @@ segmenter_parse_start_of_line__ (struct segmenter *s, } static int -segmenter_parse_file_label__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) +segmenter_parse_file_label_1__ (struct segmenter *s, + const char *input, size_t n, bool eof, + enum segment_type *type) { struct segmenter sub; int ofs; @@ -1282,7 +1270,7 @@ segmenter_parse_file_label__ (struct segmenter *s, else { if (result) - s->state = S_TITLE_1; + s->state = S_FILE_LABEL_2; else *s = sub; return ofs; @@ -1295,6 +1283,70 @@ segmenter_parse_file_label__ (struct segmenter *s, } } +static int +segmenter_parse_file_label_2__ (struct segmenter *s, + const char *input, size_t n, bool eof, + enum segment_type *type) +{ + int ofs; + + ofs = skip_spaces (input, n, eof, 0); + if (ofs < 0) + return -1; + s->state = S_FILE_LABEL_3; + *type = SEG_SPACES; + return ofs; +} + +static int +segmenter_parse_file_label_3__ (struct segmenter *s, + const char *input, size_t n, bool eof, + enum segment_type *type) +{ + int endcmd; + int ofs; + + endcmd = -1; + ofs = 0; + while (ofs < n) + { + ucs4_t uc; + int mblen; + + mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs); + if (mblen < 0) + return -1; + + switch (uc) + { + case '\n': + goto end_of_line; + + case '.': + endcmd = ofs; + break; + + default: + if (!lex_uc_is_space (uc)) + endcmd = -1; + break; + } + + ofs += mblen; + } + + if (eof) + { + end_of_line: + s->state = S_GENERAL; + s->substate = 0; + *type = SEG_UNQUOTED_STRING; + return endcmd >= 0 ? endcmd : ofs; + } + + return -1; +} + static int segmenter_subparse (struct segmenter *s, const char *input, size_t n, bool eof, @@ -1717,70 +1769,6 @@ segmenter_parse_begin_data_4__ (struct segmenter *s, return ofs; } -static int -segmenter_parse_title_1__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) -{ - int ofs; - - ofs = skip_spaces (input, n, eof, 0); - if (ofs < 0) - return -1; - s->state = S_TITLE_2; - *type = SEG_SPACES; - return ofs; -} - -static int -segmenter_parse_title_2__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) -{ - int endcmd; - int ofs; - - endcmd = -1; - ofs = 0; - while (ofs < n) - { - ucs4_t uc; - int mblen; - - mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs); - if (mblen < 0) - return -1; - - switch (uc) - { - case '\n': - goto end_of_line; - - case '.': - endcmd = ofs; - break; - - default: - if (!lex_uc_is_space (uc)) - endcmd = -1; - break; - } - - ofs += mblen; - } - - if (eof) - { - end_of_line: - s->state = S_GENERAL; - s->substate = 0; - *type = SEG_UNQUOTED_STRING; - return endcmd >= 0 ? endcmd : ofs; - } - - return -1; -} - /* Returns the name of segment TYPE as a string. The caller must not modify or free the returned string. @@ -1881,8 +1869,12 @@ segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof, case S_DOCUMENT_3: return segmenter_parse_document_3__ (s, type); - case S_FILE_LABEL: - return segmenter_parse_file_label__ (s, input, n, eof, type); + case S_FILE_LABEL_1: + return segmenter_parse_file_label_1__ (s, input, n, eof, type); + case S_FILE_LABEL_2: + return segmenter_parse_file_label_2__ (s, input, n, eof, type); + case S_FILE_LABEL_3: + return segmenter_parse_file_label_3__ (s, input, n, eof, type); case S_DO_REPEAT_1: return segmenter_parse_do_repeat_1__ (s, input, n, eof, type); @@ -1908,11 +1900,6 @@ segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof, return segmenter_parse_begin_data_3__ (s, input, n, eof, type); case S_BEGIN_DATA_4: return segmenter_parse_begin_data_4__ (s, input, n, eof, type); - - case S_TITLE_1: - return segmenter_parse_title_1__ (s, input, n, eof, type); - case S_TITLE_2: - return segmenter_parse_title_2__ (s, input, n, eof, type); } NOT_REACHED (); @@ -1943,8 +1930,11 @@ segmenter_get_prompt (const struct segmenter *s) case S_DOCUMENT_3: return PROMPT_FIRST; - case S_FILE_LABEL: + case S_FILE_LABEL_1: return PROMPT_LATER; + case S_FILE_LABEL_2: + case S_FILE_LABEL_3: + return PROMPT_FIRST; case S_DO_REPEAT_1: case S_DO_REPEAT_2: @@ -1967,9 +1957,6 @@ segmenter_get_prompt (const struct segmenter *s) case S_BEGIN_DATA_4: return PROMPT_DATA; - case S_TITLE_1: - case S_TITLE_2: - return PROMPT_FIRST; } NOT_REACHED (); diff --git a/src/language/utilities/set.c b/src/language/utilities/set.c index 4b99bde6d9..5e3489e117 100644 --- a/src/language/utilities/set.c +++ b/src/language/utilities/set.c @@ -916,16 +916,28 @@ show_SMALL (const struct dataset *ds UNUSED) return xstrdup (buf); } +static char * +show_SUBTITLE (const struct dataset *ds UNUSED) +{ + return xstrdup (output_get_subtitle ()); +} + static char * show_SYSTEM (const struct dataset *ds UNUSED) { - return strdup (host_system); + return xstrdup (host_system); } static char * show_TEMPDIR (const struct dataset *ds UNUSED) { - return strdup (temp_dir_name ()); + return xstrdup (temp_dir_name ()); +} + +static char * +show_TITLE (const struct dataset *ds UNUSED) +{ + return xstrdup (output_get_title ()); } static bool @@ -1123,7 +1135,7 @@ static void do_show (const struct dataset *ds, const struct setting *s) { char *value = s->show (ds); - msg (SN, _("%s is %s."), s->name, value); + msg (SN, _("%s is %s."), s->name, value ? value : _("empty")); free (value); } @@ -1262,6 +1274,16 @@ cmd_show (struct lexer *lexer, struct dataset *ds) show_warranty (ds); else if (lex_match_id (lexer, "COPYING") || lex_match_id (lexer, "LICENSE")) show_copying (ds); + else if (lex_match_id (lexer, "TITLE")) + { + struct setting s = { .name = "TITLE", .show = show_TITLE }; + do_show (ds, &s); + } + else if (lex_match_id (lexer, "SUBTITLE")) + { + struct setting s = { .name = "SUBTITLE", .show = show_SUBTITLE }; + do_show (ds, &s); + } else if (lex_token (lexer) == T_ID) { int i; diff --git a/src/language/utilities/title.c b/src/language/utilities/title.c index 686774463e..a0a71e9cee 100644 --- a/src/language/utilities/title.c +++ b/src/language/utilities/title.c @@ -24,11 +24,13 @@ #include "data/variable.h" #include "language/command.h" #include "language/lexer/lexer.h" +#include "language/lexer/token.h" #include "libpspp/message.h" #include "libpspp/start-date.h" #include "libpspp/version.h" #include "output/driver.h" +#include "gl/c-ctype.h" #include "gl/xalloc.h" #include "gettext.h" @@ -37,10 +39,34 @@ static int parse_title (struct lexer *lexer, void (*set_title) (const char *)) { - if (!lex_force_string (lexer)) - return CMD_FAILURE; - set_title (lex_tokcstr (lexer)); - lex_get (lexer); + if (lex_token (lexer) == T_STRING) + { + set_title (lex_tokcstr (lexer)); + lex_get (lexer); + } + else if (lex_token (lexer) == T_ENDCMD) + { + /* This would be a bad special case below because n-1 would be + SIZE_MAX. */ + set_title (""); + } + else + { + /* Count the tokens in the title. */ + size_t n = 0; + while (lex_next (lexer, n)->type != T_ENDCMD) + n++; + + /* Get the raw representation of all the tokens, including any space + between them, and use it as the title. */ + char *title = ss_xstrdup (lex_next_representation (lexer, 0, n - 1)); + set_title (title); + free (title); + + /* Skip past the tokens. */ + for (size_t i = 0; i < n; i++) + lex_get (lexer); + } return CMD_SUCCESS; } diff --git a/src/output/driver.c b/src/output/driver.c index 33473a0142..f40876449b 100644 --- a/src/output/driver.c +++ b/src/output/driver.c @@ -369,6 +369,12 @@ output_log (const char *format, ...) output_submit (text_item_create_nocopy (TEXT_ITEM_LOG, s, NULL)); } +const char * +output_get_title (void) +{ + return engine_stack_top ()->title; +} + void output_set_title (const char *title) { @@ -377,6 +383,12 @@ output_set_title (const char *title) output_set_title__ (e, &e->title, title); } +const char * +output_get_subtitle (void) +{ + return engine_stack_top ()->subtitle; +} + void output_set_subtitle (const char *subtitle) { diff --git a/src/output/driver.h b/src/output/driver.h index ab162a6268..3e5cf193a4 100644 --- a/src/output/driver.h +++ b/src/output/driver.h @@ -36,7 +36,9 @@ void output_flush (void); void output_log (const char *, ...) PRINTF_FORMAT (1, 2); +const char *output_get_title (void); void output_set_title (const char *); +const char *output_get_subtitle (void); void output_set_subtitle (const char *); void output_set_filename (const char *); diff --git a/tests/language/lexer/scan.at b/tests/language/lexer/scan.at index d1fb66fcbe..146b891e1c 100644 --- a/tests/language/lexer/scan.at +++ b/tests/language/lexer/scan.at @@ -604,18 +604,9 @@ STOP PSPP_CHECK_SCAN([-i]) AT_CLEANUP -AT_SETUP([TITLE, SUBTITLE, FILE LABEL commands]) +AT_SETUP([FILE LABEL commands]) AT_KEYWORDS([scan]) AT_DATA([input], [dnl -title/**/'Quoted string title'. -tit /* -"Quoted string on second line". -sub "Quoted string subtitle" - . - -TITL /* Not a */ quoted string title. -SUBT Not a quoted string /* subtitle - FIL label isn't quoted. FILE lab 'is quoted'. @@ -624,38 +615,6 @@ FILE /* ]) AT_DATA([expout-base], [dnl -ID "title" -SKIP -STRING "Quoted string title" -ENDCMD -SKIP -ID "tit" -SKIP -SKIP -SKIP -STRING "Quoted string on second line" -ENDCMD -SKIP -ID "sub" -SKIP -STRING "Quoted string subtitle" -SKIP -SKIP -ENDCMD -SKIP -ENDCMD -SKIP -ID "TITL" -SKIP -STRING "/* Not a */ quoted string title" -ENDCMD -SKIP -ID "SUBT" -SKIP -STRING "Not a quoted string /* subtitle" -SKIP -ENDCMD -SKIP ID "FIL" SKIP ID "label" diff --git a/tests/language/lexer/segment.at b/tests/language/lexer/segment.at index 67e647d221..00f3fe0c3f 100644 --- a/tests/language/lexer/segment.at +++ b/tests/language/lexer/segment.at @@ -508,7 +508,8 @@ shbang #!_/usr/bin/pspp newline \n (first) identifier title space -unquoted_string my_title +identifier my space +identifier title end_command . newline \n (first) @@ -674,18 +675,9 @@ end PSPP_CHECK_SEGMENT([-i]) AT_CLEANUP -AT_SETUP([TITLE, SUBTITLE, FILE LABEL commands]) +AT_SETUP([FILE LABEL command]) AT_KEYWORDS([segment]) AT_DATA([input], [dnl -title/**/'Quoted string title'. -tit /* -"Quoted string on second line". -sub "Quoted string subtitle" - . - -TITL /* Not a */ quoted string title. -SUBT Not a quoted string /* subtitle - FIL label isn't quoted. FILE lab 'is quoted'. @@ -694,42 +686,6 @@ FILE /* ]) AT_DATA([expout-base], [dnl -identifier title -comment /**/ -quoted_string 'Quoted_string_title' -end_command . -newline \n (first) - -identifier tit space -comment /* -newline \n (later) - -quoted_string "Quoted_string_on_second_line" -end_command . -newline \n (first) - -identifier sub space -quoted_string "Quoted_string_subtitle" -newline \n (later) - space -end_command . -newline \n (first) - -separate_commands -newline \n (first) - -identifier TITL space -unquoted_string /*_Not_a_*/_quoted_string_title -end_command . -newline \n (first) - -identifier SUBT space -unquoted_string Not_a_quoted_string_/*_subtitle -newline \n (later) - -separate_commands -newline \n (first) - identifier FIL space identifier label space unquoted_string isn't_quoted diff --git a/tests/language/utilities/title.at b/tests/language/utilities/title.at index 822b5c5091..61be72222c 100644 --- a/tests/language/utilities/title.at +++ b/tests/language/utilities/title.at @@ -127,3 +127,21 @@ Table: File Label Label,This is a test file label ]) AT_CLEANUP + +AT_SETUP([TITLE and SUBTITLE]) +for command in TITLE SUBTITLE; do + cat >title.sps <expout <