From d4b8d953acd00e9a51b79cb2e345342649c5ff0c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 29 May 2023 13:09:09 -0700 Subject: [PATCH] command-segmenter: New library for dividing syntax into individual commands. This is a building block for allowing the GUI to run whole commands instead of just lines. --- src/language/lexer/automake.mk | 2 + src/language/lexer/command-segmenter.c | 205 +++++++++++++++++++++++++ src/language/lexer/command-segmenter.h | 44 ++++++ src/language/lexer/scan.c | 3 + src/language/lexer/segment.c | 48 +++++- src/language/lexer/segment.h | 3 + tests/automake.mk | 1 + tests/language/lexer/segment-test.c | 67 +++++++- tests/language/lexer/segment.at | 200 ++++++++++++++++++++++-- 9 files changed, 549 insertions(+), 24 deletions(-) create mode 100644 src/language/lexer/command-segmenter.c create mode 100644 src/language/lexer/command-segmenter.h diff --git a/src/language/lexer/automake.mk b/src/language/lexer/automake.mk index 01b3df49c6..29f0a637f7 100644 --- a/src/language/lexer/automake.mk +++ b/src/language/lexer/automake.mk @@ -20,6 +20,8 @@ language_lexer_sources = \ src/language/lexer/command-name.c \ src/language/lexer/command-name.h \ + src/language/lexer/command-segmenter.c \ + src/language/lexer/command-segmenter.h \ src/language/lexer/include-path.c \ src/language/lexer/include-path.h \ src/language/lexer/lexer.c \ diff --git a/src/language/lexer/command-segmenter.c b/src/language/lexer/command-segmenter.c new file mode 100644 index 0000000000..8d1036d23c --- /dev/null +++ b/src/language/lexer/command-segmenter.c @@ -0,0 +1,205 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include "language/lexer/command-segmenter.h" + +#include "language/lexer/segment.h" +#include "libpspp/deque.h" +#include "libpspp/str.h" + +struct lines + { + int first; /* First line of a command, counting from 0. */ + int last; /* One past the last line of the command. */ + }; + +struct command_segmenter + { + struct segmenter segmenter; + struct string input; /* Input not yet consumed by 'segmenter'. */ + + int command_first_line; /* First line of the command in progress. */ + int line; /* Number of new-lines seen so far. */ + enum segment_type prev_segment; /* Type of the most recent segment. */ + + struct deque deque; /* Indexes into 'lines'. */ + struct lines *lines; /* Commands queued for command_segmenter_get(). */ + }; + +/* Creates and returns a new command segmenter for the given syntax MODE. The caller should eventually pass it to command_segmenter_destroy(). */ +struct command_segmenter * +command_segmenter_create (enum segmenter_mode mode) +{ + struct command_segmenter *cs = xmalloc (sizeof *cs); + *cs = (struct command_segmenter) { + .segmenter = segmenter_init (mode, false), + .input = DS_EMPTY_INITIALIZER, + .prev_segment = SEG_NEWLINE, + .deque = DEQUE_EMPTY_INITIALIZER, + }; + return cs; +} + +/* Destroys CS. 
*/ +void +command_segmenter_destroy (struct command_segmenter *cs) +{ + if (cs) + { + ds_destroy (&cs->input); + free (cs->lines); + free (cs); + } +} + +/* Queues lines [FIRST, LAST) as a command, unless that range is empty. */ +static void +emit (struct command_segmenter *cs, int first, int last) +{ + if (first < last) + { + if (deque_is_full (&cs->deque)) + cs->lines = deque_expand (&cs->deque, cs->lines, sizeof *cs->lines); + cs->lines[deque_push_back (&cs->deque)] = (struct lines) { + .first = first, + .last = last, + }; + } +} + +/* Feeds INPUT (N bytes) to the segmenter; EOF means no more will follow. */ +static void +command_segmenter_push__ (struct command_segmenter *cs, + const char *input, size_t n, bool eof) +{ + if (!ds_is_empty (&cs->input)) + { + ds_put_substring (&cs->input, ss_buffer (input, n)); + input = ds_cstr (&cs->input); + n = ds_length (&cs->input); + } + + for (;;) + { + enum segment_type type; + int retval = segmenter_push (&cs->segmenter, input, n, eof, &type); + if (retval < 0) + break; + switch (type) + { + case SEG_NUMBER: + case SEG_QUOTED_STRING: + case SEG_HEX_STRING: + case SEG_UNICODE_STRING: + case SEG_UNQUOTED_STRING: + case SEG_RESERVED_WORD: + case SEG_IDENTIFIER: + case SEG_PUNCT: + case SEG_SHBANG: + case SEG_SPACES: + case SEG_COMMENT: + case SEG_COMMENT_COMMAND: + case SEG_DO_REPEAT_COMMAND: + case SEG_INLINE_DATA: + case SEG_INNER_START_COMMAND: + case SEG_INNER_SEPARATE_COMMANDS: + case SEG_INNER_END_COMMAND: + case SEG_MACRO_ID: + case SEG_MACRO_NAME: + case SEG_MACRO_BODY: + case SEG_START_DOCUMENT: + case SEG_DOCUMENT: + case SEG_EXPECTED_QUOTE: + case SEG_EXPECTED_EXPONENT: + case SEG_UNEXPECTED_CHAR: + break; + + case SEG_NEWLINE: + cs->line++; + break; + + case SEG_START_COMMAND: + if (cs->line > cs->command_first_line) + emit (cs, cs->command_first_line, cs->line); + cs->command_first_line = cs->line; + break; + + case SEG_SEPARATE_COMMANDS: + if (cs->line > cs->command_first_line) + emit (cs, cs->command_first_line, cs->line); + cs->command_first_line = cs->line + 1; + break; + + case SEG_END_COMMAND: + emit (cs, cs->command_first_line, cs->line + 1); + 
cs->command_first_line = cs->line + 1; + break; + /* At end of input, a final line without a new-line still counts. */ + case SEG_END: + emit (cs, cs->command_first_line, cs->line + (cs->prev_segment != SEG_NEWLINE)); + break; + } + cs->prev_segment = type; + input += retval; + n -= retval; + if (type == SEG_END) + break; + } + /* Keep any unconsumed input to retry along with the next push. */ + ds_assign_substring (&cs->input, ss_buffer (input, n)); +} + +/* Adds the N bytes of UTF-8 encoded syntax INPUT to CS. */ +void +command_segmenter_push (struct command_segmenter *cs, + const char *input, size_t n) +{ + command_segmenter_push__ (cs, input, n, false); +} + +/* Tells CS that no more input is coming. The caller shouldn't call + command_segmenter_push() again. */ +void +command_segmenter_eof (struct command_segmenter *cs) +{ + command_segmenter_push__ (cs, "", 0, true); +} + +/* Attempts to get a pair of line numbers bounding a command in the input from + CS. If successful, returns true and stores the first line in LINES[0] and + one past the last line in LINES[1], counting lines from 0. On failure, returns false. + + Command bounds can start becoming available as soon as after the first call + to command_segmenter_push(). Often the output lags behind the input a + little because some lookahead is needed. After calling + command_segmenter_eof(), all the output is available. + + Command bounds are always in order and commands never overlap. Some lines, + such as blank lines, might not be part of any command. An empty input or + input consisting of just blank lines contains no commands. */ +bool +command_segmenter_get (struct command_segmenter *cs, int lines[2]) +{ + if (deque_is_empty (&cs->deque)) + return false; + + struct lines *r = &cs->lines[deque_pop_front (&cs->deque)]; + lines[0] = r->first; + lines[1] = r->last; + return true; +} diff --git a/src/language/lexer/command-segmenter.h b/src/language/lexer/command-segmenter.h new file mode 100644 index 0000000000..52c2b025a7 --- /dev/null +++ b/src/language/lexer/command-segmenter.h @@ -0,0 +1,44 @@ +/* PSPP - a program for statistical analysis. 
+ Copyright (C) 2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef COMMAND_SEGMENTER_H +#define COMMAND_SEGMENTER_H 1 + +#include "language/lexer/segment.h" + +/* Divides syntax lines into individual commands. + + This is for use by the GUI, which has a feature to run an individual command + in a syntax window. + + This groups together some kinds of commands that the PSPP tokenizer would + put T_ENDCMD inside. For example, it always considers BEGIN DATA...END DATA + as a single command, even though the tokenizer will emit T_ENDCMD after + BEGIN DATA if it has a command terminator. That's because it's the behavior + most useful for the GUI feature. 
+*/ + +struct command_segmenter; + +struct command_segmenter *command_segmenter_create (enum segmenter_mode); +void command_segmenter_destroy (struct command_segmenter *); + +void command_segmenter_push (struct command_segmenter *, + const char *input, size_t n); +void command_segmenter_eof (struct command_segmenter *); +bool command_segmenter_get (struct command_segmenter *, int lines[2]); + +#endif /* command-segmenter.h */ diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index e4fe405d47..43f79df4ee 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -372,6 +372,9 @@ token_from_segment (enum segment_type type, struct substring s, case SEG_START_COMMAND: case SEG_SEPARATE_COMMANDS: case SEG_END_COMMAND: + case SEG_INNER_START_COMMAND: + case SEG_INNER_SEPARATE_COMMANDS: + case SEG_INNER_END_COMMAND: *token = (struct token) { .type = T_ENDCMD }; return TOKENIZE_TOKEN; diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index a7bce8b6e7..58d9af5031 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -688,6 +688,9 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n, case SEG_START_COMMAND: case SEG_SEPARATE_COMMANDS: case SEG_END_COMMAND: + case SEG_INNER_START_COMMAND: + case SEG_INNER_SEPARATE_COMMANDS: + case SEG_INNER_END_COMMAND: case SEG_END: case SEG_EXPECTED_QUOTE: case SEG_EXPECTED_EXPONENT: @@ -800,8 +803,9 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n, return -1; else if (lex_id_match (ss_cstr ("DATA"), ss_cstr (id))) { - int eol; - + /* We've found BEGIN DATA. Check whether that's the entire + command (either followed by a new-line or by '.' then a + new-line). 
*/ ofs2 = skip_spaces_and_comments (input, n, eof, ofs2); if (ofs2 < 0) return -1; @@ -815,11 +819,14 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n, return -1; } - eol = is_end_of_line (input, n, eof, ofs2); + int eol = is_end_of_line (input, n, eof, ofs2); if (eol < 0) return -1; else if (eol) { + /* BEGIN DATA is indeed the entire command. We choose a next + state depending on whether it's one line long or two lines + long. */ if (memchr (input, '\n', ofs2)) s->state = S_BEGIN_DATA_1; else @@ -1229,14 +1236,30 @@ segmenter_parse_start_of_line__ (struct segmenter *s, return 1; } } - /* Fall through. */ + *type = SEG_START_COMMAND; + s->substate = SS_START_OF_COMMAND; + return 1; case '-': - case '.': *type = SEG_START_COMMAND; s->substate = SS_START_OF_COMMAND; return 1; + case '.': + { + /* We've found '.' at the beginning of a line. With more text on the + line, it starts a new command, as '+', '-', or '.' in the leftmost + column does; on an otherwise blank line, it ends the previous one. + This only matters to command_segmenter. (Braces: a declaration + can't directly follow a case label before C23.) */ + int eol = at_end_of_line (input, n, eof, 1); + if (eol < 0) + return -1; + *type = eol ? SEG_END_COMMAND : SEG_START_COMMAND; + s->substate = SS_START_OF_COMMAND; + return 1; + } + default: if (lex_uc_is_space (uc)) { @@ -1409,12 +1432,16 @@ segmenter_parse_do_repeat_1__ (struct segmenter *s, { /* We reached a blank line that separates the head from the body. */ s->state = S_DO_REPEAT_2; + *type = SEG_INNER_SEPARATE_COMMANDS; } else if (*type == SEG_END_COMMAND || *type == SEG_START_COMMAND) { /* We reached the body. */ s->state = S_DO_REPEAT_3; s->substate = 1; + *type = (*type == SEG_END_COMMAND + ? 
SEG_INNER_END_COMMAND + : SEG_INNER_START_COMMAND); } return ofs; @@ -1722,6 +1749,13 @@ segmenter_parse_define_5__ (struct segmenter *s, return ofs; } +/* We're segmenting the first line of a two-line BEGIN DATA command. Segment + up to the first new-line. + + This BEGIN DATA is expressed something like this (weird, but legal): + + BEGIN + DATA. */ static int segmenter_parse_begin_data_1__ (struct segmenter *s, const char *input, size_t n, bool eof, @@ -1737,6 +1771,8 @@ segmenter_parse_begin_data_1__ (struct segmenter *s, return ofs; } +/* We're segmenting a one-line BEGIN DATA command, or the second line of a + two-line BEGIN DATA command. Segment up to the new-line. */ static int segmenter_parse_begin_data_2__ (struct segmenter *s, const char *input, size_t n, bool eof, @@ -1748,6 +1784,8 @@ segmenter_parse_begin_data_2__ (struct segmenter *s, if (*type == SEG_NEWLINE) s->state = S_BEGIN_DATA_3; + else if (*type == SEG_END_COMMAND) + *type = SEG_INNER_END_COMMAND; return ofs; } diff --git a/src/language/lexer/segment.h b/src/language/lexer/segment.h index d5f846a900..29bac670c2 100644 --- a/src/language/lexer/segment.h +++ b/src/language/lexer/segment.h @@ -88,6 +88,9 @@ enum segmenter_mode SEG_TYPE(START_COMMAND) \ SEG_TYPE(SEPARATE_COMMANDS) \ SEG_TYPE(END_COMMAND) \ + SEG_TYPE(INNER_START_COMMAND) \ + SEG_TYPE(INNER_SEPARATE_COMMANDS) \ + SEG_TYPE(INNER_END_COMMAND) \ SEG_TYPE(END) \ \ SEG_TYPE(EXPECTED_QUOTE) \ diff --git a/tests/automake.mk b/tests/automake.mk index aa79c2a482..43c2f89078 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -206,6 +206,7 @@ check_PROGRAMS += tests/language/lexer/segment-test tests_language_lexer_segment_test_SOURCES = \ src/data/identifier.c \ src/language/lexer/command-name.c \ + src/language/lexer/command-segmenter.c \ src/language/lexer/segment.c \ tests/language/lexer/segment-test.c tests_language_lexer_segment_test_CFLAGS = $(AM_CFLAGS) diff --git a/tests/language/lexer/segment-test.c 
b/tests/language/lexer/segment-test.c index 5977e8fce6..3f5a579d5a 100644 --- a/tests/language/lexer/segment-test.c +++ b/tests/language/lexer/segment-test.c @@ -29,6 +29,7 @@ #include "libpspp/cast.h" #include "libpspp/compiler.h" #include "libpspp/misc.h" +#include "language/lexer/command-segmenter.h" #include "language/lexer/segment.h" #include "gl/error.h" @@ -50,6 +51,9 @@ static bool one_byte; /* -0, --truncations: Check that every truncation of input yields a result. */ static bool check_truncations; +/* -c, --commands: Print segmentation of input into commands. */ +static bool commands; + /* -s, --strip-trailing-newline: Strip trailing newline from last line of input. */ static bool strip_trailing_newline; @@ -59,6 +63,7 @@ static void usage (void) NO_RETURN; static void check_segmentation (const char *input, size_t length, bool print_segments); +static void check_commands (const char *input, size_t length); int main (int argc, char *argv[]) @@ -72,8 +77,7 @@ main (int argc, char *argv[]) setvbuf (stdout, NULL, _IONBF, 0); - /* Read from stdin into 'input'. Ensure that 'input' ends in a new-line - followed by a null byte. */ + /* Read syntax into 'input'. */ input = (!strcmp (file_name, "-") ? fread_file (stdin, 0, &length) : read_file (file_name, 0, &length)); @@ -87,9 +91,7 @@ main (int argc, char *argv[]) length--; } - if (!check_truncations) - check_segmentation (input, length, true); - else + if (check_truncations) { size_t test_len; @@ -100,11 +102,54 @@ main (int argc, char *argv[]) free (copy); } } + else if (commands) + check_commands (input, length); + else + check_segmentation (input, length, true); + free (input); return 0; } +/* Prints line LINE, counting from 0, of the LENGTH bytes in INPUT. */ +static void +print_line (const char *input, size_t length, int line) +{ + for (int i = 0; i < line; i++) + { + const char *newline = memchr (input, '\n', length); + size_t line_len = newline ? 
newline - input + 1 : length; + input += line_len; + length -= line_len; + } + const char *end = memchr (input, '\n', length); + printf ("%.*s\n", end ? (int) (end - input) : (int) length, input); +} + +/* Prints the commands found in INPUT, separated by lines of dashes. */ +static void +check_commands (const char *input, size_t length) +{ + struct command_segmenter *cs = command_segmenter_create (mode); + command_segmenter_push (cs, input, length); + command_segmenter_eof (cs); + int last_line = -1; + int lines[2]; + while (command_segmenter_get (cs, lines)) + { + assert (last_line == -1 || lines[0] >= last_line); + assert (lines[0] < lines[1]); + if (last_line != -1) + printf ("-----\n"); + for (int line = lines[0]; line < lines[1]; line++) + print_line (input, length, line); + last_line = lines[1]; + } + + command_segmenter_destroy (cs); +} + static void check_segmentation (const char *input, size_t length, bool print_segments) { @@ -300,12 +345,13 @@ parse_options (int argc, char **argv) {"auto", no_argument, NULL, 'a'}, {"batch", no_argument, NULL, 'b'}, {"interactive", no_argument, NULL, 'i'}, + {"commands", no_argument, NULL, 'c'}, {"verbose", no_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0}, }; - int c = getopt_long (argc, argv, "01abivhs", options, NULL); + int c = getopt_long (argc, argv, "01abivhsc", options, NULL); if (c == -1) break; @@ -335,6 +381,10 @@ parse_options (int argc, char **argv) mode = SEG_MODE_INTERACTIVE; break; + case 'c': + commands = true; + break; + case 'v': verbose = true; break; @@ -368,9 +418,12 @@ usage (void) %s, to test breaking PSPP syntax into lexical segments\n\ usage: %s [OPTIONS] INPUT\n\ \n\ +By default, print segmentation of input into PSPP syntax units. 
Other modes:\n\ + -0, --truncations check null truncation of each prefix of input\n\ + -c, --commands print segmentation into PSPP commands\n\ +\n\ Options:\n\ -1, --one-byte feed one byte at a time\n\ - -0, --truncations check null truncation of each prefix of input\n\ -s, --strip-trailing-newline remove newline from end of input\n\ -a, --auto use \"auto\" syntax mode\n\ -b, --batch use \"batch\" syntax mode\n\ diff --git a/tests/language/lexer/segment.at b/tests/language/lexer/segment.at index abbc08c8cd..0d49147ada 100644 --- a/tests/language/lexer/segment.at +++ b/tests/language/lexer/segment.at @@ -178,10 +178,10 @@ identifier #abcd end_command . newline \n (first) -start_command . +end_command . newline \n (first) -start_command . space +end_command . space newline \n (first) identifier LMNOP @@ -610,7 +610,8 @@ AT_CLEANUP AT_SETUP([* and COMMENT commands]) AT_KEYWORDS([segment]) AT_DATA([input], [dnl -* Comment commands "don't +* Comment commands "don't dnl " + have to contain valid tokens. ** Check ambiguity with ** token. @@ -626,9 +627,31 @@ com is ambiguous with COMPUTE. next command. +]) +AT_CHECK([segment-test -c -i input], [0], [dnl +* Comment commands "don't dnl " + +have to contain valid tokens. +----- +** Check ambiguity with ** token. +----- +****************. +----- +comment keyword works too. +----- +COMM also. +----- +com is ambiguous with COMPUTE. +----- + * Comment need not start at left margin. +----- +* Comment ends with blank line +----- +next command. ]) AT_DATA([expout-base], [dnl -comment_command *_Comment_commands_"don't +comment_command *_Comment_commands_"don't dnl " + newline \n (COMMENT) comment_command have_to_contain_valid_tokens @@ -707,6 +730,20 @@ docu first.paragraph isn't parsed as tokens +second paragraph. +]) +AT_CHECK([segment-test -c -i input], [0], [dnl +DOCUMENT one line. +----- +DOC more + than + one + line. +----- +docu +first.paragraph +isn't parsed as tokens + second paragraph. 
]) AT_DATA([expout-base], [dnl @@ -763,6 +800,15 @@ FILE FILE /* /**/ lab not quoted here either +]) +AT_CHECK([segment-test -c -i input], [0], [dnl +FIL label isn't quoted. +----- +FILE + lab 'is quoted'. +----- +FILE /* +/**/ lab not quoted here either ]) AT_DATA([expout-base], [dnl identifier FIL space @@ -825,10 +871,37 @@ begin data "xxx". begin data 123. not data ]) +AT_CHECK([segment-test -c -i input], [0], [dnl +begin data. +end data. +----- +begin data. /* +123 +xxx +end data. +----- +BEG /**/ DAT /* +5 6 7 /* x + +end data +end data +. +----- +begin + data. +data +end data. +----- +begin data "xxx". +----- +begin data 123. +----- +not data +]) AT_DATA([expout-base], [dnl identifier begin space identifier data -end_command . +inner_end_command . newline \n (data) identifier end space @@ -841,7 +914,7 @@ newline \n (first) identifier begin space identifier data -end_command . space +inner_end_command . space comment /* newline \n (data) @@ -878,7 +951,7 @@ identifier end space identifier data newline \n (later) -start_command . +end_command . newline \n (first) separate_commands @@ -888,7 +961,7 @@ identifier begin newline \n (later) space identifier data -end_command . +inner_end_command . newline \n (data) inline_data data @@ -940,6 +1013,22 @@ do inner command. end repeat. ]) +AT_CHECK([segment-test -c -i input], [0], [dnl +do repeat x=a b c + y=d e f. + do repeat a=1 thru 5. +another command. +second command ++ third command. +end /* x */ /* y */ repeat print. +end + repeat. +----- +do + repeat #a=1. + inner command. +end repeat. +]) AT_DATA([expout-base], [dnl identifier do space identifier repeat space @@ -956,7 +1045,7 @@ punct = identifier d space identifier e space identifier f -end_command . +inner_end_command . newline \n (DO REPEAT) do_repeat_command __do_repeat_a=1_thru_5. @@ -989,7 +1078,7 @@ identifier repeat space identifier #a punct = number 1 -end_command . +inner_end_command . newline \n (DO REPEAT) do_repeat_command __inner_command. 
@@ -1023,6 +1112,23 @@ do inner command end repeat ]) +AT_CHECK([segment-test -c -b input], [0], [dnl +do repeat x=a b c + y=d e f +do repeat a=1 thru 5 +another command +second command ++ third command +end /* x */ /* y */ repeat print +end + repeat +----- +do + repeat #a=1 + + inner command +end repeat +]) AT_DATA([expout-base], [dnl identifier do space identifier repeat space @@ -1041,7 +1147,7 @@ identifier e space identifier f newline \n (later) -start_command +inner_start_command do_repeat_command do_repeat_a=1_thru_5 newline \n (DO REPEAT) @@ -1074,7 +1180,7 @@ punct = number 1 newline \n (later) -separate_commands +inner_separate_commands newline \n (DO REPEAT) do_repeat_command __inner_command @@ -1096,6 +1202,7 @@ define !macro1() var1 var2 var3 "!enddefine" !enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1122,6 +1229,7 @@ AT_DATA([input], [dnl define !macro1() var1 var2 var3 /* !enddefine !enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1145,6 +1253,7 @@ AT_DATA([input], [dnl define !macro1() var1 var2 var3!enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1168,6 +1277,7 @@ AT_KEYWORDS([segment]) AT_DATA([input], [dnl define !macro1()var1 var2 var3!enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1189,6 +1299,7 @@ AT_DATA([input], [dnl define !macro1() !enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1214,6 +1325,7 @@ define !macro1() !enddefine. 
]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1243,6 +1355,7 @@ AT_DATA([input], [dnl define !macro1(a(), b(), c()) !enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1281,6 +1394,7 @@ define !macro1( ) !enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1330,6 +1444,7 @@ content 1 content 2 !enddefine. ]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1368,6 +1483,11 @@ AT_DATA([input], [dnl define !macro1. data list /x 1. ]) +AT_CHECK([segment-test -c -i input], [0], [dnl +define !macro1. +----- +data list /x 1. +]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1394,6 +1514,12 @@ define !macro1 x. data list /x 1. ]) +AT_CHECK([segment-test -c -i input], [0], [dnl +define !macro1 +x. +----- +data list /x 1. +]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1423,6 +1549,13 @@ define !macro1(. x. data list /x 1. ]) +AT_CHECK([segment-test -c -i input], [0], [dnl +define !macro1@{:@. +----- +x. +----- +data list /x 1. +]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1455,6 +1588,11 @@ dnl which should not be there and ends it early. define !macro1. data list /x 1. ]) +AT_CHECK([segment-test -c -i input], [0], [dnl +define !macro1. +----- +data list /x 1. 
+]) AT_DATA([expout-base], [dnl identifier define space macro_name !macro1 @@ -1643,11 +1781,49 @@ end ]) PSPP_CHECK_SEGMENT([-a]) AT_CLEANUP + +AT_SETUP([empty input]) +AT_KEYWORDS([segment]) +: > input +AT_DATA([expout-base], [dnl +end +]) +AT_CHECK([cp input expout && segment-test -c -i input], [0], [expout]) +PSPP_CHECK_SEGMENT +AT_CLEANUP + +AT_SETUP([blank lines input]) +AT_KEYWORDS([segment]) +AT_DATA([input], [dnl + + + + +]) +AT_DATA([expout-base], [dnl +separate_commands +newline \n (first) + +separate_commands +newline \n (first) + +separate_commands +newline \n (first) + +-separate_commands +-newline \n (first) +- +end +]) +AT_CHECK([segment-test -c -i input]) +PSPP_CHECK_SEGMENT +AT_CLEANUP # This checks for regression against bug #61253. To see the read of # uninitialized data, run with valgrind. The test will pass either # way. (The bug report has a more complicated crashing case.) AT_SETUP([input ends in carriage return]) +AT_KEYWORDS([segment]) printf '\r' > input AT_DATA([expout-base], [dnl separate_commands -- 2.30.2