X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fsegment.c;h=ac88117ff5270e1a8b43cdf38172af5aaf431f16;hb=c94be33beb7085e1cbb1ec47f0e3a49c896d443b;hp=d2aa391748f024ed32aeb51364e8cda1442dae60;hpb=6467d294ac5750d7ee060f5c4e0291275ece238d;p=pspp diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index d2aa391748..ac88117ff5 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -28,6 +28,7 @@ #include "gl/c-ctype.h" #include "gl/c-strcase.h" +#include "gl/verify.h" enum segmenter_state { @@ -46,7 +47,6 @@ enum segmenter_state S_DEFINE_2, S_DEFINE_3, S_DEFINE_4, - S_DEFINE_5, S_BEGIN_DATA_1, S_BEGIN_DATA_2, S_BEGIN_DATA_3, @@ -55,6 +55,9 @@ enum segmenter_state S_TITLE_2 }; +/* S_SHBANG is the start state that SEGMENTER_INIT refers to as just 0. */ +verify (S_SHBANG == 0); + #define SS_START_OF_LINE (1u << 0) #define SS_START_OF_COMMAND (1u << 1) @@ -219,6 +222,22 @@ at_end_of_line (const char *input, size_t n, bool eof, int ofs) return is_end_of_line (input, n, eof, ofs); } +static bool +is_all_spaces (const char *input_, size_t n) +{ + const uint8_t *input = CHAR_CAST (const uint8_t *, input_); + + int mblen; + for (int ofs = 0; ofs < n; ofs += mblen) + { + ucs4_t uc; + mblen = u8_mbtouc (&uc, input + ofs, n - ofs); + if (!lex_uc_is_space (uc)) + return false; + } + return true; +} + static int segmenter_parse_newline__ (const char *input, size_t n, bool eof, enum segment_type *type) @@ -676,7 +695,6 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n, case SEG_END: case SEG_EXPECTED_QUOTE: case SEG_EXPECTED_EXPONENT: - case SEG_UNEXPECTED_DOT: case SEG_UNEXPECTED_CHAR: id[0] = '\0'; return ofs + retval; @@ -981,7 +999,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, s->substate = SS_START_OF_COMMAND; } else - *type = SEG_UNEXPECTED_DOT; + *type = SEG_PUNCT; return 1; case '0': case '1': case '2': case '3': case '4': @@ -1436,13 +1454,11 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, - "(" followed by a sequence of tokens possibly including balanced parentheses up to a final ")". - - A newline. - - - A sequence of lines that don't start with "!ENDDEFINE", one string per line, - each ending in a newline. - - - "!ENDDEFINE". - + - A sequence of any number of lines, one string per line, ending with + "!ENDDEFINE". The first line is usually blank (that is, a newline follows + the "("). The last line usually just has "!ENDDEFINE." on it, but it can + start with other tokens. The whole DEFINE...!ENDDEFINE can be on a single + line, even. */ static int segmenter_parse_define_1__ (struct segmenter *s, @@ -1500,94 +1516,99 @@ segmenter_parse_define_2__ (struct segmenter *s, { s->nest--; if (!s->nest) - s->state = S_DEFINE_3; - return ofs; - } - - return ofs; -} - -static int -segmenter_parse_define_3__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) -{ - int ofs = segmenter_subparse (s, input, n, eof, type); - if (ofs < 0) - return -1; - - if (*type == SEG_END_COMMAND) - { - /* The DEFINE command is malformed because there was a command terminator - before the first line of the body. Transition back to general - parsing. */ - s->state = S_GENERAL; + { + s->state = S_DEFINE_3; + s->substate = 0; + } return ofs; } - else if (*type == SEG_NEWLINE) - s->state = S_DEFINE_4; return ofs; } -static bool -is_enddefine (const char *input, size_t n) +static size_t +find_enddefine (struct substring input) { - int ofs = skip_spaces_and_comments (input, n, true, 0); - assert (ofs >= 0); - + size_t n = input.length; const struct substring enddefine = ss_cstr ("!ENDDEFINE"); - if (n - ofs < enddefine.length) - return false; - - if (!ss_equals_case (ss_buffer (input + ofs, enddefine.length), enddefine)) - return false; - - if (ofs + enddefine.length >= n) - return true; - - const uint8_t *u_input = CHAR_CAST (const uint8_t *, input); - ucs4_t uc; - u8_mbtouc (&uc, u_input + ofs, n - ofs); - return uc == '.' || !lex_uc_is_idn (uc); + for (size_t i = 0; i + enddefine.length <= n; i++) + if (input.string[i] == '!' + && ss_equals_case (ss_substr (input, i, enddefine.length), enddefine)) + return i; + return SIZE_MAX; } +/* We are in the body of a macro definition, looking for additional lines of + the body or !ENDDEFINE. */ static int -segmenter_parse_define_4__ (struct segmenter *s, +segmenter_parse_define_3__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { - int ofs; - - ofs = segmenter_parse_full_line__ (input, n, eof, type); + /* Gather a whole line. */ + const char *newline = memchr (input, '\n', n); + int ofs = (newline ? newline - input - (newline > input && newline[-1] == '\r') + : eof ? n + : -1); if (ofs < 0) return -1; - else if (is_enddefine (input, ofs)) + + /* Does the line contain !ENDDEFINE? */ + size_t end = find_enddefine (ss_buffer (input, ofs)); + if (end == SIZE_MAX) { - s->state = S_GENERAL; - s->substate = SS_START_OF_COMMAND | SS_START_OF_LINE; - return segmenter_push (s, input, n, eof, type); + /* No !ENDDEFINE. We have a full line of macro body. + + The line might be blank, whether completely empty or just spaces and + comments. That's OK: we need to report blank lines because they can + have significance. + + However, if the first line of the macro body (the same line as the + closing parenthesis in the argument definition) is blank, we just + report it as spaces because it's not significant. */ + *type = (s->substate == 0 && is_all_spaces (input, ofs) + ? SEG_SPACES : SEG_MACRO_BODY); + s->state = S_DEFINE_4; + s->substate = 1; + return ofs; } else { - *type = SEG_MACRO_BODY; - s->state = S_DEFINE_5; - return input[ofs - 1] == '\n' ? 0 : ofs; + /* Macro ends at the !ENDDEFINE on this line. */ + s->state = S_GENERAL; + s->substate = 0; + if (!end) + { + /* Line starts with !ENDDEFINE. */ + return segmenter_push (s, input, n, eof, type); + } + else + { + if (is_all_spaces (input, end)) + { + /* Line starts with spaces followed by !ENDDEFINE. */ + *type = SEG_SPACES; + } + else + { + /* Line starts with some content followed by !ENDDEFINE. */ + *type = SEG_MACRO_BODY; + } + return end; + } } } static int -segmenter_parse_define_5__ (struct segmenter *s, +segmenter_parse_define_4__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { - int ofs; - - ofs = segmenter_parse_newline__ (input, n, eof, type); + int ofs = segmenter_parse_newline__ (input, n, eof, type); if (ofs < 0) return -1; - s->state = S_DEFINE_4; + s->state = S_DEFINE_3; return ofs; } @@ -1789,9 +1810,7 @@ segment_type_to_string (enum segment_type type) void segmenter_init (struct segmenter *s, enum segmenter_mode mode) { - s->state = S_SHBANG; - s->substate = 0; - s->mode = mode; + *s = (struct segmenter) SEGMENTER_INIT (mode); } /* Returns the mode passed to segmenter_init() for S. */ @@ -1882,8 +1901,6 @@ segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof, return segmenter_parse_define_3__ (s, input, n, eof, type); case S_DEFINE_4: return segmenter_parse_define_4__ (s, input, n, eof, type); - case S_DEFINE_5: - return segmenter_parse_define_5__ (s, input, n, eof, type); case S_BEGIN_DATA_1: return segmenter_parse_begin_data_1__ (s, input, n, eof, type); @@ -1939,10 +1956,9 @@ segmenter_get_prompt (const struct segmenter *s) case S_DEFINE_1: case S_DEFINE_2: - case S_DEFINE_3: return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; + case S_DEFINE_3: case S_DEFINE_4: - case S_DEFINE_5: return PROMPT_DEFINE; case S_BEGIN_DATA_1: