X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fsegment.c;h=346910898ce3898b4bec6c62563a1a6f8dfe10bd;hb=8a0397328b6230fd49724e1c6d91a5a545d2fb4b;hp=5c298a781dcb0dce16c80394d5cc1f35a3ee6ee8;hpb=be14f24529e64b7d1dad2b148b4d254da38160de;p=pspp diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index 5c298a781d..346910898c 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -48,6 +48,7 @@ enum segmenter_state S_DEFINE_2, S_DEFINE_3, S_DEFINE_4, + S_DEFINE_5, S_BEGIN_DATA_1, S_BEGIN_DATA_2, S_BEGIN_DATA_3, @@ -680,6 +681,7 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n, case SEG_DO_REPEAT_COMMAND: case SEG_INLINE_DATA: case SEG_MACRO_ID: + case SEG_MACRO_NAME: case SEG_MACRO_BODY: case SEG_START_DOCUMENT: case SEG_DOCUMENT: @@ -941,9 +943,9 @@ segmenter_parse_mid_command__ (struct segmenter *s, ofs = skip_spaces (input, n, eof, 1); if (ofs < 0) return -1; - else if (c_isdigit (input[ofs])) + else if (ofs < n && c_isdigit (input[ofs])) return segmenter_parse_number__ (s, input, n, eof, type, ofs); - else if (input[ofs] == '.') + else if (ofs < n && input[ofs] == '.') { if (ofs + 1 >= n) { @@ -954,8 +956,8 @@ segmenter_parse_mid_command__ (struct segmenter *s, return segmenter_parse_number__ (s, input, n, eof, type, ofs); } /* Fall through. 
*/ - case '(': case ')': case ',': case '=': - case '[': case ']': case '&': case '|': case '+': + case '(': case ')': case '{': case ',': case '=': case ';': case ':': + case '[': case ']': case '}': case '&': case '|': case '+': *type = SEG_PUNCT; s->substate = 0; return 1; @@ -1018,7 +1020,20 @@ segmenter_parse_mid_command__ (struct segmenter *s, s, input, n, eof, type); case '!': - return segmenter_parse_id__ (s, input, n, eof, type); + if (n < 2) + { + if (!eof) + return -1; + *type = SEG_PUNCT; + return 1; + } + else if (input[1] == '*') + { + *type = SEG_MACRO_ID; + return 2; + } + else + return segmenter_parse_id__ (s, input, n, eof, type); default: if (lex_uc_is_space (uc)) @@ -1027,7 +1042,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, if (ofs < 0) return -1; - if (input[ofs - 1] == '\r' && input[ofs] == '\n') + if (ofs < n && input[ofs - 1] == '\r' && input[ofs] == '\n') { if (ofs == 1) { @@ -1512,6 +1527,10 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, - The DEFINE keyword. + - An identifier. We transform this into SEG_MACRO_NAME instead of + SEG_IDENTIFIER or SEG_MACRO_ID because this identifier must never be + macro-expanded. + - Anything but "(". - "(" followed by a sequence of tokens possibly including balanced parentheses @@ -1524,15 +1543,21 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, line, even. 
*/ static int -segmenter_parse_define_1__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) +segmenter_parse_define_1_2__ (struct segmenter *s, + const char *input, size_t n, bool eof, + enum segment_type *type) { int ofs = segmenter_subparse (s, input, n, eof, type); if (ofs < 0) return -1; - if (*type == SEG_SEPARATE_COMMANDS + if (s->state == S_DEFINE_1 + && (*type == SEG_IDENTIFIER || *type == SEG_MACRO_ID)) + { + *type = SEG_MACRO_NAME; + s->state = S_DEFINE_2; + } + else if (*type == SEG_SEPARATE_COMMANDS || *type == SEG_END_COMMAND || *type == SEG_START_COMMAND) { @@ -1543,7 +1568,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } else if (*type == SEG_PUNCT && input[0] == '(') { - s->state = S_DEFINE_2; + s->state = S_DEFINE_3; s->nest = 1; return ofs; } @@ -1552,7 +1577,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } static int -segmenter_parse_define_2__ (struct segmenter *s, +segmenter_parse_define_3__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1580,7 +1605,7 @@ segmenter_parse_define_2__ (struct segmenter *s, s->nest--; if (!s->nest) { - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; s->substate = 0; } return ofs; @@ -1626,7 +1651,7 @@ find_enddefine (struct substring input) /* We are in the body of a macro definition, looking for additional lines of the body or !ENDDEFINE. */ static int -segmenter_parse_define_3__ (struct segmenter *s, +segmenter_parse_define_4__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1653,7 +1678,7 @@ segmenter_parse_define_3__ (struct segmenter *s, report it as spaces because it's not significant. */ *type = (s->substate == 0 && is_all_spaces (input, ofs) ? 
SEG_SPACES : SEG_MACRO_BODY); - s->state = S_DEFINE_4; + s->state = S_DEFINE_5; s->substate = 1; return ofs; } @@ -1685,7 +1710,7 @@ segmenter_parse_define_3__ (struct segmenter *s, } static int -segmenter_parse_define_4__ (struct segmenter *s, +segmenter_parse_define_5__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1693,7 +1718,7 @@ segmenter_parse_define_4__ (struct segmenter *s, if (ofs < 0) return -1; - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; return ofs; } @@ -1868,6 +1893,9 @@ segmenter_get_mode (const struct segmenter *s) bytes as part of INPUT, because they have (figuratively) been consumed by the segmenter. + Segments can have zero length, including segment types SEG_END, + SEG_SEPARATE_COMMANDS, SEG_START_DOCUMENT, SEG_INLINE_DATA, and SEG_SPACES. + Failure occurs only if the segment type of the N bytes in INPUT cannot yet be determined. In this case segmenter_push() returns -1. If more input is available, the caller should obtain some more, then call again with a larger @@ -1932,13 +1960,14 @@ segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof, return segmenter_parse_do_repeat_3__ (s, input, n, eof, type); case S_DEFINE_1: - return segmenter_parse_define_1__ (s, input, n, eof, type); case S_DEFINE_2: - return segmenter_parse_define_2__ (s, input, n, eof, type); + return segmenter_parse_define_1_2__ (s, input, n, eof, type); case S_DEFINE_3: return segmenter_parse_define_3__ (s, input, n, eof, type); case S_DEFINE_4: return segmenter_parse_define_4__ (s, input, n, eof, type); + case S_DEFINE_5: + return segmenter_parse_define_5__ (s, input, n, eof, type); case S_BEGIN_DATA_1: return segmenter_parse_begin_data_1__ (s, input, n, eof, type); @@ -1992,9 +2021,10 @@ segmenter_get_prompt (const struct segmenter *s) case S_DEFINE_1: case S_DEFINE_2: - return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_3: + return s->substate & SS_START_OF_COMMAND ? 
PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_4: + case S_DEFINE_5: return PROMPT_DEFINE; case S_BEGIN_DATA_1: