X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fsegment.c;h=5f7fc01310d4241288b39d038ceb1294c25c46f6;hb=ed109bf498216cef15a3cbf180827dc8b20eff0b;hp=c0a09973ce8059535ea7c1e487c5fa2016ff8134;hpb=e0f9210e814d03bc43b6a9b30a402e403d5666b9;p=pspp diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index c0a09973ce..5f7fc01310 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -92,21 +92,26 @@ segmenter_parse_shbang__ (struct segmenter *s, const char *input, size_t n, { if (input[1] == '!') { - int ofs; - - for (ofs = 2; ofs < n; ofs++) - if (input[ofs] == '\n') - { - if (input[ofs] == '\n' && input[ofs - 1] == '\r') - ofs--; - - s->state = S_GENERAL; - s->substate = SS_START_OF_COMMAND; - *type = SEG_SHBANG; - return ofs; - } + for (int ofs = 2; ; ofs++) + { + if (ofs >= n) + { + if (!eof) + return -1; + } + else if (input[ofs] == '\n') + { + if (input[ofs - 1] == '\r') + ofs--; + } + else + continue; - return eof ? ofs : -1; + s->state = S_GENERAL; + s->substate = SS_START_OF_COMMAND; + *type = SEG_SHBANG; + return ofs; + } } } else if (!eof) @@ -281,20 +286,23 @@ segmenter_parse_number__ (struct segmenter *s, const char *input, size_t n, if (!eof) return -1; goto number; - }; + } if (input[ofs] == '.') { + if (ofs + 1 >= n) + { + if (!eof) + return -1; + goto number; + } + ofs = skip_digits (input, n, eof, ofs + 1); if (ofs < 0) return -1; + else if (ofs >= n) + goto number; } - if (ofs >= n) - { - if (!eof) - return -1; - goto number; - } if (input[ofs] == 'e' || input[ofs] == 'E') { ofs++; @@ -643,6 +651,7 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n, } /* fall through */ + case SEG_MACRO_ID: case SEG_NUMBER: case SEG_QUOTED_STRING: case SEG_HEX_STRING: @@ -711,10 +720,9 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n, ofs--; } - if (is_reserved_word (input, ofs)) - *type = SEG_RESERVED_WORD; - else - *type = SEG_IDENTIFIER; + *type = (is_reserved_word (input, ofs) ? SEG_RESERVED_WORD + : input[0] == '!' ? SEG_MACRO_ID + : SEG_IDENTIFIER); if (s->substate & SS_START_OF_COMMAND) { @@ -981,6 +989,9 @@ segmenter_parse_mid_command__ (struct segmenter *s, return segmenter_parse_string__ (SEG_QUOTED_STRING, 0, s, input, n, eof, type); + case '!': + return segmenter_parse_id__ (s, input, n, eof, type); + default: if (lex_uc_is_space (uc)) { @@ -1004,6 +1015,12 @@ segmenter_parse_mid_command__ (struct segmenter *s, } else if (lex_uc_is_id1 (uc)) return segmenter_parse_id__ (s, input, n, eof, type); + else if (uc > 32 && uc < 127 && uc != '\\' && uc != '^') + { + *type = SEG_PUNCT; + s->substate = 0; + return 1; + } else { *type = SEG_UNEXPECTED_CHAR; @@ -1269,6 +1286,9 @@ segmenter_subparse (struct segmenter *s, return ofs; } +/* We are segmenting a DO REPEAT command, currently reading the syntax that + defines the stand-in variables (the head) before the lines of syntax to be + repeated (the body). */ static int segmenter_parse_do_repeat_1__ (struct segmenter *s, const char *input, size_t n, bool eof, @@ -1278,10 +1298,14 @@ segmenter_parse_do_repeat_1__ (struct segmenter *s, if (ofs < 0) return -1; - if (*type == SEG_START_COMMAND || *type == SEG_SEPARATE_COMMANDS) - s->state = S_DO_REPEAT_2; - else if (*type == SEG_END_COMMAND) + if (*type == SEG_SEPARATE_COMMANDS) { + /* We reached a blank line that separates the head from the body. */ + s->state = S_DO_REPEAT_2; + } + else if (*type == SEG_END_COMMAND || *type == SEG_START_COMMAND) + { + /* We reached the body. */ s->state = S_DO_REPEAT_3; s->substate = 1; } @@ -1289,6 +1313,8 @@ segmenter_parse_do_repeat_1__ (struct segmenter *s, return ofs; } +/* We are segmenting a DO REPEAT command, currently reading a blank line that + separates the head from the body. */ static int segmenter_parse_do_repeat_2__ (struct segmenter *s, const char *input, size_t n, bool eof, @@ -1300,6 +1326,7 @@ segmenter_parse_do_repeat_2__ (struct segmenter *s, if (*type == SEG_NEWLINE) { + /* We reached the body. */ s->state = S_DO_REPEAT_3; s->substate = 1; } @@ -1356,6 +1383,12 @@ segmenter_parse_full_line__ (const char *input, size_t n, bool eof, return ofs - (input[ofs - 1] == '\r'); } +/* We are in the body of DO REPEAT, segmenting the lines of syntax that are to + be repeated. Report each line of syntax as a single SEG_DO_REPEAT_COMMAND. + + DO REPEAT can be nested, so we look for DO REPEAT...END REPEAT blocks inside + the lines we're segmenting. s->substate counts the nesting level, starting + at 1. */ static int segmenter_parse_do_repeat_3__ (struct segmenter *s, const char *input, size_t n, bool eof, @@ -1370,6 +1403,8 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, return -1; else if (s->substate == 0) { + /* Nesting level dropped to 0, so we've finished reading the DO REPEAT + body. */ s->state = S_GENERAL; s->substate = SS_START_OF_COMMAND | SS_START_OF_LINE; return segmenter_push (s, input, n, eof, type);