X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fsegment.c;h=346910898ce3898b4bec6c62563a1a6f8dfe10bd;hb=7cd70df07f7e183645853716642810afd4b87bcd;hp=2689811593f4b515682e21c8c315d4a44562d9b4;hpb=e246f73b3dfceae79a23056fee0a18bb4a08bc33;p=pspp diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index 2689811593..346910898c 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -48,6 +48,7 @@ enum segmenter_state S_DEFINE_2, S_DEFINE_3, S_DEFINE_4, + S_DEFINE_5, S_BEGIN_DATA_1, S_BEGIN_DATA_2, S_BEGIN_DATA_3, @@ -291,13 +292,11 @@ skip_digits (const char *input, size_t n, bool eof, int ofs) static int segmenter_parse_number__ (struct segmenter *s, const char *input, size_t n, - bool eof, enum segment_type *type) + bool eof, enum segment_type *type, int ofs) { - int ofs; - assert (s->state == S_GENERAL); - ofs = skip_digits (input, n, eof, 0); + ofs = skip_digits (input, n, eof, ofs); if (ofs < 0) return -1; @@ -682,6 +681,7 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n, case SEG_DO_REPEAT_COMMAND: case SEG_INLINE_DATA: case SEG_MACRO_ID: + case SEG_MACRO_NAME: case SEG_MACRO_BODY: case SEG_START_DOCUMENT: case SEG_DOCUMENT: @@ -939,8 +939,25 @@ segmenter_parse_mid_command__ (struct segmenter *s, *type = SEG_PUNCT; return 1; - case '(': case ')': case ',': case '=': case '-': - case '[': case ']': case '&': case '|': case '+': + case '-': + ofs = skip_spaces (input, n, eof, 1); + if (ofs < 0) + return -1; + else if (ofs < n && c_isdigit (input[ofs])) + return segmenter_parse_number__ (s, input, n, eof, type, ofs); + else if (ofs < n && input[ofs] == '.') + { + if (ofs + 1 >= n) + { + if (!eof) + return -1; + } + else if (c_isdigit (input[ofs + 1])) + return segmenter_parse_number__ (s, input, n, eof, type, ofs); + } + /* Fall through. 
*/ + case '(': case ')': case '{': case ',': case '=': case ';': case ':': + case '[': case ']': case '}': case '&': case '|': case '+': *type = SEG_PUNCT; s->substate = 0; return 1; @@ -971,7 +988,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, return -1; } else if (c_isdigit (input[1])) - return segmenter_parse_number__ (s, input, n, eof, type); + return segmenter_parse_number__ (s, input, n, eof, type, 0); int eol = at_end_of_line (input, n, eof, 1); if (eol < 0) @@ -988,7 +1005,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return segmenter_parse_number__ (s, input, n, eof, type); + return segmenter_parse_number__ (s, input, n, eof, type, 0); case 'u': case 'U': return segmenter_maybe_parse_string__ (SEG_UNICODE_STRING, @@ -1003,7 +1020,20 @@ segmenter_parse_mid_command__ (struct segmenter *s, s, input, n, eof, type); case '!': - return segmenter_parse_id__ (s, input, n, eof, type); + if (n < 2) + { + if (!eof) + return -1; + *type = SEG_PUNCT; + return 1; + } + else if (input[1] == '*') + { + *type = SEG_MACRO_ID; + return 2; + } + else + return segmenter_parse_id__ (s, input, n, eof, type); default: if (lex_uc_is_space (uc)) @@ -1012,7 +1042,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, if (ofs < 0) return -1; - if (input[ofs - 1] == '\r' && input[ofs] == '\n') + if (ofs < n && input[ofs - 1] == '\r' && input[ofs] == '\n') { if (ofs == 1) { @@ -1497,6 +1527,10 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, - The DEFINE keyword. + - An identifier. We transform this into SEG_MACRO_NAME instead of + SEG_IDENTIFIER or SEG_MACRO_ID because this identifier must never be + macro-expanded. + - Anything but "(". - "(" followed by a sequence of tokens possibly including balanced parentheses @@ -1509,15 +1543,21 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, line, even. 
*/ static int -segmenter_parse_define_1__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) +segmenter_parse_define_1_2__ (struct segmenter *s, + const char *input, size_t n, bool eof, + enum segment_type *type) { int ofs = segmenter_subparse (s, input, n, eof, type); if (ofs < 0) return -1; - if (*type == SEG_SEPARATE_COMMANDS + if (s->state == S_DEFINE_1 + && (*type == SEG_IDENTIFIER || *type == SEG_MACRO_ID)) + { + *type = SEG_MACRO_NAME; + s->state = S_DEFINE_2; + } + else if (*type == SEG_SEPARATE_COMMANDS || *type == SEG_END_COMMAND || *type == SEG_START_COMMAND) { @@ -1528,7 +1568,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } else if (*type == SEG_PUNCT && input[0] == '(') { - s->state = S_DEFINE_2; + s->state = S_DEFINE_3; s->nest = 1; return ofs; } @@ -1537,7 +1577,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } static int -segmenter_parse_define_2__ (struct segmenter *s, +segmenter_parse_define_3__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1565,7 +1605,7 @@ segmenter_parse_define_2__ (struct segmenter *s, s->nest--; if (!s->nest) { - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; s->substate = 0; } return ofs; @@ -1579,17 +1619,39 @@ find_enddefine (struct substring input) { size_t n = input.length; const struct substring enddefine = ss_cstr ("!ENDDEFINE"); - for (size_t i = 0; i + enddefine.length <= n; i++) - if (input.string[i] == '!' - && ss_equals_case (ss_substr (input, i, enddefine.length), enddefine)) - return i; - return SIZE_MAX; + for (int ofs = 0;;) + { + /* Skip !ENDDEFINE in comment. */ + ofs = skip_spaces_and_comments (input.string, n, true, ofs); + if (ofs + enddefine.length > n) + return SIZE_MAX; + + char c = input.string[ofs]; + if (c == '!' + && ss_equals_case (ss_substr (input, ofs, enddefine.length), + enddefine)) + return ofs; + else if (c == '\'' || c == '"') + { + /* Skip quoted !ENDDEFINE. 
*/ + ofs++; + for (;;) + { + if (ofs >= n) + return SIZE_MAX; + else if (input.string[ofs++] == c) + break; + } + } + else + ofs++; + } } /* We are in the body of a macro definition, looking for additional lines of the body or !ENDDEFINE. */ static int -segmenter_parse_define_3__ (struct segmenter *s, +segmenter_parse_define_4__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1616,7 +1678,7 @@ segmenter_parse_define_3__ (struct segmenter *s, report it as spaces because it's not significant. */ *type = (s->substate == 0 && is_all_spaces (input, ofs) ? SEG_SPACES : SEG_MACRO_BODY); - s->state = S_DEFINE_4; + s->state = S_DEFINE_5; s->substate = 1; return ofs; } @@ -1648,7 +1710,7 @@ segmenter_parse_define_3__ (struct segmenter *s, } static int -segmenter_parse_define_4__ (struct segmenter *s, +segmenter_parse_define_5__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1656,7 +1718,7 @@ segmenter_parse_define_4__ (struct segmenter *s, if (ofs < 0) return -1; - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; return ofs; } @@ -1831,6 +1893,9 @@ segmenter_get_mode (const struct segmenter *s) bytes as part of INPUT, because they have (figuratively) been consumed by the segmenter. + Segments can have zero length, including segment types SEG_END, + SEG_SEPARATE_COMMANDS, SEG_START_DOCUMENT, SEG_INLINE_DATA, and SEG_SPACES. + Failure occurs only if the segment type of the N bytes in INPUT cannot yet be determined. In this case segmenter_push() returns -1. 
If more input is available, the caller should obtain some more, then call again with a larger @@ -1895,13 +1960,14 @@ segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof, return segmenter_parse_do_repeat_3__ (s, input, n, eof, type); case S_DEFINE_1: - return segmenter_parse_define_1__ (s, input, n, eof, type); case S_DEFINE_2: - return segmenter_parse_define_2__ (s, input, n, eof, type); + return segmenter_parse_define_1_2__ (s, input, n, eof, type); case S_DEFINE_3: return segmenter_parse_define_3__ (s, input, n, eof, type); case S_DEFINE_4: return segmenter_parse_define_4__ (s, input, n, eof, type); + case S_DEFINE_5: + return segmenter_parse_define_5__ (s, input, n, eof, type); case S_BEGIN_DATA_1: return segmenter_parse_begin_data_1__ (s, input, n, eof, type); @@ -1955,9 +2021,10 @@ segmenter_get_prompt (const struct segmenter *s) case S_DEFINE_1: case S_DEFINE_2: - return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_3: + return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_4: + case S_DEFINE_5: return PROMPT_DEFINE; case S_BEGIN_DATA_1: