X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fsegment.c;h=4a6fefb4c25d1e7c1441cd1a6a8d43f5b76b1873;hb=134b0f8bcfadd9d4ae051d665f30c3227fae1c75;hp=35240b4c64c18dbf22a7b5d0d8bc8b52d382eb2f;hpb=320622191b3de640da6ba0e347a94d28493711ae;p=pspp diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index 35240b4c64..4a6fefb4c2 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -48,6 +48,7 @@ enum segmenter_state S_DEFINE_2, S_DEFINE_3, S_DEFINE_4, + S_DEFINE_5, S_BEGIN_DATA_1, S_BEGIN_DATA_2, S_BEGIN_DATA_3, @@ -291,13 +292,11 @@ skip_digits (const char *input, size_t n, bool eof, int ofs) static int segmenter_parse_number__ (struct segmenter *s, const char *input, size_t n, - bool eof, enum segment_type *type) + bool eof, enum segment_type *type, int ofs) { - int ofs; - assert (s->state == S_GENERAL); - ofs = skip_digits (input, n, eof, 0); + ofs = skip_digits (input, n, eof, ofs); if (ofs < 0) return -1; @@ -682,6 +681,7 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n, case SEG_DO_REPEAT_COMMAND: case SEG_INLINE_DATA: case SEG_MACRO_ID: + case SEG_MACRO_NAME: case SEG_MACRO_BODY: case SEG_START_DOCUMENT: case SEG_DOCUMENT: @@ -939,7 +939,24 @@ segmenter_parse_mid_command__ (struct segmenter *s, *type = SEG_PUNCT; return 1; - case '(': case ')': case ',': case '=': case '-': + case '-': + ofs = skip_spaces (input, n, eof, 1); + if (ofs < 0) + return -1; + else if (c_isdigit (input[ofs])) + return segmenter_parse_number__ (s, input, n, eof, type, ofs); + else if (input[ofs] == '.') + { + if (ofs + 1 >= n) + { + if (!eof) + return -1; + } + else if (c_isdigit (input[ofs + 1])) + return segmenter_parse_number__ (s, input, n, eof, type, ofs); + } + /* Fall through. 
*/ + case '(': case ')': case ',': case '=': case '[': case ']': case '&': case '|': case '+': *type = SEG_PUNCT; s->substate = 0; @@ -971,7 +988,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, return -1; } else if (c_isdigit (input[1])) - return segmenter_parse_number__ (s, input, n, eof, type); + return segmenter_parse_number__ (s, input, n, eof, type, 0); int eol = at_end_of_line (input, n, eof, 1); if (eol < 0) @@ -988,7 +1005,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return segmenter_parse_number__ (s, input, n, eof, type); + return segmenter_parse_number__ (s, input, n, eof, type, 0); case 'u': case 'U': return segmenter_maybe_parse_string__ (SEG_UNICODE_STRING, @@ -1003,7 +1020,20 @@ segmenter_parse_mid_command__ (struct segmenter *s, s, input, n, eof, type); case '!': - return segmenter_parse_id__ (s, input, n, eof, type); + if (n < 2) + { + if (!eof) + return -1; + *type = SEG_PUNCT; + return 1; + } + else if (input[1] == '*') + { + *type = SEG_MACRO_ID; + return 2; + } + else + return segmenter_parse_id__ (s, input, n, eof, type); default: if (lex_uc_is_space (uc)) @@ -1012,7 +1042,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, if (ofs < 0) return -1; - if (input[ofs - 1] == '\r' && input[ofs] == '\n') + if (ofs < n && input[ofs - 1] == '\r' && input[ofs] == '\n') { if (ofs == 1) { @@ -1497,6 +1527,10 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, - The DEFINE keyword. + - An identifier. We transform this into SEG_MACRO_NAME instead of + SEG_IDENTIFIER or SEG_MACRO_ID because this identifier must never be + macro-expanded. + - Anything but "(". - "(" followed by a sequence of tokens possibly including balanced parentheses @@ -1509,15 +1543,21 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s, line, even. 
*/ static int -segmenter_parse_define_1__ (struct segmenter *s, - const char *input, size_t n, bool eof, - enum segment_type *type) +segmenter_parse_define_1_2__ (struct segmenter *s, + const char *input, size_t n, bool eof, + enum segment_type *type) { int ofs = segmenter_subparse (s, input, n, eof, type); if (ofs < 0) return -1; - if (*type == SEG_SEPARATE_COMMANDS + if (s->state == S_DEFINE_1 + && (*type == SEG_IDENTIFIER || *type == SEG_MACRO_ID)) + { + *type = SEG_MACRO_NAME; + s->state = S_DEFINE_2; + } + else if (*type == SEG_SEPARATE_COMMANDS || *type == SEG_END_COMMAND || *type == SEG_START_COMMAND) { @@ -1528,7 +1568,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } else if (*type == SEG_PUNCT && input[0] == '(') { - s->state = S_DEFINE_2; + s->state = S_DEFINE_3; s->nest = 1; return ofs; } @@ -1537,7 +1577,7 @@ segmenter_parse_define_1__ (struct segmenter *s, } static int -segmenter_parse_define_2__ (struct segmenter *s, +segmenter_parse_define_3__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1565,7 +1605,7 @@ segmenter_parse_define_2__ (struct segmenter *s, s->nest--; if (!s->nest) { - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; s->substate = 0; } return ofs; @@ -1579,17 +1619,39 @@ find_enddefine (struct substring input) { size_t n = input.length; const struct substring enddefine = ss_cstr ("!ENDDEFINE"); - for (size_t i = 0; i + enddefine.length <= n; i++) - if (input.string[i] == '!' - && ss_equals_case (ss_substr (input, i, enddefine.length), enddefine)) - return i; - return SIZE_MAX; + for (int ofs = 0;;) + { + /* Skip !ENDDEFINE in comment. */ + ofs = skip_spaces_and_comments (input.string, n, true, ofs); + if (ofs + enddefine.length > n) + return SIZE_MAX; + + char c = input.string[ofs]; + if (c == '!' + && ss_equals_case (ss_substr (input, ofs, enddefine.length), + enddefine)) + return ofs; + else if (c == '\'' || c == '"') + { + /* Skip quoted !ENDDEFINE. 
*/ + ofs++; + for (;;) + { + if (ofs >= n) + return SIZE_MAX; + else if (input.string[ofs++] == c) + break; + } + } + else + ofs++; + } } /* We are in the body of a macro definition, looking for additional lines of the body or !ENDDEFINE. */ static int -segmenter_parse_define_3__ (struct segmenter *s, +segmenter_parse_define_4__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1616,7 +1678,7 @@ segmenter_parse_define_3__ (struct segmenter *s, report it as spaces because it's not significant. */ *type = (s->substate == 0 && is_all_spaces (input, ofs) ? SEG_SPACES : SEG_MACRO_BODY); - s->state = S_DEFINE_4; + s->state = S_DEFINE_5; s->substate = 1; return ofs; } @@ -1648,7 +1710,7 @@ segmenter_parse_define_3__ (struct segmenter *s, } static int -segmenter_parse_define_4__ (struct segmenter *s, +segmenter_parse_define_5__ (struct segmenter *s, const char *input, size_t n, bool eof, enum segment_type *type) { @@ -1656,7 +1718,7 @@ segmenter_parse_define_4__ (struct segmenter *s, if (ofs < 0) return -1; - s->state = S_DEFINE_3; + s->state = S_DEFINE_4; return ofs; } @@ -1786,17 +1848,28 @@ segment_type_to_string (enum segment_type type) } } -/* Initializes S as a segmenter with the given syntax MODE. +/* Returns a segmenter with the given syntax MODE. + + If IS_SNIPPET is false, then the segmenter will parse as if it's being given + a whole file. This means, for example, that it will interpret - or + at the + beginning of the syntax as a separator between commands (since - or + at the + beginning of a line has this meaning). + + If IS_SNIPPET is true, then the segmenter will parse as if it's being given + an isolated piece of syntax. This means, for example, that it will + interpret - or + at the beginning of the syntax as an operator token or (if + followed by a digit) as part of a number. A segmenter does not contain any external references, so nothing needs to be done to destroy one. 
For the same reason, segmenters may be copied with plain struct assignment (or memcpy). */ -void -segmenter_init (struct segmenter *s, enum segmenter_mode mode) +struct segmenter +segmenter_init (enum segmenter_mode mode, bool is_snippet) { - s->state = S_SHBANG; - s->substate = 0; - s->mode = mode; + return (struct segmenter) { + .state = is_snippet ? S_GENERAL : S_SHBANG, + .mode = mode, + }; } /* Returns the mode passed to segmenter_init() for S. */ @@ -1884,13 +1957,14 @@ segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof, return segmenter_parse_do_repeat_3__ (s, input, n, eof, type); case S_DEFINE_1: - return segmenter_parse_define_1__ (s, input, n, eof, type); case S_DEFINE_2: - return segmenter_parse_define_2__ (s, input, n, eof, type); + return segmenter_parse_define_1_2__ (s, input, n, eof, type); case S_DEFINE_3: return segmenter_parse_define_3__ (s, input, n, eof, type); case S_DEFINE_4: return segmenter_parse_define_4__ (s, input, n, eof, type); + case S_DEFINE_5: + return segmenter_parse_define_5__ (s, input, n, eof, type); case S_BEGIN_DATA_1: return segmenter_parse_begin_data_1__ (s, input, n, eof, type); @@ -1944,9 +2018,10 @@ segmenter_get_prompt (const struct segmenter *s) case S_DEFINE_1: case S_DEFINE_2: - return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_3: + return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER; case S_DEFINE_4: + case S_DEFINE_5: return PROMPT_DEFINE; case S_BEGIN_DATA_1: