From: Ben Pfaff Date: Sun, 4 Jul 2021 05:00:47 +0000 (-0700) Subject: segment: Distinguish snippets from full files. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dc25c013a7ce573b87fd27cbefddc732733c837f;p=pspp segment: Distinguish snippets from full files. The comment on segmenter_init() explains what this means: If IS_SNIPPET is false, then the segmenter will parse as if it's being given a whole file. This means, for example, that it will interpret - or + at the beginning of the syntax as a separator between commands (since - or + at the beginning of a line has this meaning). If IS_SNIPPET is true, then the segmenter will parse as if it's being given an isolated piece of syntax. This means, for example, that it will interpret - or + at the beginning of the syntax as an operator token or (if followed by a digit) as part of a number. --- diff --git a/src/language/control/repeat.c b/src/language/control/repeat.c index 118e8d3ccd..86dd36f7f0 100644 --- a/src/language/control/repeat.c +++ b/src/language/control/repeat.c @@ -201,10 +201,7 @@ do_parse_commands (struct substring s, enum segmenter_mode mode, struct hmap *dummies, struct string *outputs, size_t n_outputs) { - struct segmenter segmenter; - - segmenter_init (&segmenter, mode); - + struct segmenter segmenter = segmenter_init (mode, false); while (!ss_is_empty (s)) { enum segment_type type; diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index df2806eedf..718458f8da 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -988,7 +988,7 @@ lex_match_phrase (struct lexer *lexer, const char *s) int i; i = 0; - string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE); + string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true); while (string_lexer_next (&slex, &token)) if (token.type != SCAN_SKIP) { @@ -1238,7 +1238,8 @@ lex_interactive_reset (struct lexer *lexer) src->journal_pos = src->seg_pos = src->line_pos = 0; 
src->n_newlines = 0; src->suppress_next_newline = false; - segmenter_init (&src->segmenter, segmenter_get_mode (&src->segmenter)); + src->segmenter = segmenter_init (segmenter_get_mode (&src->segmenter), + false); while (!deque_is_empty (&src->deque)) lex_source_pop__ (src); lex_source_push_endcmd__ (src); @@ -1658,12 +1659,12 @@ lex_source_push_endcmd__ (struct lex_source *src) static struct lex_source * lex_source_create (struct lex_reader *reader) { - struct lex_source *src; - - src = xzalloc (sizeof *src); - src->reader = reader; - segmenter_init (&src->segmenter, reader->syntax); - src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens); + struct lex_source *src = xmalloc (sizeof *src); + *src = (struct lex_source) { + .reader = reader, + .segmenter = segmenter_init (reader->syntax, false), + .tokens = deque_init (&src->deque, 4, sizeof *src->tokens), + }; lex_source_push_endcmd__ (src); diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index 0c92210540..0acff46cdd 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -651,12 +651,14 @@ scanner_push (struct scanner *scanner, enum segment_type type, INPUT must not be modified or freed while SLEX is still in use. 
*/ void string_lexer_init (struct string_lexer *slex, const char *input, size_t length, - enum segmenter_mode mode) + enum segmenter_mode mode, bool is_snippet) { - slex->input = input; - slex->length = length; - slex->offset = 0; - segmenter_init (&slex->segmenter, mode); + *slex = (struct string_lexer) { + .input = input, + .length = length, + .offset = 0, + .segmenter = segmenter_init (mode, is_snippet), + }; } /* */ diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h index 61bfc5b553..0dde273804 100644 --- a/src/language/lexer/scan.h +++ b/src/language/lexer/scan.h @@ -102,7 +102,7 @@ struct string_lexer }; void string_lexer_init (struct string_lexer *, const char *input, - size_t length, enum segmenter_mode); + size_t length, enum segmenter_mode, bool is_snippet); bool string_lexer_next (struct string_lexer *, struct token *); #endif /* scan.h */ diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index 35240b4c64..2689811593 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -1786,17 +1786,28 @@ segment_type_to_string (enum segment_type type) } } -/* Initializes S as a segmenter with the given syntax MODE. +/* Returns a segmenter with the given syntax MODE. + + If IS_SNIPPET is false, then the segmenter will parse as if it's being given + a whole file. This means, for example, that it will interpret - or + at the + beginning of the syntax as a separator between commands (since - or + at the + beginning of a line has this meaning). + + If IS_SNIPPET is true, then the segmenter will parse as if it's being given + an isolated piece of syntax. This means that, for example, that it will + interpret - or + at the beginning of the syntax as an operator token or (if + followed by a digit) as part of a number. A segmenter does not contain any external references, so nothing needs to be done to destroy one. For the same reason, segmenters may be copied with plain struct assignment (or memcpy). 
*/ -void -segmenter_init (struct segmenter *s, enum segmenter_mode mode) +struct segmenter +segmenter_init (enum segmenter_mode mode, bool is_snippet) { - s->state = S_SHBANG; - s->substate = 0; - s->mode = mode; + return (struct segmenter) { + .state = is_snippet ? S_GENERAL : S_SHBANG, + .mode = mode, + }; } /* Returns the mode passed to segmenter_init() for S. */ diff --git a/src/language/lexer/segment.h b/src/language/lexer/segment.h index 02a269bdd2..5d550f531f 100644 --- a/src/language/lexer/segment.h +++ b/src/language/lexer/segment.h @@ -117,7 +117,7 @@ struct segmenter unsigned char mode; }; -void segmenter_init (struct segmenter *, enum segmenter_mode); +struct segmenter segmenter_init (enum segmenter_mode, bool is_snippet); enum segmenter_mode segmenter_get_mode (const struct segmenter *); diff --git a/tests/language/lexer/scan-test.c b/tests/language/lexer/scan-test.c index 1eb04338e3..2a77e127ac 100644 --- a/tests/language/lexer/scan-test.c +++ b/tests/language/lexer/scan-test.c @@ -73,7 +73,7 @@ main (int argc, char *argv[]) length--; } - string_lexer_init (&slex, input, length, mode); + string_lexer_init (&slex, input, length, mode, false); do { struct token token; diff --git a/tests/language/lexer/segment-test.c b/tests/language/lexer/segment-test.c index a3b67b89b2..acb444f200 100644 --- a/tests/language/lexer/segment-test.c +++ b/tests/language/lexer/segment-test.c @@ -108,8 +108,7 @@ main (int argc, char *argv[]) static void check_segmentation (const char *input, size_t length, bool print_segments) { - struct segmenter s; - segmenter_init (&s, mode); + struct segmenter s = segmenter_init (mode, false); size_t line_number = 1; size_t line_offset = 0;