From 35140ad55d6ce3cbce7d9ffc4b894598f603cd42 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 12 Jan 2013 11:10:08 -0800 Subject: [PATCH] scan: Introduce string_lexer for simple tokenizing of a string. The following commit will introduce a user outside of the tests. --- src/language/lexer/scan.c | 55 ++++++++++++++++++++++++++ src/language/lexer/scan.h | 16 +++++++- tests/language/lexer/scan-test.c | 66 ++++---------------------------- 3 files changed, 78 insertions(+), 59 deletions(-) diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index 5e3d59e19c..6e9fc618e1 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -592,3 +592,58 @@ scanner_push (struct scanner *scanner, enum segment_type type, NOT_REACHED (); } + +/* Initializes SLEX for parsing INPUT in the specified MODE. + + SLEX has no internal state to free, but it retains a reference to INPUT, so + INPUT must not be modified or freed while SLEX is still in use. */ +void +string_lexer_init (struct string_lexer *slex, const char *input, + enum segmenter_mode mode) +{ + slex->input = input; + slex->length = strlen (input) + 1; /* Counts the terminating null byte too. */ + slex->offset = 0; + segmenter_init (&slex->segmenter, mode); +} + +/* Forms the next token from SLEX's input and stores it in TOKEN. Returns true if the token's type is anything other than T_STOP, false if it is T_STOP. TOKEN is filled in either way; scan-test passes it to token_destroy() after every call. */ +bool +string_lexer_next (struct string_lexer *slex, struct token *token) +{ + struct segmenter saved_segmenter; /* NOTE(review): set only on SCAN_SAVE; presumably scanner_push() never returns SCAN_BACK before SCAN_SAVE -- confirm. */ + size_t saved_offset = 0; + + struct scanner scanner; + + scanner_init (&scanner, token); + for (;;) + { + const char *s = slex->input + slex->offset; + size_t left = slex->length - slex->offset; + enum segment_type type; + int n; + + n = segmenter_push (&slex->segmenter, s, left, &type); + assert (n >= 0); + + slex->offset += n; + switch (scanner_push (&scanner, type, ss_buffer (s, n), token)) + { + case SCAN_BACK: /* Rewind to the state recorded at the last SCAN_SAVE. */ + slex->segmenter = saved_segmenter; + slex->offset = saved_offset; + /* Fall through. 
*/ + case SCAN_DONE: + return token->type != T_STOP; + + case SCAN_MORE: + break; + + case SCAN_SAVE: /* Record a point to rewind to on SCAN_BACK. */ + saved_segmenter = slex->segmenter; + saved_offset = slex->offset; + break; + } + } +} diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h index fdb50801c4..73f208033b 100644 --- a/src/language/lexer/scan.h +++ b/src/language/lexer/scan.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -89,5 +89,19 @@ enum scan_result void scanner_init (struct scanner *, struct token *); enum scan_result scanner_push (struct scanner *, enum segment_type, struct substring, struct token *); + +/* A simplified lexer for handling syntax in a string. */ + +struct string_lexer + { + const char *input; /* String being tokenized; referenced, not copied. */ + size_t length; /* strlen (input) + 1, so the null terminator is included. */ + size_t offset; /* Offset in INPUT of the next byte to pass to the segmenter. */ + struct segmenter segmenter; /* Segmenter state carried between calls. */ + }; + +void string_lexer_init (struct string_lexer *, const char *input, + enum segmenter_mode); +bool string_lexer_next (struct string_lexer *, struct token *); #endif /* scan.h */ diff --git a/tests/language/lexer/scan-test.c b/tests/language/lexer/scan-test.c index 313fa16398..84eff8171f 100644 --- a/tests/language/lexer/scan-test.c +++ b/tests/language/lexer/scan-test.c @@ -45,21 +45,12 @@ static void usage (void) NO_RETURN; int main (int argc, char *argv[]) { - struct segment - { - enum segment_type type; - struct substring string; - }; - - size_t offset; const char *file_name; - char *input; - struct segmenter s; - struct segment *segs; - size_t n_segs, allocated_segs; size_t length; - size_t i; - int n; + char *input; + + struct string_lexer slex; + bool more; set_program_name (argv[0]); file_name = parse_options (argc, argv); @@ -76,53 +67,12 @@ main (int argc, char *argv[]) input[length++] = '\n'; input[length++] = '\0'; - segs = 
NULL; - n_segs = allocated_segs = 0; - - segmenter_init (&s, mode); - for (offset = 0; offset < length; offset += n) + string_lexer_init (&slex, input, mode); + do { - enum segment_type type; - - n = segmenter_push (&s, input + offset, length - offset, &type); - assert (n >= 0); - assert (offset + n <= length); - - if (n_segs >= allocated_segs) - segs = x2nrealloc (segs, &allocated_segs, sizeof *segs); - - segs[n_segs].type = type; - segs[n_segs].string.string = input + offset; - segs[n_segs].string.length = n; - n_segs++; - } - - for (i = 0; i < n_segs; ) - { - enum scan_result result; - struct scanner scanner; struct token token; - int saved = -1; - - scanner_init (&scanner, &token); - do - { - struct segment *seg; - - assert (i < n_segs); - seg = &segs[i++]; - result = scanner_push (&scanner, seg->type, seg->string, &token); - if (result == SCAN_SAVE) - saved = i; - } - while (result == SCAN_MORE || result == SCAN_SAVE); - - if (result == SCAN_BACK) - { - assert (saved >= 0); - i = saved; - } + more = string_lexer_next (&slex, &token); printf ("%s", scan_type_to_string (token.type)); if (token.number != 0.0) @@ -138,9 +88,9 @@ main (int argc, char *argv[]) token_destroy (&token); } + while (more); free (input); - free (segs); return 0; } -- 2.30.2