From cfc02088ad19e0efa0ebd8becc84854c7989a3a8 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 24 Aug 2024 08:32:09 -0700 Subject: [PATCH] work --- rust/pspp/src/command.rs | 33 +++-- rust/pspp/src/engine.rs | 1 + rust/pspp/src/lex/lexer.rs | 189 ++++++++------------------- rust/pspp/src/lex/scan/mod.rs | 98 +++++++------- rust/pspp/src/lex/scan/test.rs | 52 ++++++-- rust/pspp/src/lex/segment/mod.rs | 207 +++++++++++++++--------------- rust/pspp/src/lex/segment/test.rs | 54 +------- 7 files changed, 279 insertions(+), 355 deletions(-) diff --git a/rust/pspp/src/command.rs b/rust/pspp/src/command.rs index a40aae5f2a..ce6bde716c 100644 --- a/rust/pspp/src/command.rs +++ b/rust/pspp/src/command.rs @@ -86,7 +86,7 @@ struct Variable { fn parse_descriptives(context: &mut Context) { let subcommands = collect_subcommands(context); for subcommand in subcommands { - + } } @@ -108,16 +108,16 @@ fn commands() -> &'static [Command] { println!("hi"); }), }, -/* - Command { - allowed_states: State::Data.into(), - enhanced_only: false, - testing_only: false, - no_abbrev: false, - name: "DESCRIPTIVES", - run: Box::new(parse_descriptives), - }, -*/ + /* + Command { + allowed_states: State::Data.into(), + enhanced_only: false, + testing_only: false, + no_abbrev: false, + name: "DESCRIPTIVES", + run: Box::new(parse_descriptives), + }, + */ ] } @@ -215,28 +215,37 @@ pub fn end_of_command(context: &Context) -> Result { } fn parse_in_state(lexer: &mut Lexer, error: &Box, _state: State) { + println!("{}:{}", file!(), line!()); match lexer.token() { Token::End | Token::EndCommand => (), _ => { + println!("{}:{}", file!(), line!()); if let Ok((command, n_tokens)) = parse_command_name(lexer, error) { for _ in 0..n_tokens { lexer.get(); } + println!("{}:{}", file!(), line!()); let mut context = Context { error, lexer, command_name: Some(command.name), }; + println!("{}:{}", file!(), line!()); (command.run)(&mut context); + println!("{}:{}", file!(), line!()); end_of_command(&context); + println!("{}:{}", file!(), line!()); } - lexer.interactive_reset(); + println!("{}:{}", file!(), line!()); lexer.discard_rest_of_command(); + println!("{}:{}", file!(), line!()); } } + println!("{}:{}", file!(), line!()); while let Token::EndCommand = lexer.token() { lexer.get(); } + println!("{}:{}", file!(), line!()); } pub fn parse(lexer: &mut Lexer, error: &Box) { diff --git a/rust/pspp/src/engine.rs b/rust/pspp/src/engine.rs index f48c1948c1..2ccb1a6832 100644 --- a/rust/pspp/src/engine.rs +++ b/rust/pspp/src/engine.rs @@ -18,6 +18,7 @@ impl Engine { self.lexer.append(source); self.lexer.get(); while self.lexer.token() != &Token::End { + println!("{}:{}", file!(), line!()); let error: Box = Box::new(|diagnostic| { println!("{diagnostic}"); }); diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs index f592da1144..928115af61 100644 --- a/rust/pspp/src/lex/lexer.rs +++ b/rust/pspp/src/lex/lexer.rs @@ -16,9 +16,9 @@ use thiserror::Error as ThisError; use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; use crate::{ + lex::scan::Incomplete, macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser}, message::{Category, Diagnostic, Location, Point, Severity}, - prompt::PromptStyle, settings::Settings, }; @@ -74,16 +74,6 @@ pub struct Source { /// `None` if this reader is not associated with a file. file_name: Option>, - /// True if we've reached EOF already. - eof: bool, - - /// Read some input from the source. If successful, returns the input that - /// was read. At end of file or on error, returns an empty string. - /// - /// `prompt` provides a hint to interactive readers as to what kind of - /// syntax is being read right now. - read: Box String>, - /// Source file contents. buffer: String, @@ -104,6 +94,8 @@ pub struct Source { /// end up in `parse`. merge: VecDeque, + eof: bool, + /// Tokens available to the client for parsing. parse: Vec, @@ -121,14 +113,13 @@ impl Default for Source { error_handling: ErrorHandling::default(), encoding: UTF_8, file_name: None, - eof: false, - read: Box::new(|_| String::new()), buffer: String::new(), journal_line: 0, seg_pos: 0, lines: vec![0], pp: VecDeque::new(), merge: VecDeque::new(), + eof: false, parse: Vec::new(), parse_ofs: 0, segmenter: Segmenter::new(Mode::default(), false), @@ -188,45 +179,13 @@ impl Source { } } - pub fn for_function( - read: Box String>, - file_name: Option, - encoding: &'static Encoding, - syntax: Mode, - error_handling: ErrorHandling, - ) -> Self { - Self { - read, - file_name: file_name.map(Arc::new), - encoding, - segmenter: Segmenter::new(syntax, false), - error_handling, - ..Self::default() - } - } - - fn read(&mut self) { - loop { - let prompt = self.segmenter.prompt(); - let s = (self.read)(prompt); - if s.is_empty() { - self.eof = true; - return; - } - self.buffer.push_str(&s); - if self.buffer[self.seg_pos..].contains('\n') { - return; - } - } - } - fn try_get_pp(&mut self, context: &Context) -> bool { - let (seg_len, seg_type) = loop { - if let Ok(result) = self.segmenter.push(&self.buffer[self.seg_pos..], self.eof) { - break result; - } - - debug_assert!(!self.eof); - self.read(); + fn get_pp(&mut self, context: &Context) -> bool { + let Some((seg_len, seg_type)) = self + .segmenter + .push(&self.buffer[self.seg_pos..], true) + .unwrap() + else { + return false; }; let pos = self.seg_pos..self.seg_pos + seg_len; @@ -267,16 +226,12 @@ impl Source { let pos = pos.start..pos.end; match scan_token { None => false, - Some(ScanToken::Token(Token::End)) => { - self.pp.push_back(LexToken { - token: Token::EndCommand, - pos, - macro_rep: None, - }); - self.eof = true; - true - } Some(ScanToken::Token(token)) => { + let token = if let Token::End = token { + Token::EndCommand + } else { + token + }; self.pp.push_back(LexToken { token, pos, @@ -298,19 +253,15 @@ impl Source { } } - fn get_pp(&mut self, context: &Context) -> bool { - while !self.eof { - if self.try_get_pp(context) { - return true; - } - } - false - } - - fn try_get_merge(&mut self, context: &Context) -> bool { + fn get_merge(&mut self, context: &Context) -> bool { + println!("{}:{}", file!(), line!()); if self.pp.is_empty() && !self.get_pp(context) { return false; } + println!("{}:{} pp.len()={}", file!(), line!(), self.pp.len()); + for pp in &self.pp { + println!("{:?}", &pp.token); + } if !Settings::global().macros.expand { self.merge.append(&mut self.pp); @@ -385,35 +336,23 @@ impl Source { retval } - /// Attempts to obtain at least one new token into `self.merge`. - /// - /// Returns true if successful, false on failure. In the latter case, this source - /// exhausted and 'self.eof' is now true. - fn get_merge(&mut self, context: &Context) -> bool { - while !self.eof { - if self.try_get_merge(context) { - return true; - } - } - false - } - - fn get_parse__(&mut self, context: &Context) -> bool { - for i in 0.. { - if self.merge.len() <= i && !self.get_merge(context) { - // We always get a `Token::EndCommand` at the end of an input - // file and the merger should return `Some(...)` for that token. - debug_assert_eq!(self.merge.len(), 0); - return false; - } - - match ScanToken::merge(&self.merge) { - None => (), - Some(MergeResult::Copy) => { + fn get_parse(&mut self, context: &Context) -> bool { + loop { + match ScanToken::merge(|index| { + if let Some(token) = self.merge.get(index) { + Ok(Some(&token.token)) + } else if self.eof { + Ok(None) + } else { + Err(Incomplete) + } + }) { + Ok(Some(MergeResult::Copy)) => { + println!("{}:{}", file!(), line!()); self.parse.push(self.merge.pop_front().unwrap()); return true; } - Some(MergeResult::Expand { n, token }) => { + Ok(Some(MergeResult::Expand { n, token })) => { let first = &self.merge[0]; let last = &self.merge[n - 1]; self.parse.push(LexToken { @@ -430,16 +369,18 @@ impl Source { }, }); self.merge.drain(..n); + println!("{}:{}", file!(), line!()); return true; } + Ok(None) => return false, + Err(Incomplete) => { + debug_assert!(!self.eof); + if !self.get_merge(context) { + self.eof = true; + } + } } } - unreachable!(); - } - - fn get_parse(&mut self, context: &Context) -> bool { - // XXX deal with accumulated messages - self.get_parse__(context) } fn offset_to_point(&self, offset: usize) -> Point { @@ -549,7 +490,7 @@ impl Source { } fn is_empty(&self) -> bool { - self.buffer.is_empty() && self.eof + self.buffer.is_empty() } fn diagnostic( @@ -602,23 +543,6 @@ impl Source { text: s, } } - - fn interactive_reset(&mut self) { - if self.error_handling == ErrorHandling::Terminal { - let Source { - error_handling, - encoding, - read, - .. - } = mem::take(self); - *self = Self { - error_handling, - encoding, - read, - ..Source::default() - }; - } - } } fn ellipsize(s: &str) -> Cow { @@ -706,13 +630,20 @@ impl Lexer { } while self.source.parse_ofs == self.source.parse.len() { + println!("{}:{}", file!(), line!()); let context = Context { macros: &self.macros, error: &self.error, }; - if !self.source.get_parse(&context) && !self.pop_stack() { - return &Token::End; + println!("{}:{}", file!(), line!()); + if !self.source.get_parse(&context) { + println!("{}:{}", file!(), line!()); + if !self.pop_stack() { + println!("{}:{}", file!(), line!()); + return &Token::End; + } } + println!("{}:{}", file!(), line!()); } self.source.token() } @@ -810,18 +741,6 @@ impl Lexer { } } - /// If the source that the lexer is currently reading has error mode - /// [ErrorHandling::Terminal], discards all buffered input and tokens, so - /// that the next token to be read comes directly from whatever is next read - /// from the stream. - /// - /// It makes sense to call this function after encountering an error in a - /// command entered on the console, because usually the user would prefer - /// not to have cascading errors. - pub fn interactive_reset(&mut self) { - self.source.interactive_reset() - } - /// Advances past any tokens up to [Token::EndCommand] or [Token::End]. pub fn discard_rest_of_command(&mut self) { while !matches!(self.token(), Token::EndCommand | Token::End) { diff --git a/rust/pspp/src/lex/scan/mod.rs b/rust/pspp/src/lex/scan/mod.rs index 05577a9259..c4d0814835 100644 --- a/rust/pspp/src/lex/scan/mod.rs +++ b/rust/pspp/src/lex/scan/mod.rs @@ -16,7 +16,7 @@ use super::{ segment::{Mode, Segment, Segmenter}, token::{Punct, Token}, }; -use std::{borrow::Borrow, collections::VecDeque}; +use std::collections::VecDeque; use thiserror::Error as ThisError; #[derive(ThisError, Clone, Debug, PartialEq, Eq)] @@ -89,6 +89,9 @@ pub enum MergeResult { }, } +#[derive(Copy, Clone, Debug)] +pub struct Incomplete; + impl ScanToken { pub fn from_segment(s: &str, segment: Segment) -> Option { match segment { @@ -236,7 +239,6 @@ impl ScanToken { Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => { Some(Self::Token(Token::EndCommand)) } - Segment::End => Some(Self::Token(Token::End)), Segment::ExpectedQuote => Some(Self::Error(ScanError::ExpectedQuote)), Segment::ExpectedExponent => { Some(Self::Error(ScanError::ExpectedExponent(String::from(s)))) @@ -267,62 +269,52 @@ impl ScanToken { /// white space, as a negative number. It's only needed if we want /// intervening comments to be allowed or for part of the negative number /// token to be produced by macro expansion. - pub fn merge(tokens: &T) -> Option + pub fn merge<'a, F>(get_token: F) -> Result, Incomplete> where - T: Tokens, + F: Fn(usize) -> Result, Incomplete>, { - match tokens.get(0)? { - Token::Punct(Punct::Dash) => match tokens.get(1)? { - Token::Number(number) if number.is_sign_positive() => { + let Some(token) = get_token(0)? else { + return Ok(None); + }; + match token { + Token::Punct(Punct::Dash) => match get_token(1)? { + Some(Token::Number(number)) if number.is_sign_positive() => { let number = *number; - return Some(MergeResult::Expand { + Ok(Some(MergeResult::Expand { n: 2, token: Token::Number(-number), - }); + })) } - _ => Some(MergeResult::Copy), + _ => Ok(Some(MergeResult::Copy)), }, Token::String(_) => { let mut i = 0; - while matches!(tokens.get(i * 2 + 1)?, Token::Punct(Punct::Plus)) - && matches!(tokens.get(i * 2 + 2)?, Token::String(_)) + while matches!(get_token(i * 2 + 1)?, Some(Token::Punct(Punct::Plus))) + && matches!(get_token(i * 2 + 2)?, Some(Token::String(_))) { i += 1; } if i == 0 { - Some(MergeResult::Copy) + Ok(Some(MergeResult::Copy)) } else { let mut output = String::new(); for i in 0..=i { - let Token::String(s) = tokens.get(i * 2).unwrap() else { + let Token::String(s) = get_token(i * 2).unwrap().unwrap() else { unreachable!() }; output.push_str(&s); } - Some(MergeResult::Expand { + Ok(Some(MergeResult::Expand { n: i * 2 + 1, token: Token::String(output), - }) + })) } } - _ => Some(MergeResult::Copy), + _ => Ok(Some(MergeResult::Copy)), } } } -pub trait Tokens { - fn get(&self, index: usize) -> Option<&Token>; -} - -impl Tokens for VecDeque -where - T: Borrow, -{ - fn get(&self, index: usize) -> Option<&Token> { - self.get(index).map(|token| token.borrow()) - } -} - pub struct StringSegmenter<'a> { input: &'a str, segmenter: Segmenter, @@ -342,10 +334,7 @@ impl<'a> Iterator for StringSegmenter<'a> { fn next(&mut self) -> Option { loop { - let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap(); - if seg_type == Segment::End { - return None; - } + let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap()?; let (s, rest) = self.input.split_at(seg_len); self.input = rest; @@ -358,6 +347,7 @@ impl<'a> Iterator for StringSegmenter<'a> { pub struct StringScanner<'a> { input: &'a str, + eof: bool, segmenter: Segmenter, tokens: VecDeque, } @@ -366,19 +356,28 @@ impl<'a> StringScanner<'a> { pub fn new(input: &'a str, mode: Mode, is_snippet: bool) -> Self { Self { input, + eof: false, segmenter: Segmenter::new(mode, is_snippet), tokens: VecDeque::with_capacity(1), } } - fn merge(&mut self) -> Option { - let result = ScanToken::merge(&self.tokens)?; - match result { - MergeResult::Copy => Some(ScanToken::Token(self.tokens.pop_front().unwrap())), - MergeResult::Expand { n, token } => { + fn merge(&mut self, eof: bool) -> Result, Incomplete> { + match ScanToken::merge(|index| { + if let Some(token) = self.tokens.get(index) { + Ok(Some(token)) + } else if eof { + Ok(None) + } else { + Err(Incomplete) + } + })? { + Some(MergeResult::Copy) => Ok(Some(ScanToken::Token(self.tokens.pop_front().unwrap()))), + Some(MergeResult::Expand { n, token }) => { self.tokens.drain(..n); - Some(ScanToken::Token(token)) + Ok(Some(ScanToken::Token(token))) } + None => Ok(None), } } } @@ -387,14 +386,18 @@ impl<'a> Iterator for StringScanner<'a> { type Item = ScanToken; fn next(&mut self) -> Option { - if let Some(token) = self.merge() { - return Some(token); - } loop { - let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap(); - if seg_type == Segment::End && self.tokens.is_empty() { - return None; + if let Ok(Some(token)) = self.merge(self.eof) { + return Some(token); } + + let Some((seg_len, seg_type)) = self.segmenter.push(self.input, true).unwrap() else { + if !self.eof { + self.eof = true; + self.tokens.push_back(Token::End); + } + return self.merge(true).unwrap(); + }; let (s, rest) = self.input.split_at(seg_len); self.input = rest; @@ -402,9 +405,6 @@ impl<'a> Iterator for StringScanner<'a> { Some(ScanToken::Error(error)) => return Some(ScanToken::Error(error)), Some(ScanToken::Token(token)) => { self.tokens.push_back(token); - if let Some(token) = self.merge() { - return Some(token); - } } None => (), } diff --git a/rust/pspp/src/lex/scan/test.rs b/rust/pspp/src/lex/scan/test.rs index 0ed9be6555..1e0f1007e2 100644 --- a/rust/pspp/src/lex/scan/test.rs +++ b/rust/pspp/src/lex/scan/test.rs @@ -1,7 +1,10 @@ -use crate::{identifier::Identifier, lex::{ - segment::Mode, - token::{Punct, Token}, -}}; +use crate::{ + identifier::Identifier, + lex::{ + segment::Mode, + token::{Punct, Token}, + }, +}; use super::{ScanError, ScanToken, StringScanner}; @@ -16,6 +19,7 @@ fn print_token(token: &Token) { } } +#[track_caller] fn check_scan(input: &str, mode: Mode, expected: &[ScanToken]) { let tokens = StringScanner::new(input, mode, false).collect::>(); @@ -84,6 +88,7 @@ WXYZ. /* unterminated end of line comment ScanToken::Token(Token::EndCommand), ScanToken::Error(ScanError::UnexpectedChar('�')), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -140,6 +145,7 @@ and. with. ScanToken::Token(Token::Id(Identifier::new("and.").unwrap())), ScanToken::Token(Token::Punct(Punct::With)), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -202,6 +208,7 @@ fn test_punctuation() { ScanToken::Token(Token::Punct(Punct::LCurly)), ScanToken::Token(Token::Punct(Punct::RCurly)), ScanToken::Token(Token::Punct(Punct::Not)), + ScanToken::Token(Token::End), ], ); } @@ -253,6 +260,7 @@ fn test_positive_numbers() { ScanToken::Token(Token::Id(Identifier::new("e1").unwrap())), ScanToken::Error(ScanError::ExpectedExponent(String::from("1e+"))), ScanToken::Error(ScanError::ExpectedExponent(String::from("1e-"))), + ScanToken::Token(Token::End), ], ); } @@ -308,6 +316,7 @@ fn test_negative_numbers() { ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e-"))), ScanToken::Token(Token::Number(-1.0)), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -396,6 +405,7 @@ fn test_shbang() { ScanToken::Token(Token::Id(Identifier::new("bin").unwrap())), ScanToken::Token(Token::Punct(Punct::Slash)), ScanToken::Token(Token::Id(Identifier::new("pspp").unwrap())), + ScanToken::Token(Token::End), ], ); } @@ -443,6 +453,7 @@ next command. ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), ScanToken::Token(Token::EndCommand), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -482,6 +493,7 @@ second paragraph. ScanToken::Token(Token::String(String::from("second paragraph."))), ScanToken::Token(Token::EndCommand), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -510,6 +522,7 @@ FILE /* ScanToken::Token(Token::Id(Identifier::new("lab").unwrap())), ScanToken::Token(Token::String(String::from("not quoted here either"))), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -548,6 +561,7 @@ end data ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -590,6 +604,7 @@ end ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -646,6 +661,7 @@ end repeat ScanToken::Token(Token::String(String::from(" inner command"))), ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), + ScanToken::Token(Token::End), ], ); } @@ -683,16 +699,20 @@ fourth command. ScanToken::Token(Token::Id(Identifier::new("fifth").unwrap())), ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } mod define { - use crate::{identifier::Identifier, lex::{ - scan::ScanToken, - segment::Mode, - token::{Punct, Token}, - }}; + use crate::{ + identifier::Identifier, + lex::{ + scan::ScanToken, + segment::Mode, + token::{Punct, Token}, + }, + }; use super::check_scan; @@ -712,6 +732,7 @@ var1 var2 var3 ScanToken::Token(Token::String(String::from("var1 var2 var3"))), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -731,6 +752,7 @@ var1 var2 var3 ScanToken::Token(Token::String(String::from(" var1 var2 var3"))), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -750,6 +772,7 @@ var1 var2 var3!enddefine. ScanToken::Token(Token::String(String::from("var1 var2 var3"))), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -768,6 +791,7 @@ var1 var2 var3!enddefine. ScanToken::Token(Token::String(String::from("var1 var2 var3"))), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -786,6 +810,7 @@ var1 var2 var3!enddefine. ScanToken::Token(Token::Punct(Punct::RParen)), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -808,6 +833,7 @@ var1 var2 var3!enddefine. ScanToken::Token(Token::String(String::from(""))), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -837,6 +863,7 @@ var1 var2 var3!enddefine. ScanToken::Token(Token::Punct(Punct::RParen)), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -870,6 +897,7 @@ var1 var2 var3!enddefine. ScanToken::Token(Token::Punct(Punct::RParen)), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -899,6 +927,7 @@ content 2 ScanToken::Token(Token::String(String::from("content 2"))), ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -920,6 +949,7 @@ data list /x 1. ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), ScanToken::Token(Token::Number(1.0)), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -943,6 +973,7 @@ data list /x 1. ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), ScanToken::Token(Token::Number(1.0)), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -968,6 +999,7 @@ data list /x 1. ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), ScanToken::Token(Token::Number(1.0)), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -991,6 +1023,7 @@ data list /x 1. ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), ScanToken::Token(Token::Number(1.0)), ScanToken::Token(Token::EndCommand), + ScanToken::Token(Token::End), ], ); } @@ -1011,6 +1044,7 @@ content line 2 ScanToken::Token(Token::String(String::from("content line 1"))), ScanToken::Token(Token::String(String::from("content line 2"))), ScanToken::Token(Token::End), + ScanToken::Token(Token::End), ], ); } diff --git a/rust/pspp/src/lex/segment/mod.rs b/rust/pspp/src/lex/segment/mod.rs index befe5b0c53..7aed90e164 100644 --- a/rust/pspp/src/lex/segment/mod.rs +++ b/rust/pspp/src/lex/segment/mod.rs @@ -79,7 +79,6 @@ pub enum Segment { StartCommand, SeparateCommands, EndCommand, - End, ExpectedQuote, ExpectedExponent, UnexpectedChar, @@ -216,17 +215,17 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { if input.is_empty() { if eof { - return Ok((input, Segment::End)); + return Ok(None); } else { return Err(Incomplete); }; } match self.state.0 { - State::Shbang => return self.parse_shbang(input, eof), + State::Shbang => self.parse_shbang(input, eof), State::General => { if self.start_of_line() { self.parse_start_of_line(input, eof) @@ -259,9 +258,10 @@ impl Segmenter { } } - pub fn push(&mut self, input: &str, eof: bool) -> Result<(usize, Segment), Incomplete> { - let (rest, seg_type) = self.push_rest(input, eof)?; - Ok((input.len() - rest.len(), seg_type)) + pub fn push(&mut self, input: &str, eof: bool) -> Result, Incomplete> { + Ok(self + .push_rest(input, eof)? + .map(|(rest, seg_type)| (input.len() - rest.len(), seg_type))) } } @@ -450,12 +450,12 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { if let (Some('#'), rest) = take(input, eof)? { if let (Some('!'), rest) = take(rest, eof)? { let rest = self.parse_full_line(rest, eof)?; self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((rest, Segment::Shbang)); + return Ok(Some((rest, Segment::Shbang))); } } @@ -476,7 +476,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { debug_assert_eq!(self.state.0, State::General); debug_assert!(self.start_of_line()); debug_assert!(!input.is_empty()); @@ -488,16 +488,16 @@ impl Segmenter { '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => { // This `+` is punctuation that may separate pieces of a string. self.state = (State::General, Substate::empty()); - return Ok((rest, Segment::Punct)); + return Ok(Some((rest, Segment::Punct))); } '+' | '-' | '.' => { self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((rest, Segment::StartCommand)); + return Ok(Some((rest, Segment::StartCommand))); } _ if c.is_whitespace() => { if at_end_of_line(input, eof)? { self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Segment::SeparateCommands)); + return Ok(Some((input, Segment::SeparateCommands))); } } _ => { @@ -505,7 +505,7 @@ impl Segmenter { && !self.state.1.contains(Substate::START_OF_COMMAND) { self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Segment::StartCommand)); + return Ok(Some((input, Segment::StartCommand))); } } } @@ -516,7 +516,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { debug_assert!(self.state.0 == State::General); debug_assert!(!self.state.1.contains(Substate::START_OF_LINE)); let (Some(c), rest) = take(input, eof)? else { @@ -525,18 +525,18 @@ impl Segmenter { match c { '\r' | '\n' if is_end_of_line(input, eof)? => { self.state.1 |= Substate::START_OF_LINE; - Ok(( + Ok(Some(( self.parse_newline(input, eof).unwrap().unwrap(), Segment::Newline, - )) + ))) } '/' => { if let (Some('*'), rest) = take(rest, eof)? { let rest = skip_comment(rest, eof)?; - return Ok((rest, Segment::Comment)); + return Ok(Some((rest, Segment::Comment))); } else { self.state.1 = Substate::empty(); - return Ok((rest, Segment::Punct)); + return Ok(Some((rest, Segment::Punct))); } } '-' => { @@ -555,11 +555,11 @@ impl Segmenter { None | Some(_) => (), } self.state.1 = Substate::empty(); - return Ok((rest, Segment::Punct)); + return Ok(Some((rest, Segment::Punct))); } '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => { self.state.1 = Substate::empty(); - return Ok((rest, Segment::Punct)); + return Ok(Some((rest, Segment::Punct))); } '*' => { if self.state.1.contains(Substate::START_OF_COMMAND) { @@ -574,11 +574,11 @@ impl Segmenter { '~' => self.parse_digraph(&['='], rest, eof), '.' if at_end_of_line(rest, eof)? => { self.state.1 = Substate::START_OF_COMMAND; - Ok((rest, Segment::EndCommand)) + Ok(Some((rest, Segment::EndCommand))) } '.' => match take(rest, eof)? { (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof), - _ => Ok((rest, Segment::Punct)), + _ => Ok(Some((rest, Segment::Punct))), }, '0'..='9' => self.parse_number(input, eof), 'u' | 'U' => self.maybe_parse_string(Segment::UnicodeString, (input, rest), eof), @@ -587,20 +587,20 @@ impl Segmenter { '!' => { let (c, rest2) = take(rest, eof)?; match c { - Some('*') => Ok((rest2, Segment::Punct)), + Some('*') => Ok(Some((rest2, Segment::Punct))), Some(_) => self.parse_id(input, eof), - None => Ok((rest, Segment::Punct)), + None => Ok(Some((rest, Segment::Punct))), } } - c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Segment::Spaces)), + c if c.is_whitespace() => Ok(Some((skip_spaces(rest, eof)?, Segment::Spaces))), c if c.may_start_id() => self.parse_id(input, eof), '#'..='~' if c != '\\' && c != '^' => { self.state.1 = Substate::empty(); - Ok((rest, Segment::Punct)) + Ok(Some((rest, Segment::Punct))) } _ => { self.state.1 = Substate::empty(); - Ok((rest, Segment::UnexpectedChar)) + Ok(Some((rest, Segment::UnexpectedChar))) } } } @@ -610,14 +610,14 @@ impl Segmenter { quote: char, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { while let (Some(c), rest) = take(input, eof)? { match c { _ if c == quote => { let (c, rest2) = take(rest, eof)?; if c != Some(quote) { self.state.1 = Substate::empty(); - return Ok((rest, segment)); + return Ok(Some((rest, segment))); } input = rest2; } @@ -626,14 +626,14 @@ impl Segmenter { } } self.state.1 = Substate::empty(); - Ok((input, Segment::ExpectedQuote)) + Ok(Some((input, Segment::ExpectedQuote))) } fn maybe_parse_string<'a>( &mut self, segment: Segment, input: (&'a str, &'a str), eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { match take(input.1, eof)? { (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(segment, c, rest, eof), _ => self.parse_id(input.0, eof), @@ -646,7 +646,9 @@ impl Segmenter { ) -> Result<(&'a str, &'a str), Incomplete> { let mut sub = Segmenter::new(self.mode, true); loop { - let (seg_len, seg_type) = sub.push(input, eof)?; + let Some((seg_len, seg_type)) = sub.push(input, eof)? else { + return Ok((input, input)); + }; let (segment, rest) = input.split_at(seg_len); match seg_type { Segment::Shbang | Segment::Spaces | Segment::Comment | Segment::Newline => (), @@ -670,7 +672,6 @@ impl Segmenter { | Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand - | Segment::End | Segment::ExpectedQuote | Segment::ExpectedExponent | Segment::UnexpectedChar => return Ok(("", rest)), @@ -682,7 +683,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let (Some(_), mut end) = take(input, eof).unwrap() else { unreachable!() }; @@ -705,18 +706,18 @@ impl Segmenter { return self.parse_comment_1(input, eof); } else if id_match("DOCUMENT", identifier) { self.state = (State::Document1, Substate::empty()); - return Ok((input, Segment::StartDocument)); + return Ok(Some((input, Segment::StartDocument))); } else if id_match_n("DEFINE", identifier, 6) { self.state = (State::Define1, Substate::empty()); } else if id_match("FILE", identifier) { if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) { self.state = (State::FileLabel1, Substate::empty()); - return Ok((rest, Segment::Identifier)); + return Ok(Some((rest, Segment::Identifier))); } } else if id_match("DO", identifier) { if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) { self.state = (State::DoRepeat1, Substate::empty()); - return Ok((rest, Segment::Identifier)); + return Ok(Some((rest, Segment::Identifier))); } } else if id_match("BEGIN", identifier) { let (next_id, rest2) = self.next_id_in_command(rest, eof)?; @@ -737,43 +738,43 @@ impl Segmenter { }, Substate::empty(), ); - return Ok((rest, Segment::Identifier)); + return Ok(Some((rest, Segment::Identifier))); } } } } self.state.1 = Substate::empty(); - Ok(( + Ok(Some(( rest, if identifier != "!" { Segment::Identifier } else { Segment::Punct }, - )) + ))) } fn parse_digraph<'a>( &mut self, seconds: &[char], input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let (c, rest) = take(input, eof)?; self.state.1 = Substate::empty(); - Ok(( + Ok(Some(( match c { Some(c) if seconds.contains(&c) => rest, _ => input, }, Segment::Punct, - )) + ))) } fn parse_number<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let mut input = skip_digits(input, eof)?; if let Some(rest) = match_char(|c| c == '.', input, eof)? { let rest2 = skip_digits(rest, eof)?; @@ -786,18 +787,18 @@ impl Segmenter { let rest2 = skip_digits(rest, eof)?; if rest2.len() == rest.len() { self.state.1 = Substate::empty(); - return Ok((rest, Segment::ExpectedExponent)); + return Ok(Some((rest, Segment::ExpectedExponent))); } input = rest2; } self.state.1 = Substate::empty(); - Ok((input, Segment::Number)) + Ok(Some((input, Segment::Number))) } fn parse_comment_1<'a>( &mut self, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { enum CommentState<'a> { Blank, NotBlank, @@ -808,7 +809,7 @@ impl Segmenter { let (Some(c), rest) = take(input, eof)? else { // End of file. self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Segment::SeparateCommands)); + return Ok(Some((input, Segment::SeparateCommands))); }; match c { '.' => state = CommentState::Period(input), @@ -817,17 +818,17 @@ impl Segmenter { CommentState::Blank => { // Blank line ends comment command. self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Segment::SeparateCommands)); + return Ok(Some((input, Segment::SeparateCommands))); } CommentState::Period(period) => { // '.' at end of line ends comment command. self.state = (State::General, Substate::empty()); - return Ok((period, Segment::CommentCommand)); + return Ok(Some((period, Segment::CommentCommand))); } CommentState::NotBlank => { // Comment continues onto next line. self.state = (State::Comment2, Substate::empty()); - return Ok((input, Segment::CommentCommand)); + return Ok(Some((input, Segment::CommentCommand))); } } } @@ -841,7 +842,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); let new_command = match take(rest, eof)?.0 { @@ -857,18 +858,18 @@ impl Segmenter { } else { self.state = (State::Comment1, Substate::empty()); } - Ok((rest, Segment::Newline)) + Ok(Some((rest, Segment::Newline))) } fn parse_document_1<'a>( &mut self, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let mut end_cmd = false; loop { let (Some(c), rest) = take(input, eof)? else { self.state = (State::Document3, Substate::empty()); - return Ok((input, Segment::Document)); + return Ok(Some((input, Segment::Document))); }; match c { '.' => end_cmd = true, @@ -878,7 +879,7 @@ impl Segmenter { } else { State::Document2 }; - return Ok((input, Segment::Document)); + return Ok(Some((input, Segment::Document))); } c if !c.is_whitespace() => end_cmd = false, _ => (), @@ -890,21 +891,21 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); self.state = (State::Document1, Substate::empty()); - Ok((rest, Segment::Newline)) + Ok(Some((rest, Segment::Newline))) } fn parse_document_3<'a>( &mut self, input: &'a str, _eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { self.state = ( State::General, Substate::START_OF_COMMAND | Substate::START_OF_LINE, ); - Ok((input, Segment::EndCommand)) + Ok(Some((input, Segment::EndCommand))) } fn quoted_file_label(input: &str, eof: bool) -> Result { let input = skip_spaces_and_comments(input, eof)?; @@ -917,12 +918,12 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let mut sub = Segmenter { state: (State::General, self.state.1), ..*self }; - let (rest, segment) = sub.push_rest(input, eof)?; + let (rest, segment) = sub.push_rest(input, eof)?.unwrap(); if segment == Segment::Identifier { let id = &input[..input.len() - rest.len()]; debug_assert!(id_match("LABEL", id), "{id} should be LABEL"); @@ -934,29 +935,29 @@ impl Segmenter { } else { self.state.1 = sub.state.1; } - Ok((rest, segment)) + Ok(Some((rest, segment))) } fn parse_file_label_2<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let input = skip_spaces(input, eof)?; self.state = (State::FileLabel3, Substate::empty()); - Ok((input, Segment::Spaces)) + Ok(Some((input, Segment::Spaces))) } fn parse_file_label_3<'a>( &mut self, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let mut end_cmd = None; loop { let (c, rest) = take(input, eof)?; match c { None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => { self.state = (State::General, Substate::empty()); - return Ok((end_cmd.unwrap_or(input), Segment::UnquotedString)); + return Ok(Some((end_cmd.unwrap_or(input), Segment::UnquotedString))); } None => unreachable!(), Some('.') => end_cmd = Some(input), @@ -970,7 +971,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let mut sub = Segmenter { mode: self.mode, state: (State::General, self.state.1), @@ -987,8 +988,8 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { - let (rest, segment) = self.subparse(input, eof)?; + ) -> Result, Incomplete> { + let (rest, segment) = self.subparse(input, eof)?.unwrap(); if segment == Segment::SeparateCommands { // We reached a blank line that separates the head from the body. self.state.0 = State::DoRepeat2; @@ -997,7 +998,7 @@ impl Segmenter { self.state.0 = State::DoRepeat3; self.nest = 1; } - Ok((rest, segment)) + Ok(Some((rest, segment))) } /// We are segmenting a `DO REPEAT` command, currently reading a blank line /// that separates the head from the body. @@ -1005,14 +1006,14 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { - let (rest, segment) = self.subparse(input, eof)?; + ) -> Result, Incomplete> { + let (rest, segment) = self.subparse(input, eof)?.unwrap(); if segment == Segment::Newline { // We reached the body. self.state.0 = State::DoRepeat3; self.nest = 1; } - Ok((rest, segment)) + Ok(Some((rest, segment))) } fn parse_newline<'a>( &mut self, @@ -1070,9 +1071,9 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { if let Some(rest) = self.parse_newline(input, eof)? { - return Ok((rest, Segment::Newline)); + return Ok(Some((rest, Segment::Newline))); } let rest = self.parse_full_line(input, eof)?; let direction = self.check_repeat_command(input, eof)?; @@ -1094,11 +1095,11 @@ impl Segmenter { return self.push_rest(input, eof); } } - return Ok((rest, Segment::DoRepeatCommand)); + return Ok(Some((rest, Segment::DoRepeatCommand))); } - fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Segment), Incomplete> { + fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result, Incomplete> { self.state.0 = State::DoRepeat3; - Ok((input, Segment::DoRepeatOverflow)) + Ok(Some((input, Segment::DoRepeatOverflow))) } /// We are segmenting a `DEFINE` command, which consists of: /// @@ -1121,12 +1122,12 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { - let (rest, segment) = self.subparse(input, eof)?; + ) -> Result, Incomplete> { + let (rest, segment) = self.subparse(input, eof)?.unwrap(); match segment { Segment::Identifier if self.state.0 == State::Define1 => { self.state.0 = State::Define2; - return Ok((rest, Segment::MacroName)); + return Ok(Some((rest, Segment::MacroName))); } Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => { // The DEFINE command is malformed because we reached its end @@ -1140,14 +1141,14 @@ impl Segmenter { } _ => (), } - Ok((rest, segment)) + Ok(Some((rest, segment))) } fn parse_define_3<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { - let (rest, segment) = self.subparse(input, eof)?; + ) -> Result, Incomplete> { + let (rest, segment) = self.subparse(input, eof)?.unwrap(); match segment { Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => { // The DEFINE command is malformed because we reached its end @@ -1166,7 +1167,7 @@ impl Segmenter { } _ => (), } - Ok((rest, segment)) + Ok(Some((rest, segment))) } fn find_enddefine<'a>(mut input: &'a str) -> Option<&'a str> { loop { @@ -1197,7 +1198,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let rest = self.parse_full_line(input, eof)?; let line = &input[..input.len() - rest.len()]; if let Some(end) = Self::find_enddefine(line) { @@ -1209,10 +1210,10 @@ impl Segmenter { self.push_rest(input, eof) } else if prefix.trim_start().is_empty() { // Line starts with spaces followed by `!ENDDEFINE`. - Ok((rest, Segment::Spaces)) + Ok(Some((rest, Segment::Spaces))) } else { // Line starts with some content followed by `!ENDDEFINE`. - Ok((rest, Segment::MacroBody)) + Ok(Some((rest, Segment::MacroBody))) } } else { // No `!ENDDEFINE`. We have a full line of macro body. @@ -1232,39 +1233,39 @@ impl Segmenter { Segment::MacroBody }; self.state.0 = State::Define6; - Ok((rest, segment)) + Ok(Some((rest, segment))) } } fn parse_define_6<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); self.state.0 = State::Define5; - Ok((rest, Segment::Newline)) + Ok(Some((rest, Segment::Newline))) } fn parse_begin_data_1<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { - let (rest, segment) = self.subparse(input, eof)?; + ) -> Result, Incomplete> { + let (rest, segment) = self.subparse(input, eof)?.unwrap(); if segment == Segment::Newline { self.state.0 = State::BeginData2; } - Ok((rest, segment)) + Ok(Some((rest, segment))) } fn parse_begin_data_2<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { - let (rest, segment) = self.subparse(input, eof)?; + ) -> Result, Incomplete> { + let (rest, segment) = self.subparse(input, eof)?.unwrap(); if segment == Segment::Newline { self.state.0 = State::BeginData3; } - Ok((rest, segment)) + Ok(Some((rest, segment))) } fn is_end_data(line: &str) -> bool { let Some(rest) = strip_prefix_ignore_ascii_case(line, "END") else { @@ -1295,7 +1296,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let rest = self.parse_full_line(input, eof)?; let line = &input[..input.len() - rest.len()]; if Self::is_end_data(line) { @@ -1306,17 +1307,17 @@ impl Segmenter { self.push_rest(input, eof) } else { self.state.0 = State::BeginData4; - Ok((rest, Segment::InlineData)) + Ok(Some((rest, Segment::InlineData))) } } fn parse_begin_data_4<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Segment), Incomplete> { + ) -> Result, Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); self.state.0 = State::BeginData3; - Ok((rest, Segment::Newline)) + Ok(Some((rest, Segment::Newline))) } } diff --git a/rust/pspp/src/lex/segment/test.rs b/rust/pspp/src/lex/segment/test.rs index d8c337dcdf..0553c7d1a7 100644 --- a/rust/pspp/src/lex/segment/test.rs +++ b/rust/pspp/src/lex/segment/test.rs @@ -6,7 +6,7 @@ fn push_segment<'a>( segmenter: &mut Segmenter, input: &'a str, one_byte: bool, -) -> (usize, Segment) { +) -> Option<(usize, Segment)> { if one_byte { for len in input.char_indices().map(|(pos, _c)| pos) { if let Ok(result) = segmenter.push(&input[..len], false) { @@ -27,14 +27,11 @@ fn _check_segmentation( let mut segments = Vec::with_capacity(expect_segments.len()); let mut prompts = Vec::new(); let mut segmenter = Segmenter::new(mode, false); - loop { - let (seg_len, seg_type) = push_segment(&mut segmenter, input, one_byte); + while let Some((seg_len, seg_type)) = push_segment(&mut segmenter, input, one_byte) { let (token, rest) = input.split_at(seg_len); segments.push((seg_type, token)); - match seg_type { - Segment::End => break, - Segment::Newline => prompts.push(segmenter.prompt()), - _ => (), + if let Segment::Newline = seg_type { + prompts.push(segmenter.prompt()); } input = rest; } @@ -94,14 +91,12 @@ fn check_segmentation( if let Some(input) = input.strip_suffix('\n') { println!("running {one_byte_name} segmentation test without final newline..."); let mut expect_segments: Vec<_> = expect_segments.iter().copied().collect(); - assert_eq!(expect_segments.pop(), Some((Segment::End, ""))); assert_eq!(expect_segments.pop(), Some((Segment::Newline, "\n"))); while let Some((Segment::SeparateCommands | Segment::EndCommand, "")) = expect_segments.last() { expect_segments.pop(); } - expect_segments.push((Segment::End, "")); _check_segmentation( input, mode, @@ -116,14 +111,11 @@ fn check_segmentation( #[allow(dead_code)] fn print_segmentation(mut input: &str) { let mut segmenter = Segmenter::new(Mode::Interactive, false); - loop { - let (seg_len, seg_type) = segmenter.push(input, true).unwrap(); + while let Some((seg_len, seg_type)) = segmenter.push(input, true).unwrap() { let (token, rest) = input.split_at(seg_len); print!("{seg_type:?} {token:?}"); - match seg_type { - Segment::Newline => print!(" ({:?})", segmenter.prompt()), - Segment::End => break, - _ => (), + if let Segment::Newline = seg_type { + print!(" ({:?})", segmenter.prompt()) } println!(); input = rest; @@ -239,7 +231,6 @@ GhIjK (Segment::Identifier, "abc"), (Segment::Punct, "!"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -356,7 +347,6 @@ WxYz./* unterminated end of line comment (Segment::EndCommand, "."), (Segment::Comment, "/* unterminated end of line comment "), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::First, @@ -473,7 +463,6 @@ and. with. (Segment::Identifier, "with"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -575,7 +564,6 @@ fn test_punctuation() { (Segment::Spaces, " "), (Segment::Punct, "!*"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Later, PromptStyle::Later, PromptStyle::Later], ); @@ -665,7 +653,6 @@ fn test_positive_numbers() { (Segment::Number, "1"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::First, @@ -776,7 +763,6 @@ fn test_negative_numbers() { (Segment::Number, "-1"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::First, @@ -861,7 +847,6 @@ u'fffd' U"041" (Segment::Spaces, " "), (Segment::QuotedString, "'new command'"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -908,7 +893,6 @@ title my title. (Segment::Punct, "/"), (Segment::Identifier, "pspp"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First, PromptStyle::Later], ); @@ -990,7 +974,6 @@ next command. (Segment::Newline, "\n"), (Segment::SeparateCommands, ""), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Comment, @@ -1058,7 +1041,6 @@ second paragraph. (Segment::EndCommand, ""), (Segment::SeparateCommands, ""), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::First, @@ -1114,7 +1096,6 @@ FILE /* (Segment::Newline, "\n"), (Segment::SeparateCommands, ""), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::First, @@ -1241,7 +1222,6 @@ not data (Segment::Spaces, " "), (Segment::Identifier, "data"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Data, @@ -1348,7 +1328,6 @@ end repeat. (Segment::Identifier, "repeat"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1422,7 +1401,6 @@ fn test_do_repeat_overflow() { (Segment::Newline, "\n"), ]); } - expect_output.push((Segment::End, "")); let expect_prompts: Vec<_> = (0..N * 2 - 3) .map(|_| PromptStyle::DoRepeat) @@ -1506,7 +1484,6 @@ end repeat (Segment::Spaces, " "), (Segment::Identifier, "repeat"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1555,7 +1532,6 @@ var1 var2 var3 "!enddefine" (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::Define, PromptStyle::First], ); @@ -1579,7 +1555,6 @@ var1 var2 var3 "!enddefine" (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1603,7 +1578,6 @@ var1 var2 var3!enddefine. (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1625,7 +1599,6 @@ var1 var2 var3!enddefine. (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::First], ); @@ -1648,7 +1621,6 @@ var1 var2 var3!enddefine. (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1677,7 +1649,6 @@ var1 var2 var3!enddefine. (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Define, @@ -1718,7 +1689,6 @@ var1 var2 var3!enddefine. (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1764,7 +1734,6 @@ var1 var2 var3!enddefine. (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1809,7 +1778,6 @@ content 2 (Segment::Identifier, "!enddefine"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1845,7 +1813,6 @@ data list /x 1. (Segment::Number, "1"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First], ); @@ -1877,7 +1844,6 @@ data list /x 1. (Segment::Number, "1"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Later, PromptStyle::First, PromptStyle::First], ); @@ -1911,7 +1877,6 @@ data list /x 1. (Segment::Number, "1"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First, PromptStyle::First], ); @@ -1942,7 +1907,6 @@ data list /x 1. (Segment::Number, "1"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First], ); @@ -1967,7 +1931,6 @@ content line 2 (Segment::Newline, "\n"), (Segment::MacroBody, "content line 2"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Define, @@ -1990,7 +1953,6 @@ content line 2 (Segment::Punct, "("), (Segment::Punct, ")"), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[PromptStyle::Define], ); @@ -2049,7 +2011,6 @@ fourth command. (Segment::Identifier, "command"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, @@ -2151,7 +2112,6 @@ fourth command. (Segment::Identifier, "command"), (Segment::EndCommand, "."), (Segment::Newline, "\n"), - (Segment::End, ""), ], &[ PromptStyle::Later, -- 2.30.2