From: Ben Pfaff Date: Mon, 26 Aug 2024 15:31:07 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2c0adcfd4c36ab68cd22138b788d4f92e72f398c;p=pspp work --- diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 3d7a9ebc7d..12bbc1e2ba 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -26,6 +26,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -243,7 +249,7 @@ version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -295,6 +301,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -489,6 +501,12 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -581,6 +599,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -778,6 +805,31 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ouroboros" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "944fa20996a25aded6b4795c6d63f10014a7a83f8be9828a11860b08c5fc4a67" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39b0deead1528fd0e5947a8546a9642a9777c25f6e1e26f34c97b204bbb465bd" +dependencies = [ + "heck 0.4.1", + "itertools", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + [[package]] name = "parking_lot" version = "0.12.3" @@ -848,6 +900,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] + [[package]] name = "pspp" version = "1.0.0" @@ -872,6 +937,7 @@ dependencies = [ "num-derive", "num-traits", "ordered-float", + "ouroboros", "thiserror", "unicase", "unicode-width", @@ -1051,6 +1117,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = 
"static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -1560,3 +1632,9 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 41b2f02c6f..9ed3cd32cb 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -28,6 +28,7 @@ unicode-width = "0.1.13" chardetng = "0.1.17" enum-map = "2.7.3" flagset = "0.4.6" +ouroboros = "0.18.4" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/engine.rs b/rust/pspp/src/engine.rs index fa4940fccb..59021a184e 100644 --- a/rust/pspp/src/engine.rs +++ b/rust/pspp/src/engine.rs @@ -10,10 +10,10 @@ use crate::{ pub struct Engine; impl Engine { - fn new() -> Self { + pub fn new() -> Self { Self } - fn run(&mut self, mut source: NewSource) { + pub fn run(&mut self, mut source: NewSource) { let macros = MacroSet::new(); while let Some(tokens) = source.read_command(¯os) { let error: Box = Box::new(|diagnostic| { @@ -35,7 +35,7 @@ mod tests { #[test] fn test_echo() { let mut engine = Engine::new(); - engine.run(NewSource::new_default(SourceFile::for_file_contents( + engine.run(NewSource::new_default(&SourceFile::for_file_contents( "ECHO 'hi there'.\nECHO 'bye there'.\n".to_string(), Some("test.sps".to_string()), UTF_8, diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs index 1cb694bbd6..f335f67729 100644 --- a/rust/pspp/src/lex/lexer.rs +++ b/rust/pspp/src/lex/lexer.rs @@ -1,13 +1,14 @@ use std::{ borrow::{Borrow, Cow}, - collections::{HashMap, VecDeque}, + collections::VecDeque, fmt::{Debug, Formatter, Result as FmtResult, Write}, fs, io::Result as IoResult, iter::once, - mem::{self, take}, + mem::take, ops::{Bound, Range, RangeBounds, RangeInclusive}, path::Path, + ptr, sync::Arc, }; @@ -17,7 +18,6 @@ use thiserror::Error as ThisError; use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; use crate::{ - lex::scan::Incomplete, macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser}, message::{Category, Diagnostic, Location, Point, Severity}, settings::Settings, @@ -25,7 +25,7 @@ use crate::{ use super::{ scan::{MergeResult, ScanError, ScanToken}, - segment::{Segment, Segmenter, Syntax}, + segment::{Segmenter, Syntax}, token::Token, }; @@ -155,76 +155,6 @@ impl Default for SourceFile { } } -/// # Token pipeline -/// -/// Tokens pass through a pipeline with the following stages. Each token -/// eventually made available to the parser passes through of these stages. -/// The stages are named after the processing that happens in each one. -/// -/// Initially, tokens come from the segmenter and scanner to `pp`: -/// -/// - `pp`: Tokens that need to pass through the macro preprocessor to end up -/// in `merge`. -/// -/// - `merge`: Tokens that need to pass through -/// [`super::scan::ScanToken::merge`] to end up in `parse`. -/// -/// - `parse`: Tokens available to the client for parsing. -/// -/// `pp` and `merge` store tokens only temporarily until they pass into `parse`. 
-/// Tokens then live in `parse` until the command is fully consumed, at which -/// time they are freed together. -pub struct Source { - /// Error-handling mode. - error_handling: ErrorHandling, - - file: SourceFile, - - /// 0-based line number of the first line not yet written to the journal. - journal_line: usize, - - /// Byte offset of first character not yet scanned as token. - seg_pos: usize, - - /// Tokens that need to pass through the macro preprocessor to end up in - /// `merge`. - pp: VecDeque, - - /// Tokens that need to pass through [`super::scan::ScanToken::merge`] to - /// end up in `parse`. - merge: VecDeque, - - eof: bool, - - /// Tokens available to the client for parsing. - parse: Vec, - - /// Offset in `parse` of the current token. - parse_ofs: usize, - - segmenter: Segmenter, - - suppress_next_newline: bool, -} - -impl Default for Source { - fn default() -> Self { - Self { - error_handling: ErrorHandling::default(), - file: SourceFile::default(), - journal_line: 0, - seg_pos: 0, - pp: VecDeque::new(), - merge: VecDeque::new(), - eof: false, - parse: Vec::new(), - parse_ofs: 0, - segmenter: Segmenter::new(Syntax::default(), false), - suppress_next_newline: false, - } - } -} - trait StripNewline { fn strip_newline(&self) -> &str; } @@ -236,318 +166,6 @@ impl StripNewline for str { } } -impl Source { - pub fn new(file: SourceFile, syntax: Syntax, error_handling: ErrorHandling) -> Self { - Self { - file, - error_handling, - segmenter: Segmenter::new(syntax, false), - ..Source::default() - } - } - - pub fn new_default(file: SourceFile) -> Self { - Self::new(file, Syntax::default(), ErrorHandling::default()) - } - - fn get_pp(&mut self, context: &Context) -> bool { - let Some((seg_len, seg_type)) = self - .segmenter - .push(&self.file.buffer[self.seg_pos..], true) - .unwrap() - else { - return false; - }; - - let pos = self.seg_pos..self.seg_pos + seg_len; - self.seg_pos += seg_len; - - let scan_token = ScanToken::from_segment(&self.file.buffer[pos.clone()], seg_type); - - let n_lines = match (seg_type, self.suppress_next_newline) { - (Segment::EndCommand, false) => { - self.suppress_next_newline = true; - 1 - } - (Segment::Newline, true) => { - self.suppress_next_newline = false; - 0 - } - (Segment::Newline, false) => 1, - _ => 0, - }; - for line_num in self.journal_line..self.journal_line + n_lines { - let _line = &self.file.get_line(line_num as i32).strip_newline(); - // XXX submit the line as syntax - } - self.journal_line += n_lines; - - match scan_token { - None => false, - Some(ScanToken::Token(token)) => { - self.pp.push_back(LexToken { - token, - pos, - macro_rep: None, - }); - true - } - Some(ScanToken::Error(error)) => { - (context.error)( - Location { - file_name: self.file.file_name.clone(), - span: Some( - self.file.offset_to_point(pos.start) - ..self.file.offset_to_point(pos.end), - ), - omit_underlines: false, - }, - error.into(), - ); - false - } - } - } - - fn get_merge(&mut self, context: &Context) -> bool { - if self.pp.is_empty() && !self.get_pp(context) { - return false; - } - - if !Settings::global().macros.expand { - self.merge.append(&mut self.pp); - return true; - } - - // Now pass tokens one-by-one to the macro expander. - let Some(mut parser) = Parser::new(context.macros, &self.pp[0].token) else { - // Common case where there is no macro to expand. - self.merge.push_back(self.pp.pop_front().unwrap()); - return true; - }; - for ofs in 1.. 
{ - if self.pp.len() <= ofs && !self.get_pp(context) { - // This should not be reachable because we always get a - // `Token::EndCommand` at the end of an input file, which should - // always terminate macro expansion. - unreachable!(); - } - let token = &self.pp[ofs]; - if parser.push(&token.token, &self.file.buffer[token.pos.clone()], &|e| { - println!("{e:?}") - }) == ParseStatus::Complete - { - break; - } - } - let call = parser.finish(); - if call.len() == 0 { - // False alarm: no macro to expand after all. - self.merge.push_back(self.pp.pop_front().unwrap()); - return true; - } - - // Expand the tokens. - let c0 = &self.pp[0]; - let c1 = &self.pp[call.len() - 1]; - let mut expansion = Vec::new(); - call.expand( - self.segmenter.syntax(), - self.file.token_location(c0..=c1), - &mut expansion, - |e| println!("{e:?}"), - ); - let retval = !expansion.is_empty(); - - if Settings::global().macros.print_expansions { - // XXX - } - - // Append the macro expansion tokens to the lookahead. - let mut macro_rep = String::new(); - let mut pos = Vec::with_capacity(expansion.len()); - for [prefix, token] in macro_tokens_to_syntax(expansion.as_slice()) { - macro_rep.push_str(prefix); - let len = macro_rep.len(); - pos.push(len..=len + token.len() - 1); - } - let macro_rep = Arc::new(macro_rep); - for (index, token) in expansion.into_iter().enumerate() { - let lt = LexToken { - token: token.token, - pos: c0.pos.start..c1.pos.end, - macro_rep: Some(MacroRepresentation { - expansion: Arc::clone(¯o_rep), - pos: pos[index].clone(), - }), - }; - self.merge.push_back(lt); - } - self.pp.drain(..call.len()); - retval - } - - fn get_parse(&mut self, context: &Context) -> bool { - loop { - match ScanToken::merge(|index| { - if let Some(token) = self.merge.get(index) { - Ok(Some(&token.token)) - } else if self.eof { - Ok(None) - } else { - Err(Incomplete) - } - }) { - Ok(Some(MergeResult::Copy)) => { - self.parse.push(self.merge.pop_front().unwrap()); - return true; - } - Ok(Some(MergeResult::Expand { n, token })) => { - let first = &self.merge[0]; - let last = &self.merge[n - 1]; - self.parse.push(LexToken { - token, - pos: first.pos.start..last.pos.end, - macro_rep: match (&first.macro_rep, &last.macro_rep) { - (Some(a), Some(b)) if Arc::ptr_eq(&a.expansion, &b.expansion) => { - Some(MacroRepresentation { - expansion: a.expansion.clone(), - pos: *a.pos.start()..=*b.pos.end(), - }) - } - _ => None, - }, - }); - self.merge.drain(..n); - return true; - } - Ok(None) => return false, - Err(Incomplete) => { - debug_assert!(!self.eof); - if !self.get_merge(context) { - self.eof = true; - } - } - } - } - } - - fn ofs_location(&self, range: RangeInclusive) -> Location { - if *range.start() <= *range.end() && *range.end() < self.parse.len() { - self.file - .token_location(&self.parse[*range.start()]..=&self.parse[*range.end()]) - } else { - Location { - file_name: self.file.file_name.clone(), - span: None, - omit_underlines: false, - } - } - } - - fn token(&self) -> &Token { - &self.parse[self.parse_ofs].token - } - - fn next(&mut self, offset: isize, context: &Context) -> &Token { - let Some(index) = offset.checked_add(self.parse_ofs as isize) else { - return &Token::EndCommand; - }; - let Ok(index) = usize::try_from(index) else { - return &Token::EndCommand; - }; - - while index >= self.parse.len() { - if let Some(token) = self.parse.last() { - match token.token { - Token::EndCommand => return &Token::EndCommand, - _ => (), - } - } - self.get_parse(context); - } - &self.parse[index].token - } - - /// If the tokens in 
`ofs` contains a macro call, this returns the raw - /// syntax for the macro call (not for the expansion) and for any other - /// tokens included in that range. The syntax is encoded in UTF-8 and in - /// the original form supplied to the lexer so that, for example, it may - /// include comments, spaces, and new-lines if it spans multiple tokens. - /// - /// Returns `None` if the token range doesn't include a macro call. - fn get_macro_call(&self, ofs: RangeInclusive) -> Option<&str> { - if self - .parse - .get(ofs.clone()) - .unwrap_or_default() - .iter() - .all(|token| token.macro_rep.is_none()) - { - return None; - } - - let token0 = &self.parse[*ofs.start()]; - let token1 = &self.parse[*ofs.end()]; - Some(&self.file.buffer[token0.pos.start..token1.pos.end]) - } - - fn is_empty(&self) -> bool { - self.file.buffer.is_empty() - } - - fn diagnostic( - &self, - severity: Severity, - ofs: RangeInclusive, - text: String, - ) -> Diagnostic { - let mut s = String::with_capacity(text.len() + 16); - if self.is_empty() { - s.push_str("At end of input: "); - } else if let Some(call) = self.get_macro_call(ofs.clone()) { - write!(&mut s, "In syntax expanded from `{}`: ", ellipsize(call)).unwrap(); - } - - if !text.is_empty() { - s.push_str(&text); - } else { - s.push_str("Syntax error."); - } - - if !s.ends_with('.') { - s.push('.'); - } - - let location = self.ofs_location(ofs); - let mut source = Vec::new(); - if let Some(Range { - start: Point { line: l0, .. }, - end: Point { line: l1, .. }, - }) = location.span - { - let lines = if l1 - l0 > 3 { - vec![l0, l0 + 1, l1] - } else { - (l0..=l1).collect() - }; - for line_number in lines { - source.push((line_number, self.file.get_line(line_number).to_string())); - } - } - - Diagnostic { - category: Category::Syntax, - severity, - location, - source, - stack: Vec::new(), - command_name: None, // XXX - text: s, - } - } -} - fn ellipsize(s: &str) -> Cow { if s.width() > 64 { let mut out = String::new(); @@ -567,10 +185,12 @@ fn ellipsize(s: &str) -> Cow { } /// A token in a [`Source`]. -struct LexToken { +struct LexToken<'a> { /// The regular token. token: Token, + file: &'a SourceFile, + /// For a token obtained through the lexer in an ordinary way, this is the /// location of the token in the [`Source`]'s buffer. 
/// @@ -591,15 +211,15 @@ struct LexError { pos: Range, } -impl Borrow for LexToken { +impl Borrow for LexToken<'_> { fn borrow(&self) -> &Token { &self.token } } -impl LexToken { - fn representation<'a>(&self, source: &'a SourceFile) -> &'a str { - &source.buffer[self.pos.clone()] +impl LexToken<'_> { + fn representation(&self) -> &str { + &self.file.buffer[self.pos.clone()] } } @@ -611,169 +231,6 @@ struct MacroRepresentation { pos: RangeInclusive, } -pub struct Lexer { - source: Source, - stack: Vec, - macros: MacroSet, - error: Box, -} - -struct Context<'a> { - macros: &'a MacroSet, - error: &'a Box, -} - -impl Lexer { - pub fn new(error: Box) -> Self { - Self { - source: Source::default(), - stack: Vec::new(), - macros: HashMap::new(), - error, - } - } - - pub fn get(&mut self) -> &Token { - if self.source.parse_ofs < self.source.parse.len() { - if let Token::EndCommand = self.source.token() { - self.source.parse.clear(); - self.source.parse_ofs = 0; - } else { - self.source.parse_ofs += 1; - } - } - - while self.source.parse_ofs == self.source.parse.len() { - let context = Context { - macros: &self.macros, - error: &self.error, - }; - if !self.source.get_parse(&context) { - if !self.pop_stack() { - return &Token::EndCommand; - } - } - } - self.source.token() - } - - fn pop_stack(&mut self) -> bool { - if let Some(new_source) = self.stack.pop() { - self.source = new_source; - true - } else { - self.source = Source::default(); - self.source.parse.push(LexToken { - token: Token::EndCommand, - pos: 0..0, - macro_rep: None, - }); - false - } - } - - /// Inserts `source` so that the next token comes from it. This is only - /// permitted when the lexer is either empty or at `Token::EndCommand`. - pub fn include(&mut self, mut source: Source) { - // XXX what's the right assertion? - let context = Context { - macros: &self.macros, - error: &self.error, - }; - source.get_parse(&context); - let old_source = mem::replace(&mut self.source, source); - self.stack.push(old_source); - } - - /// Inserts `source` so that it will be read after all the other sources. - pub fn append(&mut self, mut source: Source) { - let context = Context { - macros: &self.macros, - error: &self.error, - }; - source.get_parse(&context); - self.stack.insert(0, source); - } - - pub fn token(&self) -> &Token { - self.source.token() - } - - pub fn next(&mut self, offset: isize) -> &Token { - let context = Context { - macros: &self.macros, - error: &self.error, - }; - self.source.next(offset, &context) - } - - pub fn error(&self, text: S) -> Diagnostic - where - S: ToString, - { - self.diagnostic( - Severity::Error, - self.source.parse_ofs..=self.source.parse_ofs, - text, - ) - } - - pub fn diagnostic( - &self, - severity: Severity, - ofs: RangeInclusive, - text: S, - ) -> Diagnostic - where - S: ToString, - { - self.source.diagnostic(severity, ofs, text.to_string()) - } - - pub fn error_handling(&self) -> ErrorHandling { - self.source.error_handling - } - - /// Discards all lookahead tokens, then discards all input sources - /// until it encounters one with error mode [ErrorHandling::Terminal] or until it - /// runs out of input sources. 
- pub fn discard_noninteractive(&mut self) { - while self.source.error_handling != ErrorHandling::Ignore { - self.source.pp.clear(); - self.source.merge.clear(); - self.source.parse.clear(); - self.source.parse_ofs = 0; - - if self.source.error_handling == ErrorHandling::Terminal || !self.pop_stack() { - return; - } - } - } - - /// Advances past any tokens up to [Token::EndCommand] or [Token::End]. - pub fn discard_rest_of_command(&mut self) { - while !matches!(self.token(), Token::EndCommand) { - self.get(); - } - } - - pub fn at_end(&self) -> bool { - match self.source.token() { - Token::EndCommand => true, - _ => false, - } - } - - pub fn match_(&mut self, token: &Token) -> bool { - if self.token() == token { - self.get(); - true - } else { - false - } - } -} - #[derive(ThisError, Clone, Debug, PartialEq, Eq)] pub enum Error { /// Error forming tokens from the input. @@ -871,69 +328,114 @@ lis|.\0", } } */ -pub struct Tokens { - file: Arc, - tokens: Vec, +pub struct Tokens<'a> { + tokens: Vec>, } -impl Debug for Tokens { +impl Debug for Tokens<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { write!(f, "Tokens {{ ")?; for (index, token) in self.tokens.iter().enumerate() { if index > 0 { write!(f, ", ")?; } - write!(f, "{:?}", token.representation(&self.file))?; + write!(f, "{:?}", token.representation())?; } write!(f, " }}") } } -impl Tokens { - /// If the tokens in `ofs` contains a macro call, this returns the raw +impl Tokens<'_> {} + +pub struct NewLexer<'a> { + backing: &'a Tokens<'a>, + tokens: &'a [LexToken<'a>], + start: usize, +} + +impl<'a> NewLexer<'a> { + pub fn new(backing: &'a Tokens) -> Self { + Self { + backing, + tokens: backing.tokens.as_slice(), + start: 0, + } + } + + pub fn get(&self, index: usize) -> Option<&Token> { + self.tokens.get(index).map(|token| &token.token) + } + + pub fn error(&self, range: B, text: S) -> Diagnostic + where + S: ToString, + B: RangeBounds, + { + self.sublexer(range) + .diagnostic(Severity::Error, text.to_string()) + } + + pub fn sublexer(&self, range: B) -> Self + where + B: RangeBounds, + { + Self { + backing: self.backing, + start: self.start + + match range.start_bound() { + Bound::Included(index) => *index, + Bound::Excluded(index) => *index + 1, + Bound::Unbounded => 0, + }, + tokens: &self.backing.tokens + [(range.start_bound().cloned(), range.end_bound().cloned())], + } + } + + fn file(&self) -> Option<&SourceFile> { + if !self.tokens.is_empty() { + let first = &self.tokens[0]; + let last = &self.tokens[self.tokens.len() - 1]; + if ptr::eq(first.file, last.file) { + return Some(first.file); + } + } + None + } + + /// If the tokens contains a macro call, this returns the raw /// syntax for the macro call (not for the expansion) and for any other /// tokens included in that range. The syntax is encoded in UTF-8 and in /// the original form supplied to the lexer so that, for example, it may /// include comments, spaces, and new-lines if it spans multiple tokens. /// /// Returns `None` if the token range doesn't include a macro call. 
- fn get_macro_call(&self, ofs: RangeInclusive) -> Option<&str> { - if self - .tokens - .get(ofs.clone()) - .unwrap_or_default() - .iter() - .all(|token| token.macro_rep.is_none()) - { - return None; + fn get_macro_call(&self) -> Option<&str> { + if self.tokens.iter().any(|token| token.macro_rep.is_some()) { + let token0 = &self.tokens[0]; + let token1 = &self.tokens[self.tokens.len() - 1]; + if let Some(file) = self.file() { + let start = token0.pos.start; + let end = token1.pos.end; + if start < end { + return Some(&file.buffer[start..end]); + } + } } - - let token0 = &self.tokens[*ofs.start()]; - let token1 = &self.tokens[*ofs.end()]; - Some(&self.file.buffer[token0.pos.start..token1.pos.end]) + None } - fn ofs_location(&self, range: RangeInclusive) -> Location { - if *range.start() <= *range.end() && *range.end() < self.tokens.len() { - self.file - .token_location(&self.tokens[*range.start()]..=&self.tokens[*range.end()]) + fn location(&self) -> Location { + if let Some(file) = self.file() { + file.token_location(self.tokens.first().unwrap()..=self.tokens.last().unwrap()) } else { - Location { - file_name: self.file.file_name.clone(), - span: None, - omit_underlines: false, - } + Location::default() } } - pub fn diagnostic( - &self, - severity: Severity, - ofs: RangeInclusive, - text: String, - ) -> Diagnostic { + pub fn diagnostic(&self, severity: Severity, text: String) -> Diagnostic { let mut s = String::new(); - if let Some(call) = self.get_macro_call(ofs.clone()) { + if let Some(call) = self.get_macro_call() { write!(&mut s, "In syntax expanded from `{}`: ", ellipsize(call)).unwrap(); } @@ -947,20 +449,21 @@ impl Tokens { s.push('.'); } - let location = self.ofs_location(ofs); + let location = self.location(); let mut source = Vec::new(); if let Some(Range { start: Point { line: l0, .. }, end: Point { line: l1, .. 
}, }) = location.span { + let file = self.file().unwrap(); let lines = if l1 - l0 > 3 { vec![l0, l0 + 1, l1] } else { (l0..=l1).collect() }; for line_number in lines { - source.push((line_number, self.file.get_line(line_number).to_string())); + source.push((line_number, file.get_line(line_number).to_string())); } } @@ -976,78 +479,21 @@ impl Tokens { } } -pub struct NewLexer<'a> { - backing: &'a Tokens, - tokens: &'a [LexToken], - start: usize, -} - -impl<'a> NewLexer<'a> { - pub fn new(backing: &'a Tokens) -> Self { - Self { - backing, - tokens: backing.tokens.as_slice(), - start: 0, - } - } - - pub fn get(&self, index: usize) -> Option<&Token> { - self.tokens.get(index).map(|token| &token.token) - } - - pub fn error(&self, range: B, text: S) -> Diagnostic - where - S: ToString, - B: RangeBounds, - { - let start = match range.start_bound() { - Bound::Included(&index) => index, - Bound::Excluded(&index) => index + 1, - Bound::Unbounded => 0, - }; - let end = match range.end_bound() { - Bound::Included(&index) => index + 1, - Bound::Excluded(&index) => index, - Bound::Unbounded => self.tokens.len(), - }; - let abs_range = (start + self.start)..=(end + self.start - 1); - self.backing - .diagnostic(Severity::Error, abs_range, text.to_string()) - } - - pub fn sublexer(&self, range: B) -> Self - where - B: RangeBounds, - { - Self { - backing: self.backing, - start: self.start - + match range.start_bound() { - Bound::Included(index) => *index, - Bound::Excluded(index) => *index + 1, - Bound::Unbounded => 0, - }, - tokens: &self.backing.tokens - [(range.start_bound().cloned(), range.end_bound().cloned())], - } - } -} - -pub struct NewSource { - file: Arc, +pub struct NewSource<'a> { + file: &'a SourceFile, segmenter: Segmenter, seg_pos: usize, - lookahead: VecDeque, + lookahead: VecDeque>, } -impl NewSource { - pub fn new_default(file: SourceFile) -> Self { +impl<'a> NewSource<'a> { + pub fn new_default(file: &'a SourceFile) -> Self { Self::new(file, Syntax::default()) } - pub fn new(file: SourceFile, syntax: Syntax) -> Self { + pub fn new(file: &'a SourceFile, syntax: Syntax) -> Self { Self { - file: Arc::new(file), + file, segmenter: Segmenter::new(syntax, false), seg_pos: 0, lookahead: VecDeque::new(), @@ -1062,7 +508,6 @@ impl NewSource { .position(|token| token.token == Token::EndCommand) { return Some(Tokens { - file: self.file.clone(), tokens: self.lookahead.drain(..=end).collect(), }); } @@ -1088,6 +533,7 @@ impl NewSource { Some(ScanToken::Token(token)) => { let end = token == Token::EndCommand; pp.push_back(LexToken { + file: self.file, token, pos, macro_rep: None, @@ -1123,6 +569,7 @@ impl NewSource { let first = &merge[0]; let last = &merge[n - 1]; self.lookahead.push_back(LexToken { + file: self.file, token, pos: first.pos.start..last.pos.end, macro_rep: match (&first.macro_rep, &last.macro_rep) { @@ -1145,8 +592,8 @@ impl NewSource { fn expand_macro( &self, macros: &MacroSet, - src: &mut VecDeque, - dst: &mut VecDeque, + src: &mut VecDeque>, + dst: &mut VecDeque>, ) { // Now pass tokens one-by-one to the macro expander. let Some(mut parser) = Parser::new(macros, &src[0].token) else { @@ -1196,6 +643,7 @@ impl NewSource { let macro_rep = Arc::new(macro_rep); for (index, token) in expansion.into_iter().enumerate() { let lt = LexToken { + file: self.file, token: token.token, pos: c0.pos.start..c1.pos.end, macro_rep: Some(MacroRepresentation { @@ -1231,7 +679,7 @@ CROSSTABS VARIABLES X (1,7) Y (1,7) /TABLES X BY Y. 
Some(String::from("crosstabs.sps")), UTF_8, ); - let mut source = NewSource::new_default(file); + let mut source = NewSource::new_default(&file); while let Some(tokens) = source.read_command(&MacroSet::new()) { println!("{tokens:?}"); } diff --git a/rust/pspp/src/message.rs b/rust/pspp/src/message.rs index a3ba1d8e9f..ffc09c142c 100644 --- a/rust/pspp/src/message.rs +++ b/rust/pspp/src/message.rs @@ -49,7 +49,7 @@ impl Point { } /// Location relevant to an diagnostic message. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct Location { /// File name, if any. pub file_name: Option>,