From: Ben Pfaff Date: Sat, 14 Sep 2024 16:55:12 +0000 (-0700) Subject: parsing approach works, descriptives can be parsed X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6059d0e6b1efff83a3762d70fac36cebb69a4bdc;p=pspp parsing approach works, descriptives can be parsed --- diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs index 58bcbd2c60..6fd7a8678d 100644 --- a/rust/pspp-derive/src/lib.rs +++ b/rust/pspp-derive/src/lib.rs @@ -1,7 +1,7 @@ use proc_macro::TokenStream; use proc_macro2::{Literal, TokenStream as TokenStream2}; -use quote::{format_ident, quote}; -use syn::{spanned::Spanned, Attribute, DataEnum, DataStruct, DeriveInput, Error, Token}; +use quote::{quote, ToTokens}; +use syn::{spanned::Spanned, Attribute, DataEnum, DataStruct, DeriveInput, Error, Fields, Token}; #[proc_macro_derive(FromTokens, attributes(pspp))] pub fn from_tokens_derive(input: TokenStream) -> TokenStream { @@ -27,30 +27,37 @@ fn parse_derive_input(ast: DeriveInput) -> Result { } fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { + let struct_attrs = StructAttrs::parse(&ast.attrs)?; let mut body = TokenStream2::new(); for (index, variant) in e.variants.iter().enumerate() { - let field_attrs = parse_attributes(&variant.attrs)?; + let field_attrs = FieldAttrs::parse(&variant.attrs)?; if index > 0 { body.extend(quote! { else }.into_iter()); } let ident = &variant.ident; - if let Some(syntax) = field_attrs.syntax { - body.extend(quote! { if cursor.match_syntax(#syntax) { Self::#ident }}); + let ident_string = ident.to_string(); + let match_expr = if let Some(syntax) = field_attrs.syntax { + quote! { cursor.match_syntax(#syntax) } + } else if ident_string.eq_ignore_ascii_case("all") { + quote! { cursor.match_(&Token::Punct(Punct::All))} } else { - let ident_string = ident.to_string(); - if ident_string.eq_ignore_ascii_case("all") { - body.extend(quote! { if cursor.match_(&Token::Punct(Punct::All)) { Self::#ident }}); - } else { - body.extend(quote! { if cursor.match_keyword(#ident_string) { Self::#ident }}); - }; - } + quote! { cursor.match_keyword(#ident_string)} + }; + let construction = construct_fields(&variant.fields); + let check_equals = if struct_attrs.required_equals && !variant.fields.is_empty() { + quote! { cursor.force(&Token::Punct(Punct::Equals))?; } + } else { + quote!{} + }; + body.extend(quote! { if #match_expr { #check_equals Self::#ident #construction } }); } body.extend(quote! { else { return Err(cursor.error("Syntax error.")); } }); let name = &ast.ident; + let lifetime = struct_attrs.lifetime(); let output = quote! { - impl FromTokens for #name { - fn from_tokens<'a>(cursor: &Cursor<'a>) -> Result { + impl<'a> FromTokens<'a> for #name #lifetime { + fn from_tokens(cursor: &Cursor<'a>) -> Result { Ok(#body) } } @@ -59,25 +66,33 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { Ok(output) } -fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result { +fn construct_fields(fields: &Fields) -> impl ToTokens { let mut construction = TokenStream2::new(); - let mut body = TokenStream2::new(); - for (index, field) in s.fields.iter().enumerate() { - let varname = format_ident!("field{}", index); - let ty = &field.ty; - body.extend(quote! { let #varname = <#ty>::from_tokens(cursor)?; }); - let name = field.ident.as_ref().unwrap(); - if index > 0 { - construction.extend(quote! { , }); + for field in fields { + let value = quote! { FromTokens::from_tokens(cursor)? }; + if let Some(name) = field.ident.as_ref() { + construction.extend(quote! 
{ #name: #value, }); + } else { + construction.extend(quote! { #value, }); } - construction.extend(quote! { #name: #varname }); } + match fields { + Fields::Named(_) => quote! { { #construction } }, + Fields::Unnamed(_) => quote! { ( #construction ) }, + Fields::Unit => quote! {}, + } +} + +fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result { + let struct_attrs = StructAttrs::parse(&ast.attrs)?; let name = &ast.ident; + let construction = construct_fields(&s.fields); + let lifetime = struct_attrs.lifetime(); let output = quote! { - impl FromTokens for #name { - fn from_tokens<'a>(cursor: &Cursor<'a>) -> Result { - #body Ok(#name { #construction }) + impl<'a> FromTokens<'a> for #name #lifetime { + fn from_tokens(cursor: &Cursor<'a>) -> Result { + Ok(#name #construction) } } }; @@ -90,23 +105,58 @@ struct FieldAttrs { syntax: Option, } -fn parse_attributes(attributes: &[Attribute]) -> Result { - println!("{:?}", &attributes); - let mut field_attrs = FieldAttrs::default(); - for attr in attributes { - if !attr.path().is_ident("pspp") { - continue; +impl FieldAttrs { + fn parse(attributes: &[Attribute]) -> Result { + let mut field_attrs = Self::default(); + for attr in attributes { + if !attr.path().is_ident("pspp") { + continue; + } + attr.parse_nested_meta(|meta| { + if meta.path.is_ident("syntax") { + //println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); + meta.input.parse::()?; + let syntax = meta.input.parse::()?; + //println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); + field_attrs.syntax = Some(syntax); + } else { + return Err(Error::new(meta.path.span(), "Unknown attribute")); + } + Ok(()) + })?; } - attr.parse_nested_meta(|meta| { - if meta.path.is_ident("syntax") { - println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); - meta.input.parse::()?; - let syntax = meta.input.parse::()?; - println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); - field_attrs.syntax = Some(syntax); + Ok(field_attrs) + } +} + +#[derive(Default)] +struct StructAttrs { + add_lifetime: bool, + required_equals: bool, +} + +impl StructAttrs { + fn lifetime(&self) -> Option { + self.add_lifetime.then(|| quote! 
{ <'a> }) + } + fn parse(attributes: &[Attribute]) -> Result { + //println!("{:?}", &attributes); + let mut field_attrs = Self::default(); + for attr in attributes { + if !attr.path().is_ident("pspp") { + continue; } - Ok(()) - })?; + attr.parse_nested_meta(|meta| { + if meta.path.is_ident("add_lifetime") { + field_attrs.add_lifetime = true; + } else if meta.path.is_ident("required_equals") { + field_attrs.required_equals = true; + } else { + return Err(Error::new(meta.path.span(), "Unknown attribute")); + } + Ok(()) + })?; + } + Ok(field_attrs) } - Ok(field_attrs) } diff --git a/rust/pspp/src/command.rs b/rust/pspp/src/command.rs index 2f43b1e93a..6a20e85473 100644 --- a/rust/pspp/src/command.rs +++ b/rust/pspp/src/command.rs @@ -5,6 +5,7 @@ use flagset::{flags, FlagSet}; use pspp_derive::FromTokens; use crate::{ + identifier::Identifier, integer::ToInteger, lex::{ command_name::CommandMatcher, @@ -97,16 +98,6 @@ trait ParsedCommand { */ /* -struct Descriptives<'a> { - subcommands: Vec> -} -enum DescriptivesSubcommand<'a> { - Variables(TokenSlice<'a>), - Missing(TokenSlice<'a>), - Save, - Statistics(Vec), - Sort(Sort), -} @@ -115,34 +106,210 @@ struct Subcommand { name: &str, }*/ -trait FromTokens { - fn from_tokens<'a>(tokens: &Cursor<'a>) -> Result +trait FromTokens<'a> { + fn from_tokens(cursor: &Cursor<'a>) -> Result where Self: Sized; } -impl FromTokens for Option +impl<'a, T> FromTokens<'a> for Option where - T: FromTokens, + T: FromTokens<'a>, { - fn from_tokens<'a>(tokens: &Cursor<'a>) -> Result + fn from_tokens(cursor: &Cursor<'a>) -> Result where Self: Sized, { - match T::from_tokens(tokens) { + let saved_position = cursor.get_pos(); + match T::from_tokens(cursor) { Ok(result) => Ok(Some(result)), - Err(_error) => Ok(None) + Err(_error) => { + cursor.set_pos(saved_position); + Ok(None) + } + } + } +} + +impl<'a, T> FromTokens<'a> for Vec +where + T: FromTokens<'a>, +{ + fn from_tokens(cursor: &Cursor<'a>) -> Result + where + Self: Sized, + { + let mut vector = Vec::new(); + while let Ok(result) = cursor.with_pos(|| T::from_tokens(cursor)) { + vector.push(result); + } + Ok(vector) + } +} + +impl<'a> FromTokens<'a> for TokenSlice<'a> { + fn from_tokens(cursor: &Cursor<'a>) -> Result + where + Self: Sized, + { + Ok(cursor.take_remainder()) + } +} + +/* +impl < 'a > FromTokens < 'a > for DescriptivesSubcommand<'a> +{ + fn from_tokens(cursor : & Cursor < 'a >) -> Result < Self, Diagnostic > + { + Ok(if cursor.match_keyword("Variables") { Self :: Variables(cursor.take_remainder()) } else if + cursor.match_keyword("Missing") { Self :: Missing(cursor.take_remainder()) } else if + cursor.match_keyword("Save") { Self :: Save } else if + cursor.match_keyword("Statistics") { Self :: Statistics(Vec::new()) } else if + cursor.match_keyword("Sort") { Self :: Sort(Sort::from_tokens(cursor)?) 
} else + { return Err(cursor.error("Syntax error.")); }) + } +}*/ + +#[derive(FromTokens, Debug)] +#[pspp(add_lifetime)] +struct Descriptives<'a> { + subcommands: Vec>>, +} + +#[derive(Debug)] +struct Subcommand(pub T); + +impl<'a, T> FromTokens<'a> for Subcommand +where + T: FromTokens<'a>, +{ + fn from_tokens(cursor: &Cursor<'a>) -> Result + where + Self: Sized, + { + cursor.advance_until(|token| token != &Token::Punct(Punct::Slash)); + if cursor.at_end() { + return Err(cursor.error("Syntax error at end of input.")); + } + let start = cursor.get_pos(); + cursor.advance_until(|token| token == &Token::Punct(Punct::Slash)); + let subcommand = cursor.subcursor(start..cursor.get_pos()); + match T::from_tokens(&subcommand) { + Ok(result) => Ok(Self(result)), + Err(error) => { + cursor.set_pos(start); + Err(error) + } } } } -#[derive(FromTokens)] +/* +#[derive(FromTokens, Debug)] +#[pspp(add_lifetime, required_equals)]*/ +#[derive(Debug)] +enum DescriptivesSubcommand<'a> { + Variables(Vec>), + Missing(Vec), + Save, + Statistics(Vec), + Sort(Sort), +} + +impl<'a> FromTokens<'a> for DescriptivesSubcommand<'a> { + fn from_tokens(cursor: &Cursor<'a>) -> Result { + println!("{}:{}", file!(), line!()); + Ok(if cursor.match_keyword("Variables") { + println!("{}:{}", file!(), line!()); + cursor.force(&Token::Punct(Punct::Equals))?; + println!("{}:{}", file!(), line!()); + Self::Variables(FromTokens::from_tokens(cursor)?) + } else if cursor.match_keyword("Missing") { + cursor.force(&Token::Punct(Punct::Equals))?; + Self::Missing(FromTokens::from_tokens(cursor)?) + } else if cursor.match_keyword("Save") { + Self::Save + } else if cursor.match_keyword("Statistics") { + cursor.force(&Token::Punct(Punct::Equals))?; + Self::Statistics(FromTokens::from_tokens(cursor)?) + } else if cursor.match_keyword("Sort") { + cursor.force(&Token::Punct(Punct::Equals))?; + Self::Sort(FromTokens::from_tokens(cursor)?) 
+ } else { + return Err(cursor.error("Syntax error.")); + }) + } +} + +#[derive(FromTokens, Debug)] +enum Missing { + Variable, + Listwise, + Include, +} + +#[derive(FromTokens, Debug)] +#[pspp(add_lifetime)] +struct DescriptivesVarRange<'a> { + vars: VarRange<'a>, + z_name: Option>, +} + +#[derive(Debug)] +struct InParens(pub T); + +impl<'a, T> FromTokens<'a> for InParens +where + T: FromTokens<'a>, +{ + fn from_tokens(cursor: &Cursor<'a>) -> Result + where + Self: Sized, + { + cursor.force(&Token::Punct(Punct::LParen))?; + let inner = T::from_tokens(cursor)?; + cursor.force(&Token::Punct(Punct::RParen))?; + Ok(Self(inner)) + } +} + +#[derive(Debug)] +struct VarRange<'a> { + from: &'a Identifier, + to: Option<&'a Identifier>, +} + +impl<'a> FromTokens<'a> for VarRange<'a> { + fn from_tokens(cursor: &Cursor<'a>) -> Result + where + Self: Sized, + { + Ok(Self { + from: cursor.force_id()?, + to: cursor + .match_(&Token::Punct(Punct::To)) + .then(|| cursor.force_id()) + .transpose()?, + }) + } +} + +impl<'a> FromTokens<'a> for &'a Identifier { + fn from_tokens(cursor: &Cursor<'a>) -> Result + where + Self: Sized, + { + cursor.force_id() + } +} + +#[derive(FromTokens, Debug)] struct Sort { key: SortKey, direction: Option, } -#[derive(FromTokens)] +#[derive(FromTokens, Debug)] enum SortKey { Mean, SMean, @@ -157,7 +324,7 @@ enum SortKey { Name, } -#[derive(FromTokens)] +#[derive(FromTokens, Debug)] enum Direction { #[pspp(syntax = "(A)")] Ascending, @@ -165,7 +332,7 @@ enum Direction { Descending, } -#[derive(FromTokens)] +#[derive(FromTokens, Debug)] enum Statistic { Default, Mean, @@ -180,41 +347,6 @@ enum Statistic { Kurtosis, All, } -/* -impl FromTokens for Statistic { - fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result { - let cursor = Cursor::new(&tokens); - let statistic = if cursor.match_keyword("default") { - Self::Default - } else if cursor.match_keyword("stddev") { - Self::Stddev - } else if cursor.match_keyword("variance") { - Self::Variance - } else if cursor.match_keyword("mean") { - Self::Mean - } else if cursor.match_keyword("semean") { - Self::SeMean - } else if cursor.match_keyword("sum") { - Self::Sum - } else if cursor.match_keyword("min") { - Self::Min - } else if cursor.match_keyword("max") { - Self::Max - } else if cursor.match_keyword("skewness") { - Self::Skewness - } else if cursor.match_keyword("kurtosis") { - Self::Kurtosis - } else if cursor.match_(&Token::Punct(Punct::All)) { - Self::All - } else { - return Err(tokens.error("Syntax error expecting statistic.")) - }; - // XXX warn for trailing tokens - Ok(statistic) - } -}*/ - -struct Foo; fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec> { src.split(|token| token.token == Token::Punct(Punct::Slash)) @@ -225,6 +357,25 @@ fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec> { fn commands() -> &'static [Command] { fn new_commands() -> Vec { vec![ + Command { + allowed_states: FlagSet::full(), + enhanced_only: false, + testing_only: false, + no_abbrev: false, + name: "DESCRIPTIVES", + run: Box::new(|context| { + let cursor = context.lexer.cursor(); + println!("{}:{}", file!(), line!()); + while let Ok(subcommand) = >::from_tokens(&cursor) { + println!("{subcommand:?}"); + println!( + "{:?}", + DescriptivesSubcommand::from_tokens(&subcommand.0.cursor()) + ); + } + println!("{}:{}", file!(), line!()); + }), + }, Command { allowed_states: FlagSet::full(), enhanced_only: false, @@ -353,9 +504,9 @@ pub fn end_of_command(context: &Context, range: RangeFrom) -> Result, _state: State) { println!("{}:{}", file!(), 
line!()); match lexer.get_token(0) { - None | Some(Token::End) => (), - _ => { - if let Ok((command, n_tokens)) = parse_command_name(&mut lexer, error) { + None | Some(Token::End) => println!("{}:{}", file!(), line!()), + _ => match parse_command_name(&mut lexer, error) { + Ok((command, n_tokens)) => { let mut context = Context { error, lexer: lexer.subslice(n_tokens..lexer.len()), @@ -363,7 +514,8 @@ fn parse_in_state(mut lexer: TokenSlice, error: &Box, _state }; (command.run)(&mut context); } - } + Err(error) => println!("{error:?}"), + }, } } diff --git a/rust/pspp/src/engine.rs b/rust/pspp/src/engine.rs index 6ee4623303..6e9248dbb3 100644 --- a/rust/pspp/src/engine.rs +++ b/rust/pspp/src/engine.rs @@ -1,8 +1,6 @@ use crate::{ command::parse_command, - lex::{ - lexer::{TokenSlice, Source}, - }, + lex::lexer::{Source, TokenSlice}, macros::MacroSet, message::Diagnostic, }; @@ -15,17 +13,23 @@ impl Engine { } pub fn run(&mut self, mut source: Source) { let macros = MacroSet::new(); + println!("{}:{}", file!(), line!()); while let Some(tokens) = source.read_command(¯os) { + println!("{}:{}", file!(), line!()); let error: Box = Box::new(|diagnostic| { println!("{diagnostic}"); }); + println!("{}:{}", file!(), line!()); parse_command(TokenSlice::new(&tokens), &error); + println!("{}:{}", file!(), line!()); } } } #[cfg(test)] mod tests { + use std::sync::Arc; + use encoding_rs::UTF_8; use crate::lex::lexer::{Source, SourceFile}; @@ -35,10 +39,25 @@ mod tests { #[test] fn test_echo() { let mut engine = Engine::new(); - engine.run(Source::new_default(&SourceFile::for_file_contents( - "ECHO 'hi there'.\nECHO 'bye there'.\n".to_string(), - Some("test.sps".to_string()), - UTF_8, + engine.run(Source::new_default(&Arc::new( + SourceFile::for_file_contents( + "ECHO 'hi there'.\nECHO 'bye there'.\n".to_string(), + Some("test.sps".to_string()), + UTF_8, + ), + ))); + } + + #[test] + fn test_descriptives() { + println!("{}:{}", file!(), line!()); + let mut engine = Engine::new(); + engine.run(Source::new_default(&Arc::new( + SourceFile::for_file_contents( + "DESCRIPTIVES VARIABLES=a (za) b to c/MISSING=x y z/MISSING=VARIABLE INCLUDE/STATISTICS=DEFAULT/SAVE/SORT=SKEWNESS(A)\n".to_string(), + Some("test.sps".to_string()), + UTF_8, + ), ))); } } diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs index 51721cbe0c..dd514acc05 100644 --- a/rust/pspp/src/lex/lexer.rs +++ b/rust/pspp/src/lex/lexer.rs @@ -9,7 +9,6 @@ use std::{ mem::take, ops::{Range, RangeInclusive}, path::Path, - ptr, sync::Arc, }; @@ -19,6 +18,8 @@ use thiserror::Error as ThisError; use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; use crate::{ + identifier::Identifier, + lex::scan::StringScanner, macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser}, message::{Category, Diagnostic, Location, Point, Severity}, settings::Settings, @@ -186,11 +187,11 @@ fn ellipsize(s: &str) -> Cow { } /// A token in a [`Source`]. -pub struct LexToken<'a> { +pub struct LexToken { /// The regular token. pub token: Token, - pub file: &'a SourceFile, + pub file: Arc, /// For a token obtained through the lexer in an ordinary way, this is the /// location of the token in the [`Source`]'s buffer. 
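
[Note: the hunk above switches LexToken from borrowing a `&'a SourceFile` to owning an `Arc<SourceFile>`. That is what lets `Tokens` and `Source` drop their lifetime parameters later in this diff: a token can now outlive the lexer that produced it, and cloning a token only bumps a reference count. A minimal sketch of the ownership pattern, with simplified types — the real struct also carries `token` and `macro_rep`:

    use std::{ops::Range, sync::Arc};

    struct SourceFile {
        buffer: String,
    }

    struct LexToken {
        file: Arc<SourceFile>, // shared ownership; no lifetime parameter needed
        pos: Range<usize>,
    }

    impl LexToken {
        /// The token's original spelling, sliced out of the shared buffer.
        fn representation(&self) -> &str {
            &self.file.buffer[self.pos.clone()]
        }
    }

    /// Same-file checks become pointer-identity checks on the allocation,
    /// mirroring the Arc::ptr_eq change to TokenSlice::file() below.
    fn same_file(a: &LexToken, b: &LexToken) -> bool {
        Arc::ptr_eq(&a.file, &b.file)
    }

    fn main() {
        let file = Arc::new(SourceFile {
            buffer: "DESCRIPTIVES VARIABLES=x.".into(),
        });
        let t = LexToken { file: Arc::clone(&file), pos: 0..12 };
        let u = LexToken { file: Arc::clone(&file), pos: 13..22 };
        assert_eq!(t.representation(), "DESCRIPTIVES");
        assert!(same_file(&t, &u));
    }

The cost is one atomic refcount operation per token clone, which is negligible next to tokenization itself.]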
@@ -207,19 +208,25 @@ pub struct LexToken<'a> { macro_rep: Option, } +impl Debug for LexToken { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + self.token.fmt(f) + } +} + #[allow(dead_code)] struct LexError { error: ScanError, pos: Range, } -impl Borrow for LexToken<'_> { +impl Borrow for LexToken { fn borrow(&self) -> &Token { &self.token } } -impl LexToken<'_> { +impl LexToken { fn representation(&self) -> &str { &self.file.buffer[self.pos.clone()] } @@ -330,18 +337,18 @@ lis|.\0", } } */ -pub struct Tokens<'a> { - tokens: Vec>, +pub struct Tokens { + tokens: Vec, } -impl<'a> Tokens<'a> { - fn new(tokens: Vec>) -> Self { +impl Tokens { + fn new(tokens: Vec) -> Self { assert!(matches!(tokens.last().unwrap().token, Token::End)); Self { tokens } } } -impl Debug for Tokens<'_> { +impl Debug for Tokens { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { write!(f, "Tokens {{ ")?; for (index, token) in self.tokens.iter().enumerate() { @@ -354,8 +361,22 @@ impl Debug for Tokens<'_> { } } +#[derive(Clone)] pub struct TokenSlice<'a> { - tokens: &'a [LexToken<'a>], + tokens: &'a [LexToken], +} + +impl<'a> Debug for TokenSlice<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!(f, "TokenSlice {{ ")?; + for (index, token) in self.tokens[..self.tokens.len() - 1].iter().enumerate() { + if index > 0 { + write!(f, ", ")?; + } + write!(f, "{:?}", token.representation())?; + } + write!(f, " }}") + } } impl<'a> TokenSlice<'a> { @@ -369,11 +390,16 @@ impl<'a> TokenSlice<'a> { Cursor::new(self) } - pub fn get_token(&self, index: usize) -> Option<&Token> { - self.get(index).map(|token| &token.token) + pub fn get_token(&self, index: usize) -> Option<&'a Token> { + //self.get(index).map(|token| &token.token) + if index < self.len() { + Some(&self.tokens[index].token) + } else { + None + } } - pub fn get(&self, index: usize) -> Option<&LexToken> { + pub fn get(&self, index: usize) -> Option<&'a LexToken> { if index < self.len() { Some(&self.tokens[index]) } else { @@ -403,11 +429,11 @@ impl<'a> TokenSlice<'a> { self.tokens.last().unwrap() } - fn file(&self) -> Option<&SourceFile> { + fn file(&self) -> Option<&Arc> { let first = self.first(); let last = self.last(); - if ptr::eq(first.file, last.file) { - Some(first.file) + if Arc::ptr_eq(&first.file, &last.file) { + Some(&first.file) } else { None } @@ -520,7 +546,7 @@ impl<'a> TokenSlice<'a> { #[derive(Clone)] pub struct Cursor<'a> { - slice: &'a TokenSlice<'a>, + slice: TokenSlice<'a>, /// This allows [Self::force_string] etc. to advance while returning the /// token without cloning it. 
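
[Note: the `get_pos`/`set_pos`/`with_pos` methods added in the next hunk are the backtracking primitive that the new `Option<T>` and `Vec<T>` `FromTokens` impls rely on: a speculative parse that fails must rewind the cursor before the caller tries an alternative. A self-contained sketch of the same pattern, using a hypothetical `Tok` type in place of the real `TokenSlice`:

    use std::cell::Cell;

    #[derive(Debug, PartialEq)]
    enum Tok {
        Id(&'static str),
        Equals,
    }

    struct Cursor<'a> {
        toks: &'a [Tok],
        pos: Cell<usize>, // interior mutability: matching advances through &self
    }

    impl<'a> Cursor<'a> {
        fn match_(&self, want: &Tok) -> bool {
            if self.toks.get(self.pos.get()) == Some(want) {
                self.pos.set(self.pos.get() + 1);
                true
            } else {
                false
            }
        }

        /// Run a speculative parse, rewinding to the saved position on failure.
        fn with_pos<T, E>(&self, f: impl FnOnce() -> Result<T, E>) -> Result<T, E> {
            let saved = self.pos.get();
            let retval = f();
            if retval.is_err() {
                self.pos.set(saved);
            }
            retval
        }
    }

    fn main() {
        let toks = [Tok::Id("SORT"), Tok::Id("MEAN")];
        let cursor = Cursor { toks: &toks, pos: Cell::new(0) };
        // Trying to parse "SORT =" fails after consuming SORT; with_pos rewinds.
        let r: Result<(), ()> = cursor.with_pos(|| {
            if cursor.match_(&Tok::Id("SORT")) && cursor.match_(&Tok::Equals) {
                Ok(())
            } else {
                Err(())
            }
        });
        assert!(r.is_err());
        assert_eq!(cursor.pos.get(), 0); // rewound, ready for the next alternative
    }

This is why `Vec<T>::from_tokens` can loop `while let Ok(result) = cursor.with_pos(...)` without a trailing failed iteration eating tokens.]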
@@ -528,27 +554,75 @@ pub struct Cursor<'a> { } impl<'a> Cursor<'a> { - pub fn new(slice: &'a TokenSlice<'a>) -> Self { + pub fn new(slice: &TokenSlice<'a>) -> Self { Self { - slice, + slice: slice.clone(), pos: Cell::new(0), } } + pub fn get_pos(&self) -> usize { + self.pos.get() + } + + pub fn set_pos(&self, position: usize) { + self.pos.set(position); + } + + pub fn with_pos(&self, f: F) -> Result + where + F: FnOnce() -> Result, + { + let position = self.get_pos(); + let retval = f(); + if retval.is_err() { + self.set_pos(position); + } + retval + } + + pub fn subcursor(&self, range: Range) -> Cursor<'a> { + Self::new(&self.slice.subslice(range)) + } + pub fn remainder(&self) -> TokenSlice<'a> { self.slice.subslice(self.pos.get()..self.slice.len()) } + pub fn take_remainder(&self) -> TokenSlice<'a> { + let remainder = self.remainder(); + self.pos.set(self.slice.len()); + remainder + } + pub fn force_string(&self) -> Result<&str, Diagnostic> { - let pos = self.pos.get(); - if let Some(Token::String(s)) = self.slice.get_token(pos) { - self.pos.set(pos + 1); + if let Some(Token::String(s)) = self.token() { + self.next(); Ok(s.as_str()) } else { Err(self.error("Syntax error expecting string.")) } } + pub fn force_id(&self) -> Result<&'a Identifier, Diagnostic> { + if let Some(Token::Id(id)) = self.token() { + self.next(); + Ok(id) + } else { + Err(self.error("Syntax error expecting identifier.")) + } + } + + pub fn force(&self, token: &Token) -> Result<(), Diagnostic> { + match self.token() { + Some(t) if t == token => { + self.next(); + Ok(()) + } + _ => Err(self.error(format!("Syntax error expecting {token}."))), + } + } + pub fn error(&self, text: S) -> Diagnostic where S: ToString, @@ -590,15 +664,20 @@ impl<'a> Cursor<'a> { } pub fn match_keyword(&self, keyword: &str) -> bool { - self.token() - .map_or(false, |token| token.matches_keyword(keyword)) + if let Some(token) = self.token() { + if token.matches_keyword(keyword) { + self.next(); + return true; + } + } + false } pub fn at_end(&self) -> bool { self.pos.get() >= self.slice.len() } - pub fn token(&self) -> Option<&Token> { + pub fn token(&self) -> Option<&'a Token> { self.slice.get_token(self.pos.get()) } @@ -614,26 +693,38 @@ impl<'a> Cursor<'a> { } } - pub fn match_syntax(&self, _syntax: &str) -> bool { - todo!() + pub fn match_syntax(&self, syntax: &str) -> bool { + self.with_pos(|| { + let syntax_scanner = StringScanner::new(syntax, Syntax::Interactive, true); + for scan_token in syntax_scanner { + let ScanToken::Token(token) = scan_token else { + unreachable!() + }; + if !self.match_(&token) { + return Err(()); + }; + } + Ok(()) + }) + .is_ok() } } -pub struct Source<'a> { - file: &'a SourceFile, +pub struct Source { + file: Arc, segmenter: Segmenter, seg_pos: usize, - lookahead: VecDeque>, + lookahead: VecDeque, } -impl<'a> Source<'a> { - pub fn new_default(file: &'a SourceFile) -> Self { +impl Source { + pub fn new_default(file: &Arc) -> Self { Self::new(file, Syntax::default()) } - pub fn new(file: &'a SourceFile, syntax: Syntax) -> Self { + pub fn new(file: &Arc, syntax: Syntax) -> Self { Self { - file, + file: file.clone(), segmenter: Segmenter::new(syntax, false), seg_pos: 0, lookahead: VecDeque::new(), @@ -642,6 +733,10 @@ impl<'a> Source<'a> { pub fn read_command(&mut self, macros: &MacroSet) -> Option { loop { + println!("{}:{}", file!(), line!()); + for token in self.lookahead.iter() { + println!("{}", &token.token); + } if let Some(end) = self .lookahead .iter() @@ -649,8 +744,18 @@ impl<'a> Source<'a> { { return 
Some(Tokens::new(self.lookahead.drain(..=end).collect())); } + println!("{}:{}", file!(), line!()); if !self.read_lookahead(macros) { - return None; + if self.lookahead.is_empty() { + return None; + } + let len = self.file.buffer.len(); + self.lookahead.push_back(LexToken { + token: Token::End, + file: self.file.clone(), + pos: len..len, + macro_rep: None, + }); } } } @@ -671,7 +776,7 @@ impl<'a> Source<'a> { Some(ScanToken::Token(token)) => { let end = token == Token::End; pp.push_back(LexToken { - file: self.file, + file: self.file.clone(), token, pos, macro_rep: None, @@ -707,7 +812,7 @@ impl<'a> Source<'a> { let first = &merge[0]; let last = &merge[n - 1]; self.lookahead.push_back(LexToken { - file: self.file, + file: self.file.clone(), token, pos: first.pos.start..last.pos.end, macro_rep: match (&first.macro_rep, &last.macro_rep) { @@ -730,8 +835,8 @@ impl<'a> Source<'a> { fn expand_macro( &self, macros: &MacroSet, - src: &mut VecDeque>, - dst: &mut VecDeque>, + src: &mut VecDeque, + dst: &mut VecDeque, ) { // Now pass tokens one-by-one to the macro expander. let Some(mut parser) = Parser::new(macros, &src[0].token) else { @@ -781,7 +886,7 @@ impl<'a> Source<'a> { let macro_rep = Arc::new(macro_rep); for (index, token) in expansion.into_iter().enumerate() { let lt = LexToken { - file: self.file, + file: self.file.clone(), token: token.token, pos: c0.pos.start..c1.pos.end, macro_rep: Some(MacroRepresentation { @@ -797,6 +902,8 @@ impl<'a> Source<'a> { #[cfg(test)] mod new_lexer_tests { + use std::sync::Arc; + use encoding_rs::UTF_8; use crate::macros::MacroSet; @@ -812,11 +919,11 @@ END DATA. CROSSTABS VARIABLES X (1,7) Y (1,7) /TABLES X BY Y. "#; - let file = SourceFile::for_file_contents( + let file = Arc::new(SourceFile::for_file_contents( String::from(code), Some(String::from("crosstabs.sps")), UTF_8, - ); + )); let mut source = Source::new_default(&file); while let Some(tokens) = source.read_command(&MacroSet::new()) { println!("{tokens:?}"); diff --git a/rust/pspp/src/lex/segment/test.rs b/rust/pspp/src/lex/segment/test.rs index 3e01ee3ee8..79f92fed36 100644 --- a/rust/pspp/src/lex/segment/test.rs +++ b/rust/pspp/src/lex/segment/test.rs @@ -122,6 +122,17 @@ fn print_segmentation(mut input: &str) { } } +#[test] +fn test_end_command() { + check_segmentation( + r#"DATA LIST/ X 1 +"#, + Syntax::Auto, + &[], + &[], + ); +} + #[test] fn test_identifiers() { check_segmentation( diff --git a/rust/pspp/src/message.rs b/rust/pspp/src/message.rs index ffc09c142c..aa7aef640a 100644 --- a/rust/pspp/src/message.rs +++ b/rust/pspp/src/message.rs @@ -1,6 +1,6 @@ use std::{ cmp::{max, min}, - fmt::{Display, Formatter, Result as FmtResult}, + fmt::{Debug, Display, Formatter, Result as FmtResult}, ops::Range, sync::Arc, }; @@ -250,3 +250,9 @@ impl Display for Diagnostic { Ok(()) } } + +impl Debug for Diagnostic { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + Display::fmt(&self, f) + } +}
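
[Note: for reference, here is roughly what the derive macro now expands to for two of the types above, hand-expanded from the quote! templates in pspp-derive; modulo macro hygiene and token spacing, the generated code should match:

    // Struct: one FromTokens::from_tokens call per field, in declaration order.
    impl<'a> FromTokens<'a> for Sort {
        fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
            Ok(Sort {
                key: FromTokens::from_tokens(cursor)?,
                direction: FromTokens::from_tokens(cursor)?,
            })
        }
    }

    // Enum: an if/else chain over the variants. #[pspp(syntax = "...")] turns
    // into match_syntax, which tokenizes the string with StringScanner and
    // matches the resulting tokens ("(", "A", ")") one by one; variants without
    // the attribute fall back to match_keyword on the variant name.
    impl<'a> FromTokens<'a> for Direction {
        fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
            Ok(if cursor.match_syntax("(A)") {
                Self::Ascending
            } else if cursor.match_syntax("(D)") {
                Self::Descending
            } else {
                return Err(cursor.error("Syntax error."));
            })
        }
    }

Since neither type carries `#[pspp(add_lifetime)]`, `StructAttrs::lifetime()` emits nothing and the `'a` appears only on the trait; with `add_lifetime`, as on Descriptives, the same `'a` is also appended to the implementing type.]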