From 875d1e7635749feab0a20b333a350fbdf3fe20d3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 1 Dec 2024 11:16:25 -0800 Subject: [PATCH] descriptives parser works --- rust/pspp-derive/src/lib.rs | 38 ++++- rust/pspp/src/command.rs | 319 ++++++++++++++++++++++-------------- rust/pspp/src/engine.rs | 7 +- rust/pspp/src/lex/lexer.rs | 111 ++----------- rust/pspp/src/message.rs | 10 +- 5 files changed, 246 insertions(+), 239 deletions(-) diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs index d5b344de8b..5ea20581a9 100644 --- a/rust/pspp-derive/src/lib.rs +++ b/rust/pspp-derive/src/lib.rs @@ -29,14 +29,26 @@ fn parse_derive_input(ast: DeriveInput) -> Result { fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { let struct_attrs = StructAttrs::parse(&ast.attrs)?; let mut body = TokenStream2::new(); + let mut variants = Vec::new(); + let mut default = None; for (index, variant) in e.variants.iter().enumerate() { let field_attrs = FieldAttrs::parse(&variant.attrs)?; + if field_attrs.default { + if default.is_none() { + default = Some(index); + } else { + return Err(Error::new(variant.span(), "Duplicate default variant")); + } + } + variants.push((variant, field_attrs)); + } + for (index, (variant, field_attrs)) in variants.iter().enumerate() { if index > 0 { body.extend(quote! { else }.into_iter()); } let ident = &variant.ident; let ident_string = ident.to_string(); - let match_expr = if let Some(syntax) = field_attrs.syntax { + let match_expr = if let Some(syntax) = &field_attrs.syntax { quote! { input.skip_syntax(#syntax) } } else if ident_string.eq_ignore_ascii_case("all") { quote! { input.skip(&Token::Punct(Punct::All))} @@ -45,15 +57,22 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { }; let construction = construct_fields(&variant.fields, quote! { Self::#ident}, true); let check_equals = if struct_attrs.required_equals && !variant.fields.is_empty() { - quote! 
{ let ((), input) = parse_token(input, &Token::Punct(Punct::Equals)).mismatch_to_error()?; } + quote! { let (Parsed { value: (), rest: input, diagnostics: _}) = parse_token(input, &Token::Punct(Punct::Equals)).mismatch_to_error()?; } } else { quote! {} }; + body.extend(quote! { if let Some(input) = #match_expr { #check_equals #construction } }); + } + if let Some(default) = default { + let (variant, _field_attrs) = &variants[default]; + let ident = &variant.ident; + let construction = construct_fields(&variant.fields, quote! { Self::#ident}, true); + body.extend(quote! { else { #construction } }); + } else { body.extend( - quote! { if let Some(input) = #match_expr { #check_equals #construction } }, + quote! { else { Err(ParseError::Mismatch(input.error("Syntax error.").into())) } }, ); } - body.extend(quote! { else { Err(ParseError::Mismatch(input.error("Syntax error."))) } }); let name = &ast.ident; let lifetime = struct_attrs.lifetime(); @@ -82,7 +101,7 @@ fn construct_fields( for (index, _field) in fields.iter().enumerate() { let varname = format_ident!("field{index}"); construction - .extend(quote! { let (#varname, input) = FromTokens::from_tokens(input) #convert ?; }); + .extend(quote! { let Parsed { value: #varname, rest: input, diagnostics: _ } = FromTokens::from_tokens(input) #convert ?; }); } match fields { Fields::Named(named) => { @@ -92,7 +111,7 @@ fn construct_fields( let field_name = &field.ident; body.extend(quote! { #field_name: #varname, }); } - quote! { #construction Ok((#name { #body }, input)) } + quote! { #construction Ok(Parsed::ok(#name { #body }, input)) } } Fields::Unnamed(unnamed) => { let mut body = TokenStream2::new(); @@ -100,9 +119,9 @@ fn construct_fields( let varname = format_ident!("field{index}"); body.extend(quote! { #varname, }); } - quote! { #construction Ok((#name ( #body ), input)) } + quote! { #construction Ok(Parsed::ok(#name ( #body ), input)) } } - Fields::Unit => quote! { Ok((#name, input)) }, + Fields::Unit => quote! 
{ Ok(Parsed::ok(#name, input)) }, } } @@ -125,6 +144,7 @@ fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result, + default: bool, } impl FieldAttrs { @@ -141,6 +161,8 @@ impl FieldAttrs { let syntax = meta.input.parse::()?; //println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); field_attrs.syntax = Some(syntax); + } else if meta.path.is_ident("default") { + field_attrs.default = true; } else { return Err(Error::new(meta.path.span(), "Unknown attribute")); } diff --git a/rust/pspp/src/command.rs b/rust/pspp/src/command.rs index a1e4be5e0a..143d60f6aa 100644 --- a/rust/pspp/src/command.rs +++ b/rust/pspp/src/command.rs @@ -46,73 +46,61 @@ struct Command { run: Box, //-> Box + Send + Sync>, } -/* -struct Subcommand { - name: Identifier, - tokens: Vec, +#[derive(Debug)] +enum ParseError { + Error(Diagnostics), + Mismatch(Diagnostics), } -fn collect_subcommands(context: &mut Context) -> Vec { - let mut subcommands = Vec::new(); - while !context.lexer.at_end() { - let Some(name) = context.force_id() else { - todo!() - }; - let mut tokens = Vec::new(); - context.lexer.match_(&Token::Punct(Punct::Equals)); - while !context.lexer.at_end() && !context.lexer.match_(&Token::Punct(Punct::Slash)) { - tokens.push(context.lexer.token().clone()); - context.lexer.get(); +#[derive(Debug)] +struct Parsed<'a, T> { + value: T, + rest: TokenSlice<'a>, + diagnostics: Diagnostics, +} + +impl<'a, T> Parsed<'a, T> { + pub fn new(value: T, rest: TokenSlice<'a>, warnings: Diagnostics) -> Self { + Self { + value, + rest: rest, + diagnostics: warnings, } - subcommands.push(Subcommand { name, tokens }); } - subcommands -} - -struct DescriptivesCommand { - variables: Variables, - missing: Option, - save: bool, - statistics: Option, - sort: Option, -} - -struct Variables(Vec); - -struct Variable { - var1: Identifier, - var2: Option, - zname: Option, -} - -fn parse_descriptives(context: &mut Context) { - let subcommands = collect_subcommands(context); - for subcommand in 
subcommands { - + pub fn ok(value: T, rest: TokenSlice<'a>) -> Self { + Self { + value, + rest: rest, + diagnostics: Diagnostics::default(), + } + } + pub fn into_tuple(self) -> (T, TokenSlice<'a>, Diagnostics) { + (self.value, self.rest, self.diagnostics) + } + pub fn map(self, f: F) -> Parsed<'a, R> + where + F: FnOnce(T) -> R, + { + Parsed { + value: f(self.value), + rest: self.rest, + diagnostics: self.diagnostics, + } + } + pub fn warn(self, mut warnings: Diagnostics) -> Self { + Self { + value: self.value, + rest: self.rest, + diagnostics: { + let mut vec = self.diagnostics.0; + vec.append(&mut warnings.0); + Diagnostics(vec) + }, + } } } -trait ParsedCommand { - fn format(&self) -> String; -} - */ - -/* - - - - -struct Subcommand { - name: &str, -}*/ - -#[derive(Debug)] -enum ParseError { - Error(Diagnostics), - Mismatch(Diagnostics), -} - -type ParseResult<'a, T> = Result<(T, TokenSlice<'a>, Diagnostics), ParseError>; +type ParseResult<'a, T> = Result, ParseError>; trait MismatchToError { fn mismatch_to_error(self) -> Self; @@ -142,8 +130,8 @@ where Self: Sized, { match T::from_tokens(input) { - Ok((value, rest, diagnostics)) => Ok((Some(value), rest, diagnostics)), - Err(ParseError::Mismatch(_)) => Ok((None, input)), + Ok(p) => Ok(p.map(Some)), + Err(ParseError::Mismatch(_)) => Ok(Parsed::ok(None, input)), Err(ParseError::Error(error)) => Err(ParseError::Error(error)), } } @@ -157,18 +145,28 @@ where where Self: Sized, { - let mut vector = Vec::new(); + let mut values_vec = Vec::new(); + let mut warnings_vec = Vec::new(); loop { match T::from_tokens(input) { - Ok((value, rest)) => { - vector.push(value); + Ok(Parsed { + value, + rest, + diagnostics: mut warnings, + }) => { + values_vec.push(value); + warnings_vec.append(&mut warnings.0); input = rest; } Err(ParseError::Mismatch(_)) => break, Err(ParseError::Error(e)) => return Err(ParseError::Error(e)), } } - Ok((vector, input)) + Ok(Parsed { + value: values_vec, + rest: input, + diagnostics: 
Diagnostics(warnings_vec), + }) } } @@ -177,24 +175,10 @@ impl<'a> FromTokens<'a> for TokenSlice<'a> { where Self: Sized, { - Ok((input, input.end())) + Ok(Parsed::ok(input, input.end())) } } -/* -impl < 'a > FromTokens < 'a > for DescriptivesSubcommand<'a> -{ - fn from_tokens(cursor : & Cursor < 'a >) -> Result < Self, Diagnostic > - { - Ok(if cursor.match_keyword("Variables") { Self :: Variables(cursor.take_remainder()) } else if - cursor.match_keyword("Missing") { Self :: Missing(cursor.take_remainder()) } else if - cursor.match_keyword("Save") { Self :: Save } else if - cursor.match_keyword("Statistics") { Self :: Statistics(Vec::new()) } else if - cursor.match_keyword("Sort") { Self :: Sort(Sort::from_tokens(cursor)?) } else - { return Err(cursor.error("Syntax error.")); }) - } -}*/ - #[derive(Debug, FromTokens)] #[pspp(add_lifetime)] struct Descriptives<'a> { @@ -215,24 +199,31 @@ where let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash)); if start.is_empty() { return Err(ParseError::Error( - input.error("Syntax error at end of input."), + input.error("Syntax error at end of input.").into(), )); } let end = start.skip_to(&Token::Punct(Punct::Slash)); let subcommand = start.subslice(0..start.len() - end.len()); - let (value, rest) = T::from_tokens(subcommand)?; - Ok((Self(value), end)) + let (value, rest, mut warnings) = T::from_tokens(subcommand)?.into_tuple(); + if !rest.is_empty() { + warnings + .0 + .push(rest.warning("Syntax error expecting end of subcommand.")); + } + Ok(Parsed::new(Self(value), end, warnings)) } } #[derive(Debug, FromTokens)] #[pspp(add_lifetime, required_equals)] enum DescriptivesSubcommand<'a> { - Variables(Vec>), + #[pspp(default)] + Variables(Vec>), Missing(Vec), Save, Statistics(Vec), Sort(Sort), + Format(Vec), } #[derive(Debug, FromTokens)] @@ -242,10 +233,20 @@ enum Missing { Include, } +#[derive(Debug, FromTokens)] +enum Format { + Labels, + NoLabels, + Index, + NoIndex, + Line, + Serial, +} + 
#[derive(Debug, FromTokens)] #[pspp(add_lifetime)] -struct DescriptivesVarRange<'a> { - vars: VarRange<'a>, +struct DescriptivesVars<'a> { + vars: Vars<'a>, z_name: Option>, } @@ -260,51 +261,60 @@ where where Self: Sized, { - let ((), input) = parse_token(input, &Token::Punct(Punct::LParen))?; - let (inner, input) = T::from_tokens(input)?; - let ((), input) = parse_token(input, &Token::Punct(Punct::RParen))?; - Ok((Self(inner), input)) + let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LParen))?.into_tuple(); + let (value, rest, warnings) = T::from_tokens(rest)?.into_tuple(); + let ((), rest, _) = parse_token(rest, &Token::Punct(Punct::RParen))?.into_tuple(); + Ok(Parsed { + value: Self(value), + rest, + diagnostics: warnings, + }) } } fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> { if let Some(rest) = input.skip(token) { - Ok(((), rest)) + Ok(Parsed::ok((), rest)) } else { Err(ParseError::Mismatch( - input.error(format!("expecting {token}")), + input.error(format!("expecting {token}")).into(), )) } } fn parse_keyword<'a>(input: TokenSlice<'a>, keyword: &str) -> ParseResult<'a, ()> { if let Some(rest) = input.skip_if(|token| token.matches_keyword(keyword)) { - Ok(((), rest)) + Ok(Parsed::ok((), rest)) } else { Err(ParseError::Mismatch( - input.error(format!("expecting {keyword}")), + input.error(format!("expecting {keyword}")).into(), )) } } #[derive(Debug)] -struct VarRange<'a> { - from: &'a Identifier, - to: Option<&'a Identifier>, +enum Vars<'a> { + Single(&'a Identifier), + Range(&'a Identifier, &'a Identifier), + All, } -impl<'a> FromTokens<'a> for VarRange<'a> { +impl<'a> FromTokens<'a> for Vars<'a> { fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> where Self: Sized, { - let (from, input) = parse_id(input)?; - if let Ok(((), input)) = parse_token(input, &Token::Punct(Punct::To)) { - if let Ok((to, input)) = parse_id(input) { - return Ok((Self { from, to: Some(to) }, input)); + if let Ok(Parsed { 
rest, .. }) = parse_token(input, &Token::Punct(Punct::All)) { + Ok(Parsed::ok(Self::All, rest)) + } else { + let (from, rest, _) = parse_id(input)?.into_tuple(); + if let Ok(Parsed { rest, .. }) = parse_token(rest, &Token::Punct(Punct::To)) { + if let Ok(p) = parse_id(rest) { + return Ok(p.map(|to| Self::Range(from, to))); + } } + Ok(Parsed::ok(Self::Single(from), rest)) } - Ok((Self { from, to: None }, input)) } } @@ -315,10 +325,10 @@ fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> { .. }) = iter.next() { - Ok((id, iter.remainder())) + Ok(Parsed::ok(id, iter.remainder())) } else { Err(ParseError::Mismatch( - input.error("Syntax error expecting identifier."), + input.error("Syntax error expecting identifier.").into(), )) } } @@ -393,13 +403,18 @@ fn commands() -> &'static [Command] { no_abbrev: false, name: "DESCRIPTIVES", run: Box::new(|context| { - println!("{}:{}", file!(), line!()); let mut input = context.lexer; - loop { - match >::from_tokens(input) { - Ok((subcommand, rest)) => { - println!("{subcommand:?}"); - println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0)); + while !input.is_empty() { + match >::from_tokens(input) { + Ok(Parsed { + value: subcommand, + rest, + diagnostics, + }) => { + println!("\n{subcommand:?}"); + //println!("rest: {rest:?}"); + println!("warnings: {diagnostics:?}"); + //println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0)); input = rest; } Err(error) => { @@ -408,7 +423,6 @@ fn commands() -> &'static [Command] { } } } - println!("{}:{}", file!(), line!()); }), }, Command { @@ -419,16 +433,6 @@ fn commands() -> &'static [Command] { name: "ECHO", run: Box::new(|_context| todo!()), }, - /* - Command { - allowed_states: State::Data.into(), - enhanced_only: false, - testing_only: false, - no_abbrev: false, - name: "DESCRIPTIVES", - run: Box::new(parse_descriptives), - }, - */ ] } @@ -531,9 +535,8 @@ pub fn end_of_command(context: &Context, range: RangeFrom) -> Result, _state: 
State) { - println!("{}:{}", file!(), line!()); match lexer.get_token(0) { - None | Some(Token::End) => println!("{}:{}", file!(), line!()), + None | Some(Token::End) => (), _ => match parse_command_name(&mut lexer, error) { Ok((command, n_tokens)) => { let mut context = Context { @@ -563,3 +566,69 @@ impl<'a> Context<'a> { (self.error)(diagnostic); } } + +#[cfg(test)] +mod tests { + mod descriptives { + use std::sync::Arc; + + use encoding_rs::UTF_8; + + use crate::{ + engine::Engine, + lex::lexer::{Source, SourceFile}, + }; + + fn test(syntax: &str) { + let mut engine = Engine::new(); + engine.run(Source::new_default(&Arc::new( + SourceFile::for_file_contents( + syntax.to_string(), + Some("test.sps".to_string()), + UTF_8, + ), + ))); + } + + #[test] + fn basics() { + test("descript a to b (c) all/stat=all/format=serial."); + } + + #[test] + fn include_missing() { + test("descript all/stat=all/format=serial/missing=include."); + } + + #[test] + fn include_missing_listwise() { + test("descript all/stat=all/format=serial/missing=listwise."); + test("descript all/stat=all/format=serial/missing=listwise include."); + } + + #[test] + fn mean_only() { + test("descript all/stat=mean."); + } + + #[test] + fn z_scores() { + test("DESCRIPTIVES /VAR=a b /SAVE."); + } + + #[test] + fn syntax_errors() { + test("\ +DESCRIPTIVES MISSING=**. +DESCRIPTIVES FORMAT=**. +DESCRIPTIVES STATISTICS=**. +DESCRIPTIVES SORT=**. +DESCRIPTIVES SORT=NAME (**). +DESCRIPTIVES SORT=NAME (A **). +DESCRIPTIVES **. +DESCRIPTIVES x/ **. +DESCRIPTIVES MISSING=INCLUDE. 
+"); + } + } +} diff --git a/rust/pspp/src/engine.rs b/rust/pspp/src/engine.rs index 6e9248dbb3..28982e4007 100644 --- a/rust/pspp/src/engine.rs +++ b/rust/pspp/src/engine.rs @@ -13,15 +13,11 @@ impl Engine { } pub fn run(&mut self, mut source: Source) { let macros = MacroSet::new(); - println!("{}:{}", file!(), line!()); while let Some(tokens) = source.read_command(&macros) { - println!("{}:{}", file!(), line!()); let error: Box = Box::new(|diagnostic| { println!("{diagnostic}"); }); - println!("{}:{}", file!(), line!()); parse_command(TokenSlice::new(&tokens), &error); - println!("{}:{}", file!(), line!()); } } } @@ -50,11 +46,10 @@ mod tests { #[test] fn test_descriptives() { - println!("{}:{}", file!(), line!()); let mut engine = Engine::new(); engine.run(Source::new_default(&Arc::new( SourceFile::for_file_contents( - "DESCRIPTIVES VARIABLES=a (za) b to c/MISSING=x y z/MISSING=VARIABLE INCLUDE/STATISTICS=DEFAULT/SAVE/SORT=SKEWNESS(A)\n".to_string(), + "DESCRIPTIVES VARIABLES=a (za) b to c/MISSING=x y z/MISSING=VARIABLE INCLUDE/STATISTICS=DEFAULT/SAVE/SORT=SKEWNESS (A)\n".to_string(), Some("test.sps".to_string()), UTF_8, ), diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs index f9c1ced0bc..6ffddc35ad 100644 --- a/rust/pspp/src/lex/lexer.rs +++ b/rust/pspp/src/lex/lexer.rs @@ -244,96 +244,6 @@ pub enum Error { TokenError(#[from] ScanError), } -/* -#[cfg(test)] -mod tests { - use encoding_rs::UTF_8; - - use crate::lex::token::Token; - - use super::{Lexer, NewLexer, Source, SourceFile}; - - #[test] - fn test() { - let mut lexer = NewLexer::new(Box::new(|location, error| println!("{location}: {error}"))); - lexer.include(Source::new_default(SourceFile::for_string( - String::from( - r#"#! /usr/local/bin/pspp -DATA LIST LIST NOTABLE /a. -BEGIN DATA. -1 -2 -END DATA. -LIST.
-"#, - ), - UTF_8, - ))); - while let Some(tokens) = lexer.read_command() { - - loop { - lexer.get(); - let token = lexer.token(); - println!("{token:?}"); - if let Token::End = token { - break; - } - } - } - - #[test] - fn test_scan_errors() { - let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}"))); - lexer.include(Source::new_default(SourceFile::for_file_contents( - String::from( - r#"x'123' -x'1x' -u'' -u'012345678' -u'd800' -u'110000' -'foo -'very long unterminated string that be ellipsized in its error message -1e .x -^ -� -"#, - ), - Some(String::from("syntax.sps")), - UTF_8, - ))); - loop { - lexer.get(); - let token = lexer.token(); - println!("{token:?}"); - if let Token::End = token { - break; - } - } - } - - #[test] - fn test_null_byte() { - let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}"))); - lexer.include(Source::new_default(SourceFile::for_file_contents( - String::from( - "datA dist list notable file='input.txt'/a b c. 
-lis|.\0", - ), - Some(String::from("syntax.sps")), - UTF_8, - ))); - loop { - lexer.get(); - let token = lexer.token(); - println!("{token:?}"); - if let Token::End = token { - break; - } - } - } -} -*/ pub struct Tokens { tokens: Vec, } @@ -433,6 +343,13 @@ impl<'a> TokenSlice<'a> { self.diagnostic(Severity::Error, text.to_string()) } + pub fn warning(&self, text: S) -> Diagnostic + where + S: ToString, + { + self.diagnostic(Severity::Warning, text.to_string()) + } + pub fn subslice(&self, range: Range) -> Self { debug_assert!(range.start <= range.end); debug_assert!(range.end <= self.len()); @@ -547,10 +464,13 @@ impl<'a> TokenSlice<'a> { let syntax_scanner = StringScanner::new(syntax, Syntax::Interactive, true); let mut input = *self; for scan_token in syntax_scanner { - let ScanToken::Token(token) = scan_token else { - unreachable!() + let token = match scan_token { + ScanToken::Token(token) => token, + ScanToken::Error(error) => { + unreachable!("syntax parameter {syntax:?} contains PSPP syntax error ({error})") + } }; - input = self.skip(&token)?; + input = input.skip(&token)?; } Some(input) } @@ -639,10 +559,6 @@ impl Source { pub fn read_command(&mut self, macros: &MacroSet) -> Option { loop { - println!("{}:{}", file!(), line!()); - for token in self.lookahead.iter() { - println!("{}", &token.token); - } if let Some(end) = self .lookahead .iter() @@ -650,7 +566,6 @@ impl Source { { return Some(Tokens::new(self.lookahead.drain(..=end).collect())); } - println!("{}:{}", file!(), line!()); if !self.read_lookahead(macros) { if self.lookahead.is_empty() { return None; diff --git a/rust/pspp/src/message.rs b/rust/pspp/src/message.rs index 58e9671a26..5b99802bf5 100644 --- a/rust/pspp/src/message.rs +++ b/rust/pspp/src/message.rs @@ -164,8 +164,14 @@ pub struct Stack { description: String, } -#[derive(Debug)] -pub struct Diagnostics(Vec); +#[derive(Debug, Default)] +pub struct Diagnostics(pub Vec); + +impl From for Diagnostics { + fn from(value: Diagnostic) -> 
Self { + Self(vec![value]) + } +} pub struct Diagnostic { pub severity: Severity, -- 2.30.2