From: Ben Pfaff Date: Sun, 1 Dec 2024 19:17:21 +0000 (-0800) Subject: move around command module X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=53103e8aa6fca53ac6ce0cb5802b9a007616356e;p=pspp move around command module --- diff --git a/rust/pspp/src/command.rs b/rust/pspp/src/command.rs deleted file mode 100644 index 143d60f6aa..0000000000 --- a/rust/pspp/src/command.rs +++ /dev/null @@ -1,634 +0,0 @@ -#![allow(dead_code)] -use std::{fmt::Write, ops::RangeFrom, sync::OnceLock}; - -use flagset::{flags, FlagSet}; -use pspp_derive::FromTokens; - -use crate::{ - identifier::Identifier, - integer::ToInteger, - lex::{ - command_name::CommandMatcher, - lexer::{LexToken, TokenSlice}, - token::{Punct, Token}, - }, - message::{Diagnostic, Diagnostics}, -}; - -flags! { - enum State: u8 { - /// No active dataset yet defined. - Initial, - - /// Active dataset has been defined. - Data, - - /// Inside `INPUT PROGRAM`. - InputProgram, - - /// Inside `FILE TYPE`. - FileType, - - /// State nested inside `LOOP` or `DO IF`, inside [State::Data]. - NestedData, - - /// State nested inside `LOOP` or `DO IF`, inside [State::InputProgram]. - NestedInputProgram, - } -} - -struct Command { - allowed_states: FlagSet, - enhanced_only: bool, - testing_only: bool, - no_abbrev: bool, - name: &'static str, - run: Box, //-> Box + Send + Sync>, -} - -#[derive(Debug)] -enum ParseError { - Error(Diagnostics), - Mismatch(Diagnostics), -} - -#[derive(Debug)] -struct Parsed<'a, T> { - value: T, - rest: TokenSlice<'a>, - diagnostics: Diagnostics, -} - -impl<'a, T> Parsed<'a, T> { - pub fn new(value: T, rest: TokenSlice<'a>, warnings: Diagnostics) -> Self { - Self { - value, - rest: rest, - diagnostics: warnings, - } - } - pub fn ok(value: T, rest: TokenSlice<'a>) -> Self { - Self { - value, - rest: rest, - diagnostics: Diagnostics::default(), - } - } - pub fn into_tuple(self) -> (T, TokenSlice<'a>, Diagnostics) { - (self.value, self.rest, self.diagnostics) - } - pub fn map(self, f: F) -> Parsed<'a, R> - where - F: FnOnce(T) -> R, - { - Parsed { - value: f(self.value), - rest: self.rest, - diagnostics: self.diagnostics, - } - } - pub fn warn(self, mut warnings: Diagnostics) -> Self { - Self { - value: self.value, - rest: self.rest, - diagnostics: { - let mut vec = self.diagnostics.0; - vec.append(&mut warnings.0); - Diagnostics(vec) - }, - } - } -} - -type ParseResult<'a, T> = Result, ParseError>; - -trait MismatchToError { - fn mismatch_to_error(self) -> Self; -} - -impl<'a, T> MismatchToError for ParseResult<'a, T> { - fn mismatch_to_error(self) -> Self { - match self { - Err(ParseError::Mismatch(diagnostic)) => Err(ParseError::Error(diagnostic)), - rest => rest, - } - } -} - -trait FromTokens<'a> { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized; -} - -impl<'a, T> FromTokens<'a> for Option -where - T: FromTokens<'a>, -{ - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized, - { - match T::from_tokens(input) { - Ok(p) => Ok(p.map(Some)), - Err(ParseError::Mismatch(_)) => Ok(Parsed::ok(None, input)), - Err(ParseError::Error(error)) => Err(ParseError::Error(error)), - } - } -} - -impl<'a, T> FromTokens<'a> for Vec -where - T: FromTokens<'a>, -{ - fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized, - { - let mut values_vec = Vec::new(); - let mut warnings_vec = Vec::new(); - loop { - match T::from_tokens(input) { - Ok(Parsed { - value, - rest, - diagnostics: mut warnings, - }) => { - 
values_vec.push(value); - warnings_vec.append(&mut warnings.0); - input = rest; - } - Err(ParseError::Mismatch(_)) => break, - Err(ParseError::Error(e)) => return Err(ParseError::Error(e)), - } - } - Ok(Parsed { - value: values_vec, - rest: input, - diagnostics: Diagnostics(warnings_vec), - }) - } -} - -impl<'a> FromTokens<'a> for TokenSlice<'a> { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized, - { - Ok(Parsed::ok(input, input.end())) - } -} - -#[derive(Debug, FromTokens)] -#[pspp(add_lifetime)] -struct Descriptives<'a> { - subcommands: Vec>>, -} - -#[derive(Debug)] -struct Subcommand(pub T); - -impl<'a, T> FromTokens<'a> for Subcommand -where - T: FromTokens<'a>, -{ - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized, - { - let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash)); - if start.is_empty() { - return Err(ParseError::Error( - input.error("Syntax error at end of input.").into(), - )); - } - let end = start.skip_to(&Token::Punct(Punct::Slash)); - let subcommand = start.subslice(0..start.len() - end.len()); - let (value, rest, mut warnings) = T::from_tokens(subcommand)?.into_tuple(); - if !rest.is_empty() { - warnings - .0 - .push(rest.warning("Syntax error expecting end of subcommand.")); - } - Ok(Parsed::new(Self(value), end, warnings)) - } -} - -#[derive(Debug, FromTokens)] -#[pspp(add_lifetime, required_equals)] -enum DescriptivesSubcommand<'a> { - #[pspp(default)] - Variables(Vec>), - Missing(Vec), - Save, - Statistics(Vec), - Sort(Sort), - Format(Vec), -} - -#[derive(Debug, FromTokens)] -enum Missing { - Variable, - Listwise, - Include, -} - -#[derive(Debug, FromTokens)] -enum Format { - Labels, - NoLabels, - Index, - NoIndex, - Line, - Serial, -} - -#[derive(Debug, FromTokens)] -#[pspp(add_lifetime)] -struct DescriptivesVars<'a> { - vars: Vars<'a>, - z_name: Option>, -} - -#[derive(Debug)] -struct InParens(pub T); - -impl<'a, T> FromTokens<'a> for InParens -where - T: FromTokens<'a>, -{ - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized, - { - let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LParen))?.into_tuple(); - let (value, rest, warnings) = T::from_tokens(rest)?.into_tuple(); - let ((), rest, _) = parse_token(rest, &Token::Punct(Punct::RParen))?.into_tuple(); - Ok(Parsed { - value: Self(value), - rest, - diagnostics: warnings, - }) - } -} - -fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> { - if let Some(rest) = input.skip(token) { - Ok(Parsed::ok((), rest)) - } else { - Err(ParseError::Mismatch( - input.error(format!("expecting {token}")).into(), - )) - } -} - -fn parse_keyword<'a>(input: TokenSlice<'a>, keyword: &str) -> ParseResult<'a, ()> { - if let Some(rest) = input.skip_if(|token| token.matches_keyword(keyword)) { - Ok(Parsed::ok((), rest)) - } else { - Err(ParseError::Mismatch( - input.error(format!("expecting {keyword}")).into(), - )) - } -} - -#[derive(Debug)] -enum Vars<'a> { - Single(&'a Identifier), - Range(&'a Identifier, &'a Identifier), - All, -} - -impl<'a> FromTokens<'a> for Vars<'a> { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized, - { - if let Ok(Parsed { rest, .. }) = parse_token(input, &Token::Punct(Punct::All)) { - Ok(Parsed::ok(Self::All, rest)) - } else { - let (from, rest, _) = parse_id(input)?.into_tuple(); - if let Ok(Parsed { rest, .. 
}) = parse_token(rest, &Token::Punct(Punct::To)) { - if let Ok(p) = parse_id(rest) { - return Ok(p.map(|to| Self::Range(from, to))); - } - } - Ok(Parsed::ok(Self::Single(from), rest)) - } - } -} - -fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> { - let mut iter = input.iter(); - if let Some(LexToken { - token: Token::Id(id), - .. - }) = iter.next() - { - Ok(Parsed::ok(id, iter.remainder())) - } else { - Err(ParseError::Mismatch( - input.error("Syntax error expecting identifier.").into(), - )) - } -} - -impl<'a> FromTokens<'a> for &'a Identifier { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> - where - Self: Sized, - { - parse_id(input) - } -} - -#[derive(Debug, FromTokens)] -struct Sort { - key: SortKey, - direction: Option, -} - -#[derive(Debug, FromTokens)] -enum SortKey { - Mean, - SMean, - Stddev, - Variance, - Range, - Min, - Max, - Sum, - Skewness, - Kurtosis, - Name, -} - -#[derive(Debug, FromTokens)] -enum Direction { - #[pspp(syntax = "(A)")] - Ascending, - #[pspp(syntax = "(D)")] - Descending, -} - -#[derive(Debug, FromTokens)] -enum Statistic { - Default, - Mean, - SeMean, - Stddev, - Variance, - Range, - Sum, - Min, - Max, - Skewness, - Kurtosis, - All, -} - -fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec> { - src.split(|token| token.token == Token::Punct(Punct::Slash)) - .filter(|slice| !slice.is_empty()) - .collect() -} - -fn commands() -> &'static [Command] { - fn new_commands() -> Vec { - vec![ - Command { - allowed_states: FlagSet::full(), - enhanced_only: false, - testing_only: false, - no_abbrev: false, - name: "DESCRIPTIVES", - run: Box::new(|context| { - let mut input = context.lexer; - while !input.is_empty() { - match >::from_tokens(input) { - Ok(Parsed { - value: subcommand, - rest, - diagnostics, - }) => { - println!("\n{subcommand:?}"); - //println!("rest: {rest:?}"); - println!("warnings: {diagnostics:?}"); - //println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0)); - input = rest; - } - Err(error) => { - println!("{error:?}"); - break; - } - } - } - }), - }, - Command { - allowed_states: FlagSet::full(), - enhanced_only: false, - testing_only: false, - no_abbrev: false, - name: "ECHO", - run: Box::new(|_context| todo!()), - }, - ] - } - - static COMMANDS: OnceLock> = OnceLock::new(); - COMMANDS.get_or_init(|| new_commands()).as_slice() -} - -fn parse_command_word(lexer: &mut TokenSlice, s: &mut String, n: usize) -> bool { - let separator = match s.chars().next_back() { - Some(c) if c != '-' => " ", - _ => "", - }; - - match lexer.get_token(n) { - Some(Token::Punct(Punct::Dash)) => { - s.push('-'); - true - } - Some(Token::Id(id)) => { - write!(s, "{separator}{id}").unwrap(); - true - } - Some(Token::Number(number)) if number.is_sign_positive() => { - if let Some(integer) = number.to_exact_usize() { - write!(s, "{separator}{integer}").unwrap(); - true - } else { - false - } - } - _ => false, - } -} - -fn find_best_match(s: &str) -> (Option<&'static Command>, isize) { - let mut cm = CommandMatcher::new(s); - for command in commands() { - cm.add(command.name, command); - } - cm.get_match() -} - -fn parse_command_name( - lexer: &mut TokenSlice, - error: &Box, -) -> Result<(&'static Command, usize), ()> { - let mut s = String::new(); - let mut word = 0; - let mut missing_words = 0; - let mut command = None; - while parse_command_word(lexer, &mut s, word) { - (command, missing_words) = find_best_match(&s); - if missing_words <= 0 { - break; - } - word += 1; - } - if command.is_none() && missing_words > 
0 { - s.push_str(" ."); - (command, missing_words) = find_best_match(&s); - s.truncate(s.len() - 2); - } - - match command { - Some(command) => Ok((command, ((word as isize + 1) + missing_words) as usize)), - None => { - if word == 0 { - error( - lexer - .subslice(0..1) - .error("Syntax error expecting command name"), - ) - } else { - error(lexer.subslice(0..word + 1).error("Unknown command `{s}`.")) - }; - Err(()) - } - } -} - -pub enum Success { - Success, - Eof, - Finish, -} - -pub fn end_of_command(context: &Context, range: RangeFrom) -> Result { - match context.lexer.get_token(range.start) { - None | Some(Token::End) => Ok(Success::Success), - _ => { - context.error( - context - .lexer - .subslice(range.start..context.lexer.len()) - .error("Syntax error expecting end of command."), - ); - Err(()) - } - } -} - -fn parse_in_state(mut lexer: TokenSlice, error: &Box, _state: State) { - match lexer.get_token(0) { - None | Some(Token::End) => (), - _ => match parse_command_name(&mut lexer, error) { - Ok((command, n_tokens)) => { - let mut context = Context { - error, - lexer: lexer.subslice(n_tokens..lexer.len()), - command_name: Some(command.name), - }; - (command.run)(&mut context); - } - Err(error) => println!("{error:?}"), - }, - } -} - -pub fn parse_command(lexer: TokenSlice, error: &Box) { - parse_in_state(lexer, error, State::Initial) -} - -pub struct Context<'a> { - error: &'a Box, - lexer: TokenSlice<'a>, - command_name: Option<&'static str>, -} - -impl<'a> Context<'a> { - pub fn error(&self, diagnostic: Diagnostic) { - (self.error)(diagnostic); - } -} - -#[cfg(test)] -mod tests { - mod descriptives { - use std::sync::Arc; - - use encoding_rs::UTF_8; - - use crate::{ - engine::Engine, - lex::lexer::{Source, SourceFile}, - }; - - fn test(syntax: &str) { - let mut engine = Engine::new(); - engine.run(Source::new_default(&Arc::new( - SourceFile::for_file_contents( - syntax.to_string(), - Some("test.sps".to_string()), - UTF_8, - ), - ))); - } - - #[test] - fn basics() { - test("descript a to b (c) all/stat=all/format=serial."); - } - - #[test] - fn include_missing() { - test("descript all/stat=all/format=serial/missing=include."); - } - - #[test] - fn include_missing_listwise() { - test("descript all/stat=all/format=serial/missing=listwise."); - test("descript all/stat=all/format=serial/missing=listwise include."); - } - - #[test] - fn mean_only() { - test("descript all/stat=mean."); - } - - #[test] - fn z_scores() { - test("DESCRIPTIVES /VAR=a b /SAVE."); - } - - #[test] - fn syntax_errors() { - test("\ -DESCRIPTIVES MISSING=**. -DESCRIPTIVES FORMAT=**. -DESCRIPTIVES STATISTICS=**. -DESCRIPTIVES SORT=**. -DESCRIPTIVES SORT=NAME (**). -DESCRIPTIVES SORT=NAME (A **). -DESCRIPTIVES **. -DESCRIPTIVES x/ **. -DESCRIPTIVES MISSING=INCLUDE. -"); - } - } -} diff --git a/rust/pspp/src/command/mod.rs b/rust/pspp/src/command/mod.rs new file mode 100644 index 0000000000..143d60f6aa --- /dev/null +++ b/rust/pspp/src/command/mod.rs @@ -0,0 +1,634 @@ +#![allow(dead_code)] +use std::{fmt::Write, ops::RangeFrom, sync::OnceLock}; + +use flagset::{flags, FlagSet}; +use pspp_derive::FromTokens; + +use crate::{ + identifier::Identifier, + integer::ToInteger, + lex::{ + command_name::CommandMatcher, + lexer::{LexToken, TokenSlice}, + token::{Punct, Token}, + }, + message::{Diagnostic, Diagnostics}, +}; + +flags! { + enum State: u8 { + /// No active dataset yet defined. + Initial, + + /// Active dataset has been defined. + Data, + + /// Inside `INPUT PROGRAM`. + InputProgram, + + /// Inside `FILE TYPE`. 
+ FileType, + + /// State nested inside `LOOP` or `DO IF`, inside [State::Data]. + NestedData, + + /// State nested inside `LOOP` or `DO IF`, inside [State::InputProgram]. + NestedInputProgram, + } +} + +struct Command { + allowed_states: FlagSet, + enhanced_only: bool, + testing_only: bool, + no_abbrev: bool, + name: &'static str, + run: Box, //-> Box + Send + Sync>, +} + +#[derive(Debug)] +enum ParseError { + Error(Diagnostics), + Mismatch(Diagnostics), +} + +#[derive(Debug)] +struct Parsed<'a, T> { + value: T, + rest: TokenSlice<'a>, + diagnostics: Diagnostics, +} + +impl<'a, T> Parsed<'a, T> { + pub fn new(value: T, rest: TokenSlice<'a>, warnings: Diagnostics) -> Self { + Self { + value, + rest: rest, + diagnostics: warnings, + } + } + pub fn ok(value: T, rest: TokenSlice<'a>) -> Self { + Self { + value, + rest: rest, + diagnostics: Diagnostics::default(), + } + } + pub fn into_tuple(self) -> (T, TokenSlice<'a>, Diagnostics) { + (self.value, self.rest, self.diagnostics) + } + pub fn map(self, f: F) -> Parsed<'a, R> + where + F: FnOnce(T) -> R, + { + Parsed { + value: f(self.value), + rest: self.rest, + diagnostics: self.diagnostics, + } + } + pub fn warn(self, mut warnings: Diagnostics) -> Self { + Self { + value: self.value, + rest: self.rest, + diagnostics: { + let mut vec = self.diagnostics.0; + vec.append(&mut warnings.0); + Diagnostics(vec) + }, + } + } +} + +type ParseResult<'a, T> = Result, ParseError>; + +trait MismatchToError { + fn mismatch_to_error(self) -> Self; +} + +impl<'a, T> MismatchToError for ParseResult<'a, T> { + fn mismatch_to_error(self) -> Self { + match self { + Err(ParseError::Mismatch(diagnostic)) => Err(ParseError::Error(diagnostic)), + rest => rest, + } + } +} + +trait FromTokens<'a> { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized; +} + +impl<'a, T> FromTokens<'a> for Option +where + T: FromTokens<'a>, +{ + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + match T::from_tokens(input) { + Ok(p) => Ok(p.map(Some)), + Err(ParseError::Mismatch(_)) => Ok(Parsed::ok(None, input)), + Err(ParseError::Error(error)) => Err(ParseError::Error(error)), + } + } +} + +impl<'a, T> FromTokens<'a> for Vec +where + T: FromTokens<'a>, +{ + fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + let mut values_vec = Vec::new(); + let mut warnings_vec = Vec::new(); + loop { + match T::from_tokens(input) { + Ok(Parsed { + value, + rest, + diagnostics: mut warnings, + }) => { + values_vec.push(value); + warnings_vec.append(&mut warnings.0); + input = rest; + } + Err(ParseError::Mismatch(_)) => break, + Err(ParseError::Error(e)) => return Err(ParseError::Error(e)), + } + } + Ok(Parsed { + value: values_vec, + rest: input, + diagnostics: Diagnostics(warnings_vec), + }) + } +} + +impl<'a> FromTokens<'a> for TokenSlice<'a> { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + Ok(Parsed::ok(input, input.end())) + } +} + +#[derive(Debug, FromTokens)] +#[pspp(add_lifetime)] +struct Descriptives<'a> { + subcommands: Vec>>, +} + +#[derive(Debug)] +struct Subcommand(pub T); + +impl<'a, T> FromTokens<'a> for Subcommand +where + T: FromTokens<'a>, +{ + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash)); + if start.is_empty() { + return Err(ParseError::Error( + input.error("Syntax error at end of input.").into(), + 
)); + } + let end = start.skip_to(&Token::Punct(Punct::Slash)); + let subcommand = start.subslice(0..start.len() - end.len()); + let (value, rest, mut warnings) = T::from_tokens(subcommand)?.into_tuple(); + if !rest.is_empty() { + warnings + .0 + .push(rest.warning("Syntax error expecting end of subcommand.")); + } + Ok(Parsed::new(Self(value), end, warnings)) + } +} + +#[derive(Debug, FromTokens)] +#[pspp(add_lifetime, required_equals)] +enum DescriptivesSubcommand<'a> { + #[pspp(default)] + Variables(Vec>), + Missing(Vec), + Save, + Statistics(Vec), + Sort(Sort), + Format(Vec), +} + +#[derive(Debug, FromTokens)] +enum Missing { + Variable, + Listwise, + Include, +} + +#[derive(Debug, FromTokens)] +enum Format { + Labels, + NoLabels, + Index, + NoIndex, + Line, + Serial, +} + +#[derive(Debug, FromTokens)] +#[pspp(add_lifetime)] +struct DescriptivesVars<'a> { + vars: Vars<'a>, + z_name: Option>, +} + +#[derive(Debug)] +struct InParens(pub T); + +impl<'a, T> FromTokens<'a> for InParens +where + T: FromTokens<'a>, +{ + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LParen))?.into_tuple(); + let (value, rest, warnings) = T::from_tokens(rest)?.into_tuple(); + let ((), rest, _) = parse_token(rest, &Token::Punct(Punct::RParen))?.into_tuple(); + Ok(Parsed { + value: Self(value), + rest, + diagnostics: warnings, + }) + } +} + +fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> { + if let Some(rest) = input.skip(token) { + Ok(Parsed::ok((), rest)) + } else { + Err(ParseError::Mismatch( + input.error(format!("expecting {token}")).into(), + )) + } +} + +fn parse_keyword<'a>(input: TokenSlice<'a>, keyword: &str) -> ParseResult<'a, ()> { + if let Some(rest) = input.skip_if(|token| token.matches_keyword(keyword)) { + Ok(Parsed::ok((), rest)) + } else { + Err(ParseError::Mismatch( + input.error(format!("expecting {keyword}")).into(), + )) + } +} + +#[derive(Debug)] +enum Vars<'a> { + Single(&'a Identifier), + Range(&'a Identifier, &'a Identifier), + All, +} + +impl<'a> FromTokens<'a> for Vars<'a> { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + if let Ok(Parsed { rest, .. }) = parse_token(input, &Token::Punct(Punct::All)) { + Ok(Parsed::ok(Self::All, rest)) + } else { + let (from, rest, _) = parse_id(input)?.into_tuple(); + if let Ok(Parsed { rest, .. }) = parse_token(rest, &Token::Punct(Punct::To)) { + if let Ok(p) = parse_id(rest) { + return Ok(p.map(|to| Self::Range(from, to))); + } + } + Ok(Parsed::ok(Self::Single(from), rest)) + } + } +} + +fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> { + let mut iter = input.iter(); + if let Some(LexToken { + token: Token::Id(id), + .. 
+ }) = iter.next() + { + Ok(Parsed::ok(id, iter.remainder())) + } else { + Err(ParseError::Mismatch( + input.error("Syntax error expecting identifier.").into(), + )) + } +} + +impl<'a> FromTokens<'a> for &'a Identifier { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + parse_id(input) + } +} + +#[derive(Debug, FromTokens)] +struct Sort { + key: SortKey, + direction: Option, +} + +#[derive(Debug, FromTokens)] +enum SortKey { + Mean, + SMean, + Stddev, + Variance, + Range, + Min, + Max, + Sum, + Skewness, + Kurtosis, + Name, +} + +#[derive(Debug, FromTokens)] +enum Direction { + #[pspp(syntax = "(A)")] + Ascending, + #[pspp(syntax = "(D)")] + Descending, +} + +#[derive(Debug, FromTokens)] +enum Statistic { + Default, + Mean, + SeMean, + Stddev, + Variance, + Range, + Sum, + Min, + Max, + Skewness, + Kurtosis, + All, +} + +fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec> { + src.split(|token| token.token == Token::Punct(Punct::Slash)) + .filter(|slice| !slice.is_empty()) + .collect() +} + +fn commands() -> &'static [Command] { + fn new_commands() -> Vec { + vec![ + Command { + allowed_states: FlagSet::full(), + enhanced_only: false, + testing_only: false, + no_abbrev: false, + name: "DESCRIPTIVES", + run: Box::new(|context| { + let mut input = context.lexer; + while !input.is_empty() { + match >::from_tokens(input) { + Ok(Parsed { + value: subcommand, + rest, + diagnostics, + }) => { + println!("\n{subcommand:?}"); + //println!("rest: {rest:?}"); + println!("warnings: {diagnostics:?}"); + //println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0)); + input = rest; + } + Err(error) => { + println!("{error:?}"); + break; + } + } + } + }), + }, + Command { + allowed_states: FlagSet::full(), + enhanced_only: false, + testing_only: false, + no_abbrev: false, + name: "ECHO", + run: Box::new(|_context| todo!()), + }, + ] + } + + static COMMANDS: OnceLock> = OnceLock::new(); + COMMANDS.get_or_init(|| new_commands()).as_slice() +} + +fn parse_command_word(lexer: &mut TokenSlice, s: &mut String, n: usize) -> bool { + let separator = match s.chars().next_back() { + Some(c) if c != '-' => " ", + _ => "", + }; + + match lexer.get_token(n) { + Some(Token::Punct(Punct::Dash)) => { + s.push('-'); + true + } + Some(Token::Id(id)) => { + write!(s, "{separator}{id}").unwrap(); + true + } + Some(Token::Number(number)) if number.is_sign_positive() => { + if let Some(integer) = number.to_exact_usize() { + write!(s, "{separator}{integer}").unwrap(); + true + } else { + false + } + } + _ => false, + } +} + +fn find_best_match(s: &str) -> (Option<&'static Command>, isize) { + let mut cm = CommandMatcher::new(s); + for command in commands() { + cm.add(command.name, command); + } + cm.get_match() +} + +fn parse_command_name( + lexer: &mut TokenSlice, + error: &Box, +) -> Result<(&'static Command, usize), ()> { + let mut s = String::new(); + let mut word = 0; + let mut missing_words = 0; + let mut command = None; + while parse_command_word(lexer, &mut s, word) { + (command, missing_words) = find_best_match(&s); + if missing_words <= 0 { + break; + } + word += 1; + } + if command.is_none() && missing_words > 0 { + s.push_str(" ."); + (command, missing_words) = find_best_match(&s); + s.truncate(s.len() - 2); + } + + match command { + Some(command) => Ok((command, ((word as isize + 1) + missing_words) as usize)), + None => { + if word == 0 { + error( + lexer + .subslice(0..1) + .error("Syntax error expecting command name"), + ) + } else { + 
error(lexer.subslice(0..word + 1).error("Unknown command `{s}`.")) + }; + Err(()) + } + } +} + +pub enum Success { + Success, + Eof, + Finish, +} + +pub fn end_of_command(context: &Context, range: RangeFrom) -> Result { + match context.lexer.get_token(range.start) { + None | Some(Token::End) => Ok(Success::Success), + _ => { + context.error( + context + .lexer + .subslice(range.start..context.lexer.len()) + .error("Syntax error expecting end of command."), + ); + Err(()) + } + } +} + +fn parse_in_state(mut lexer: TokenSlice, error: &Box, _state: State) { + match lexer.get_token(0) { + None | Some(Token::End) => (), + _ => match parse_command_name(&mut lexer, error) { + Ok((command, n_tokens)) => { + let mut context = Context { + error, + lexer: lexer.subslice(n_tokens..lexer.len()), + command_name: Some(command.name), + }; + (command.run)(&mut context); + } + Err(error) => println!("{error:?}"), + }, + } +} + +pub fn parse_command(lexer: TokenSlice, error: &Box) { + parse_in_state(lexer, error, State::Initial) +} + +pub struct Context<'a> { + error: &'a Box, + lexer: TokenSlice<'a>, + command_name: Option<&'static str>, +} + +impl<'a> Context<'a> { + pub fn error(&self, diagnostic: Diagnostic) { + (self.error)(diagnostic); + } +} + +#[cfg(test)] +mod tests { + mod descriptives { + use std::sync::Arc; + + use encoding_rs::UTF_8; + + use crate::{ + engine::Engine, + lex::lexer::{Source, SourceFile}, + }; + + fn test(syntax: &str) { + let mut engine = Engine::new(); + engine.run(Source::new_default(&Arc::new( + SourceFile::for_file_contents( + syntax.to_string(), + Some("test.sps".to_string()), + UTF_8, + ), + ))); + } + + #[test] + fn basics() { + test("descript a to b (c) all/stat=all/format=serial."); + } + + #[test] + fn include_missing() { + test("descript all/stat=all/format=serial/missing=include."); + } + + #[test] + fn include_missing_listwise() { + test("descript all/stat=all/format=serial/missing=listwise."); + test("descript all/stat=all/format=serial/missing=listwise include."); + } + + #[test] + fn mean_only() { + test("descript all/stat=mean."); + } + + #[test] + fn z_scores() { + test("DESCRIPTIVES /VAR=a b /SAVE."); + } + + #[test] + fn syntax_errors() { + test("\ +DESCRIPTIVES MISSING=**. +DESCRIPTIVES FORMAT=**. +DESCRIPTIVES STATISTICS=**. +DESCRIPTIVES SORT=**. +DESCRIPTIVES SORT=NAME (**). +DESCRIPTIVES SORT=NAME (A **). +DESCRIPTIVES **. +DESCRIPTIVES x/ **. +DESCRIPTIVES MISSING=INCLUDE. +"); + } + } +}
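
Note on the moved module, for readers of this diff: command/mod.rs is built around a small recursive-descent pattern. A FromTokens trait consumes a prefix of a TokenSlice and returns a Parsed value together with the remaining tokens and any diagnostics, and ParseError distinguishes Mismatch ("this input isn't mine, a caller may try an alternative") from Error (a hard syntax error that must propagate). The blanket impls for Option<T> and Vec<T> rely on that distinction to backtrack cleanly while still surfacing real errors. The following is a minimal, self-contained sketch of that pattern only; the Token type, the SAVE keyword, and every name below are illustrative stand-ins, not the actual pspp types or API, and the sketch is not part of this commit.

// Simplified sketch of the Mismatch-vs-Error parsing pattern used in command/mod.rs.
// All names here are illustrative, not the pspp ones.

#[derive(Debug, Clone, PartialEq)]
enum Token {
    Id(String),
    Slash,
}

#[derive(Debug)]
enum ParseError {
    Mismatch(String), // recoverable: the caller may try another parser
    Error(String),    // definite syntax error: propagate
}

struct Parsed<'a, T> {
    value: T,
    rest: &'a [Token], // tokens left over after parsing `value`
}

type ParseResult<'a, T> = Result<Parsed<'a, T>, ParseError>;

trait FromTokens<'a>: Sized {
    fn from_tokens(input: &'a [Token]) -> ParseResult<'a, Self>;
}

// Mirrors the module's blanket impl for Option<T>: a Mismatch becomes None
// without consuming input, while a hard Error still propagates.
impl<'a, T: FromTokens<'a>> FromTokens<'a> for Option<T> {
    fn from_tokens(input: &'a [Token]) -> ParseResult<'a, Self> {
        match T::from_tokens(input) {
            Ok(p) => Ok(Parsed { value: Some(p.value), rest: p.rest }),
            Err(ParseError::Mismatch(_)) => Ok(Parsed { value: None, rest: input }),
            Err(e @ ParseError::Error(_)) => Err(e),
        }
    }
}

// A toy keyword parser standing in for the derived FromTokens impls.
struct Save;

impl<'a> FromTokens<'a> for Save {
    fn from_tokens(input: &'a [Token]) -> ParseResult<'a, Self> {
        match input.first() {
            Some(Token::Id(id)) if id.eq_ignore_ascii_case("save") => {
                Ok(Parsed { value: Save, rest: &input[1..] })
            }
            _ => Err(ParseError::Mismatch("expecting SAVE".into())),
        }
    }
}

fn main() {
    // SAVE is present: it is consumed and the slash remains.
    let tokens = vec![Token::Id("SAVE".into()), Token::Slash];
    let parsed = <Option<Save>>::from_tokens(&tokens).unwrap();
    assert!(parsed.value.is_some());
    assert_eq!(parsed.rest, &[Token::Slash]);

    // SAVE is absent: the mismatch is recovered as None and no input is consumed.
    let tokens = vec![Token::Slash];
    let parsed = <Option<Save>>::from_tokens(&tokens).unwrap();
    assert!(parsed.value.is_none());
}

Keeping Mismatch separate from Error is the design choice that lets optional and repeated elements (Option, Vec, Subcommand) in the real module back off without losing genuine diagnostics.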