From: Ben Pfaff Date: Wed, 4 Dec 2024 16:51:59 +0000 (-0800) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=28907f42e297eb45011898e10348f82605e92a37;p=pspp work --- diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs index 5ea20581a9..59c9d1e7e0 100644 --- a/rust/pspp-derive/src/lib.rs +++ b/rust/pspp-derive/src/lib.rs @@ -83,7 +83,7 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { } } }; - println!("{output}"); + //println!("{output}"); Ok(output) } @@ -137,7 +137,7 @@ fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result Command { + Command { + allowed_states: FlagSet::full(), + enhanced_only: false, + testing_only: false, + no_abbrev: false, + name: "CROSSTABS", + run: Box::new(|context| { + let input = context.lexer; + match ::from_tokens(input) { + Ok(Parsed { + value, + rest: _, + diagnostics, + }) => { + println!("\n{value:#?}"); + //println!("rest: {rest:?}"); + println!("warnings: {diagnostics:?}"); + //println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0)); + } + Err(error) => { + println!("{error:?}"); + } + } + }), + } +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +struct Crosstabs<'a>(Subcommands>); + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime, required_equals)] +enum CrosstabsSubcommand<'a> { + #[pspp(default)] + Tables(Punctuated, By>), + Missing(Missing), + Write(Write), + HideSmallCounts(HideSmallCounts), + ShowDim(Integer), + Statistics(Punctuated), + Cells(Punctuated), + Variables(Punctuated>), + Format(Punctuated), + Count(Punctuated), + Method(Method), +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +struct BoundedVars<'a> { + vars: VarRange<'a>, + bounds: InParens, +} + +#[derive(Debug, pspp_derive::FromTokens)] +struct Bounds { + min: Integer, + comma: Comma, + max: Integer, +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Method { + Mc(Punctuated), + Exact(Punctuated), +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Mc { + CIn(InParens), + Samples(InParens), +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Exact { + Timer(InParens), +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Cell { + Prop, + BProp, + Count, + Row, + Column, + Total, + Expected, + ResId, + SResId, + ASResid, + All, + None, +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Statistic { + ChiSq, + Phi, + CC, + Lambda, + UC, + Risk, + BTau, + CTau, + Kappa, + Gamma, + D, + McNemar, + Eta, + Corr, + Cmh(InParens), + All, + None, +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Count { + AsIs, + Case, + Cell, + Round, + Truncate, +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Format { + AValue, + DValue, + Tables, + NoTables, +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Missing { + Table, + Include, + Report, +} + +#[derive(Debug, pspp_derive::FromTokens)] +enum Write { + None, + Cells, + All, +} + +#[derive(Debug, pspp_derive::FromTokens)] +struct HideSmallCounts { + // XXX `COUNT =` + count: Integer, +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use encoding_rs::UTF_8; + + use crate::{ + engine::Engine, + lex::lexer::{Source, SourceFile}, + }; + + fn test(syntax: &str) { + let mut engine = Engine::new(); + engine.run(Source::new_default(&Arc::new( + SourceFile::for_file_contents(syntax.to_string(), Some("test.sps".to_string()), UTF_8), + ))); + } + + #[test] + fn basics() { + test( + "CROSSTABS r by c /STATISTICS=CHISQ +/CELLS=COUNT EXPECTED RESID SRESID ASRESID. +", + ); + } + + #[test] + fn integer_mode() { + test("CROSSTABS VARIABLES=X (1,7) Y (1,7) /TABLES=X BY Y."); + } +} diff --git a/rust/pspp/src/command/descriptives.rs b/rust/pspp/src/command/descriptives.rs index 0d33fccb8b..7e57f3de07 100644 --- a/rust/pspp/src/command/descriptives.rs +++ b/rust/pspp/src/command/descriptives.rs @@ -1,9 +1,9 @@ use flagset::FlagSet; -use super::{Command, Subcommand}; +use super::{Command, Punctuated, Subcommand}; use crate::command::{ parse_token, FromTokens, Identifier, InParens, MismatchToError, ParseError, ParseResult, - Parsed, Punct, Token, TokenSlice, Vars, + Parsed, Punct, Token, TokenSlice, VarRange, }; pub(super) fn descriptives_command() -> Command { @@ -48,7 +48,7 @@ struct Descriptives<'a> { #[pspp(add_lifetime, required_equals)] enum DescriptivesSubcommand<'a> { #[pspp(default)] - Variables(Vec>), + Variables(Punctuated>), Missing(Vec), Save, Statistics(Vec), @@ -76,7 +76,7 @@ enum Format { #[derive(Debug, pspp_derive::FromTokens)] #[pspp(add_lifetime)] struct DescriptivesVars<'a> { - vars: Vars<'a>, + vars: VarRange<'a>, z_name: Option>, } @@ -166,7 +166,7 @@ mod tests { #[test] fn z_scores() { - test("DESCRIPTIVES /VAR=a b /SAVE."); + test("DESCRIPTIVES /VAR=a b, c /SAVE."); } #[test] diff --git a/rust/pspp/src/command/mod.rs b/rust/pspp/src/command/mod.rs index 2490257f3d..adf915fa02 100644 --- a/rust/pspp/src/command/mod.rs +++ b/rust/pspp/src/command/mod.rs @@ -1,6 +1,11 @@ #![allow(dead_code)] -use std::{fmt::Write, ops::RangeFrom, sync::OnceLock}; +use std::{ + fmt::{Debug, Write}, + ops::RangeFrom, + sync::OnceLock, +}; +use crosstabs::crosstabs_command; use descriptives::descriptives_command; use flagset::{flags, FlagSet}; use pspp_derive::FromTokens; @@ -16,6 +21,7 @@ use crate::{ message::{Diagnostic, Diagnostics}, }; +pub mod crosstabs; pub mod descriptives; flags! { @@ -140,6 +146,163 @@ where } } +#[derive(Debug)] +pub struct Comma(Token); + +impl<'a> FromTokens<'a> for Comma { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + _parse_token(input, &Token::Punct(Punct::Comma)).map(|p| p.map(|token| Comma(token))) + } +} + +#[derive(Debug)] +struct By(Token); + +impl<'a> FromTokens<'a> for By { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + _parse_token(input, &Token::Punct(Punct::By)).map(|p| p.map(|token| By(token))) + } +} + +pub struct Punctuated> { + head: Vec<(T, P)>, + tail: Option, +} + +impl Debug for Punctuated +where + T: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[")?; + for (index, item) in self + .head + .iter() + .map(|(t, _p)| t) + .chain(self.tail.iter()) + .enumerate() + { + if index > 0 { + write!(f, ", ")?; + } + write!(f, "{item:?}")?; + } + write!(f, "]") + } +} + +impl<'a, T, P> FromTokens<'a> for Punctuated +where + T: FromTokens<'a>, + P: FromTokens<'a>, +{ + fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + let mut head = Vec::new(); + let mut warnings_vec = Vec::new(); + let tail = loop { + let t = match T::from_tokens(input) { + Ok(Parsed { + value, + rest, + diagnostics: mut warnings, + }) => { + warnings_vec.append(&mut warnings.0); + input = rest; + value + } + Err(ParseError::Mismatch(_)) => break None, + Err(ParseError::Error(e)) => return Err(ParseError::Error(e)), + }; + let p = match P::from_tokens(input) { + Ok(Parsed { + value, + rest, + diagnostics: mut warnings, + }) => { + warnings_vec.append(&mut warnings.0); + input = rest; + value + } + Err(ParseError::Mismatch(_)) => break Some(t), + Err(ParseError::Error(e)) => return Err(ParseError::Error(e)), + }; + head.push((t, p)); + }; + Ok(Parsed { + value: Punctuated { head, tail }, + rest: input, + diagnostics: Diagnostics(warnings_vec), + }) + } +} + +pub struct Subcommands(Vec); + +impl Debug for Subcommands +where + T: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Subcommands[")?; + for (index, item) in self.0.iter().enumerate() { + if index > 0 { + write!(f, ",\n")?; + } + write!(f, "{item:?}")?; + } + write!(f, "]") + } +} + +impl<'a, T> FromTokens<'a> for Subcommands +where + T: FromTokens<'a>, +{ + fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + let mut items = Vec::new(); + let mut diagnostics = Vec::new(); + loop { + let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash)); + if start.is_empty() { + break; + } + let end = start.skip_to(&Token::Punct(Punct::Slash)); + let subcommand = start.subslice(0..start.len() - end.len()); + match T::from_tokens(subcommand) { + Ok(p) => { + let (value, rest, mut d) = p.into_tuple(); + items.push(value); + diagnostics.append(&mut d.0); + if !rest.is_empty() { + diagnostics.push(rest.warning("Syntax error expecting end of subcommand.")); + } + } + Err(ParseError::Error(mut d) | ParseError::Mismatch(mut d)) => { + diagnostics.append(&mut d.0); + } + } + input = end; + } + Ok(Parsed { + value: Subcommands(items), + rest: input, + diagnostics: Diagnostics(diagnostics), + }) + } +} + impl<'a, T> FromTokens<'a> for Vec where T: FromTokens<'a>, @@ -150,7 +313,7 @@ where { let mut values_vec = Vec::new(); let mut warnings_vec = Vec::new(); - loop { + while !input.is_empty() { match T::from_tokens(input) { Ok(Parsed { value, @@ -233,6 +396,28 @@ where } } +fn parse_token_if<'a, F, R>(input: TokenSlice<'a>, parse: F) -> ParseResult<'a, R> +where + F: Fn(&Token) -> Option, +{ + if let Some(token) = input.get_token(0) { + if let Some(result) = parse(token) { + return Ok(Parsed::ok(result, input.subslice(1..input.len()))); + } + } + Err(ParseError::Mismatch(Diagnostics::default())) +} + +fn _parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, Token> { + if let Some(rest) = input.skip(token) { + Ok(Parsed::ok(input.first().token.clone(), rest)) + } else { + Err(ParseError::Mismatch( + input.error(format!("expecting {token}")).into(), + )) + } +} + fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> { if let Some(rest) = input.skip(token) { Ok(Parsed::ok((), rest)) @@ -253,14 +438,52 @@ fn parse_keyword<'a>(input: TokenSlice<'a>, keyword: &str) -> ParseResult<'a, () } } +pub type VarList<'a> = Punctuated>; + +#[derive(Debug)] +pub struct Number(f64); + +impl<'a> FromTokens<'a> for Number { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + parse_token_if(input, |token| token.as_number().map(Number)) + .map_err(|_| ParseError::Mismatch(input.error(String::from("expecting number")).into())) + } +} + #[derive(Debug)] -enum Vars<'a> { +pub struct Integer(i64); + +impl<'a> FromTokens<'a> for Integer { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + parse_token_if(input, |token| token.as_integer().map(Integer)).map_err(|_| { + ParseError::Mismatch(input.error(String::from("expecting integer")).into()) + }) + } +} + +pub enum VarRange<'a> { Single(&'a Identifier), Range(&'a Identifier, &'a Identifier), All, } -impl<'a> FromTokens<'a> for Vars<'a> { +impl<'a> Debug for VarRange<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Single(var) => write!(f, "{var:?}"), + Self::Range(from, to) => write!(f, "{from:?} TO {to:?}"), + Self::All => write!(f, "ALL"), + } + } +} + +impl<'a> FromTokens<'a> for VarRange<'a> { fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> where Self: Sized, @@ -313,6 +536,7 @@ fn commands() -> &'static [Command] { fn new_commands() -> Vec { vec![ descriptives_command(), + crosstabs_command(), Command { allowed_states: FlagSet::full(), enhanced_only: false, diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs index 6ffddc35ad..7b125586c7 100644 --- a/rust/pspp/src/lex/lexer.rs +++ b/rust/pspp/src/lex/lexer.rs @@ -358,7 +358,7 @@ impl<'a> TokenSlice<'a> { } } - fn first(&self) -> &LexToken { + pub fn first(&self) -> &LexToken { self.tokens.first().unwrap() } fn last(&self) -> &LexToken { diff --git a/rust/pspp/src/lex/token.rs b/rust/pspp/src/lex/token.rs index 2761f717ee..1f94f3be7b 100644 --- a/rust/pspp/src/lex/token.rs +++ b/rust/pspp/src/lex/token.rs @@ -34,6 +34,27 @@ impl Token { pub fn matches_keyword(&self, keyword: &str) -> bool { self.id().map_or(false, |id| id.matches_keyword(keyword)) } + + pub fn as_number(&self) -> Option { + if let Self::Number(number) = self { + Some(*number) + } else { + None + } + } + + pub fn as_integer(&self) -> Option { + match self { + Self::Number(number) + if *number >= i64::MIN as f64 + && *number <= i64::MAX as f64 + && *number == number.floor() => + { + Some(*number as i64) + } + _ => None, + } + } } fn is_printable(c: char) -> bool {