work
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 4 Dec 2024 16:51:59 +0000 (08:51 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 4 Dec 2024 16:51:59 +0000 (08:51 -0800)
rust/pspp-derive/src/lib.rs
rust/pspp/src/command/crosstabs.rs [new file with mode: 0644]
rust/pspp/src/command/descriptives.rs
rust/pspp/src/command/mod.rs
rust/pspp/src/lex/lexer.rs
rust/pspp/src/lex/token.rs

index 5ea20581a99acd7808c5946c45af6b6c3af225fc..59c9d1e7e098bb823c196069cee3058d23d33be0 100644 (file)
@@ -83,7 +83,7 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result<TokenStream2, Error> {
             }
         }
     };
-    println!("{output}");
+    //println!("{output}");
     Ok(output)
 }
 
@@ -137,7 +137,7 @@ fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result<TokenStream2, Erro
             }
         }
     };
-    println!("{output}");
+    //println!("{output}");
     Ok(output)
 }
 
diff --git a/rust/pspp/src/command/crosstabs.rs b/rust/pspp/src/command/crosstabs.rs
new file mode 100644 (file)
index 0000000..3634c17
--- /dev/null
@@ -0,0 +1,194 @@
+use flagset::FlagSet;
+
+use super::{By, Comma, Command, Integer, Number, Punctuated, Subcommands, VarList};
+use crate::command::{
+    parse_token, FromTokens, InParens, MismatchToError, ParseError, ParseResult, Parsed, Punct,
+    Token, TokenSlice, VarRange,
+};
+
+pub(super) fn crosstabs_command() -> Command { // Builds the `Command` entry for CROSSTABS.
+    Command {
+        allowed_states: FlagSet::full(),
+        enhanced_only: false,
+        testing_only: false,
+        no_abbrev: false,
+        name: "CROSSTABS",
+        run: Box::new(|context| { // Parses the command and prints the result.
+            let input = context.lexer;
+            match <Crosstabs>::from_tokens(input) {
+                Ok(Parsed {
+                    value,
+                    rest: _,
+                    diagnostics,
+                }) => {
+                    println!("\n{value:#?}");
+                    // Tokens left over after parsing (`rest`) are ignored here.
+                    println!("warnings: {diagnostics:?}");
+                    // NOTE(review): output is debug printing only; no tables are computed.
+                }
+                Err(error) => {
+                    println!("{error:?}");
+                }
+            }
+        }),
+    }
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct Crosstabs<'a>(Subcommands<CrosstabsSubcommand<'a>>); // Whole command: its subcommands.
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime, required_equals)]
+enum CrosstabsSubcommand<'a> { // One variant per subcommand that CROSSTABS parses.
+    #[pspp(default)] // presumably the variant used when no keyword is given — confirm
+    Tables(Punctuated<VarList<'a>, By>), // Variable lists separated by `By` tokens.
+    Missing(Missing),
+    Write(Write),
+    HideSmallCounts(HideSmallCounts),
+    ShowDim(Integer),
+    Statistics(Punctuated<Statistic>),
+    Cells(Punctuated<Cell>),
+    Variables(Punctuated<BoundedVars<'a>>), // Variable ranges, each with its bounds.
+    Format(Punctuated<Format>),
+    Count(Punctuated<Count>),
+    Method(Method),
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct BoundedVars<'a> { // Item of `CrosstabsSubcommand::Variables`; tests use `X (1,7)`.
+    vars: VarRange<'a>,
+    bounds: InParens<Bounds>, // presumably the parenthesized `min,max` part — confirm
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+struct Bounds { // Two integers with a comma token between them.
+    min: Integer,
+    comma: Comma, // The `,` token between the two bounds.
+    max: Integer,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Method { // Payload of `CrosstabsSubcommand::Method`.
+    Mc(Punctuated<Mc>), // A list of `Mc` settings.
+    Exact(Punctuated<Exact>), // A list of `Exact` settings.
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Mc { // One setting inside `Method::Mc`.
+    CIn(InParens<Number>), // Carries a `Number`.
+    Samples(InParens<Integer>), // Carries an `Integer`.
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Exact { // One setting inside `Method::Exact`.
+    Timer(InParens<Number>), // Carries a `Number`.
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Cell { // Items of `CrosstabsSubcommand::Cells`.
+    Prop,
+    BProp,
+    Count,
+    Row,
+    Column,
+    Total,
+    Expected,
+    ResId,
+    SResId,
+    ASResid, // NOTE(review): casing differs from `ResId`/`SResId`; confirm it is intended.
+    All,
+    None,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Statistic { // Items of `CrosstabsSubcommand::Statistics`.
+    ChiSq,
+    Phi,
+    CC,
+    Lambda,
+    UC,
+    Risk,
+    BTau,
+    CTau,
+    Kappa,
+    Gamma,
+    D,
+    McNemar,
+    Eta,
+    Corr,
+    Cmh(InParens<Integer>), // The only statistic that carries an argument.
+    All,
+    None,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Count { // Items of `CrosstabsSubcommand::Count`.
+    AsIs,
+    Case,
+    Cell,
+    Round,
+    Truncate,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Format { // Items of `CrosstabsSubcommand::Format`.
+    AValue,
+    DValue,
+    Tables,
+    NoTables,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Missing { // Payload of `CrosstabsSubcommand::Missing` (a single value, not a list).
+    Table,
+    Include,
+    Report,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Write { // Payload of `CrosstabsSubcommand::Write` (a single value, not a list).
+    None,
+    Cells,
+    All,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+struct HideSmallCounts { // Payload of `CrosstabsSubcommand::HideSmallCounts`.
+    // XXX: presumably a `COUNT =` prefix still needs parsing; only the integer is declared.
+    count: Integer,
+}
+
+#[cfg(test)]
+mod tests { // Smoke tests: each runs CROSSTABS syntax through `Engine`; nothing is asserted.
+    use std::sync::Arc;
+
+    use encoding_rs::UTF_8;
+
+    use crate::{
+        engine::Engine,
+        lex::lexer::{Source, SourceFile},
+    };
+
+    fn test(syntax: &str) { // Runs `syntax` as the contents of a file named "test.sps".
+        let mut engine = Engine::new();
+        engine.run(Source::new_default(&Arc::new(
+            SourceFile::for_file_contents(syntax.to_string(), Some("test.sps".to_string()), UTF_8),
+        )));
+    }
+
+    #[test]
+    fn basics() { // Keyword-less table spec followed by STATISTICS and CELLS.
+        test(
+            "CROSSTABS r by c /STATISTICS=CHISQ
+/CELLS=COUNT EXPECTED RESID SRESID ASRESID.
+",
+        );
+    }
+
+    #[test]
+    fn integer_mode() { // VARIABLES with `(min,max)` bounds, then an explicit TABLES.
+        test("CROSSTABS VARIABLES=X (1,7) Y (1,7) /TABLES=X BY Y.");
+    }
+}
index 0d33fccb8beece1fb17163df5937dc49aba16d80..7e57f3de07a38dd73e7db5a1fa9476a9efb78aef 100644 (file)
@@ -1,9 +1,9 @@
 use flagset::FlagSet;
 
-use super::{Command, Subcommand};
+use super::{Command, Punctuated, Subcommand};
 use crate::command::{
     parse_token, FromTokens, Identifier, InParens, MismatchToError, ParseError, ParseResult,
-    Parsed, Punct, Token, TokenSlice, Vars,
+    Parsed, Punct, Token, TokenSlice, VarRange,
 };
 
 pub(super) fn descriptives_command() -> Command {
@@ -48,7 +48,7 @@ struct Descriptives<'a> {
 #[pspp(add_lifetime, required_equals)]
 enum DescriptivesSubcommand<'a> {
     #[pspp(default)]
-    Variables(Vec<DescriptivesVars<'a>>),
+    Variables(Punctuated<DescriptivesVars<'a>>),
     Missing(Vec<Missing>),
     Save,
     Statistics(Vec<Statistic>),
@@ -76,7 +76,7 @@ enum Format {
 #[derive(Debug, pspp_derive::FromTokens)]
 #[pspp(add_lifetime)]
 struct DescriptivesVars<'a> {
-    vars: Vars<'a>,
+    vars: VarRange<'a>,
     z_name: Option<InParens<&'a Identifier>>,
 }
 
@@ -166,7 +166,7 @@ mod tests {
 
     #[test]
     fn z_scores() {
-        test("DESCRIPTIVES /VAR=a b /SAVE.");
+        test("DESCRIPTIVES /VAR=a b, c /SAVE.");
     }
 
     #[test]
index 2490257f3db5501a3e14f0ccde57db54523def7f..adf915fa0233ed67f8607de57ed9665f43d1c346 100644 (file)
@@ -1,6 +1,11 @@
 #![allow(dead_code)]
-use std::{fmt::Write, ops::RangeFrom, sync::OnceLock};
+use std::{
+    fmt::{Debug, Write},
+    ops::RangeFrom,
+    sync::OnceLock,
+};
 
+use crosstabs::crosstabs_command;
 use descriptives::descriptives_command;
 use flagset::{flags, FlagSet};
 use pspp_derive::FromTokens;
@@ -16,6 +21,7 @@ use crate::{
     message::{Diagnostic, Diagnostics},
 };
 
+pub mod crosstabs;
 pub mod descriptives;
 
 flags! {
@@ -140,6 +146,163 @@ where
     }
 }
 
+#[derive(Debug)]
+pub struct Comma(Token); // Holds the `,` punctuation token that was matched.
+
+impl<'a> FromTokens<'a> for Comma { // Matches one `Punct::Comma`; otherwise mismatches.
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        _parse_token(input, &Token::Punct(Punct::Comma)).map(|p| p.map(Comma))
+    }
+}
+
+#[derive(Debug)]
+struct By(Token); // Holds the `Punct::By` token that was matched.
+
+impl<'a> FromTokens<'a> for By { // Matches one `Punct::By`; otherwise mismatches.
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        _parse_token(input, &Token::Punct(Punct::By)).map(|p| p.map(By))
+    }
+}
+
+pub struct Punctuated<T, P = Option<Comma>> { // `T` items with `P` punctuation between them.
+    head: Vec<(T, P)>, // Each item that was followed by punctuation, with that punctuation.
+    tail: Option<T>, // Final item that had no punctuation after it, if any.
+}
+
+impl<T: Debug, P> Debug for Punctuated<T, P> {
+    /// Formats the list as `[a, b, c]`.
+    /// Only the items are shown; the punctuation between them is left out, so
+    /// `P` does not need to implement `Debug`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Items that had punctuation after them, then the optional unpunctuated tail.
+        let punctuated = self.head.iter().map(|(item, _punct)| item);
+        let mut items = punctuated.chain(self.tail.iter());
+        f.write_str("[")?;
+        if let Some(first) = items.next() {
+            // A fresh `{:?}` per item, so an outer `{:#?}` is not inherited.
+            write!(f, "{first:?}")?;
+        }
+        // Every item after the first is preceded by ", ".
+        for item in items {
+            f.write_str(", ")?;
+            write!(f, "{item:?}")?;
+        }
+        f.write_str("]")
+    }
+}
+
+impl<'a, T, P> FromTokens<'a> for Punctuated<T, P> // Parses `(T P)* T?`; may be empty.
+where
+    T: FromTokens<'a>,
+    P: FromTokens<'a>,
+{
+    fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        let mut head = Vec::new();
+        let mut warnings_vec = Vec::new(); // Diagnostics from every successful sub-parse.
+        let tail = loop { // Alternates item, punctuation, item, ... until one mismatches.
+            let t = match T::from_tokens(input) {
+                Ok(Parsed {
+                    value,
+                    rest,
+                    diagnostics: mut warnings,
+                }) => {
+                    warnings_vec.append(&mut warnings.0);
+                    input = rest;
+                    value
+                }
+                Err(ParseError::Mismatch(_)) => break None, // No item: stop without a tail.
+                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
+            };
+            let p = match P::from_tokens(input) {
+                Ok(Parsed {
+                    value,
+                    rest,
+                    diagnostics: mut warnings,
+                }) => {
+                    warnings_vec.append(&mut warnings.0);
+                    input = rest;
+                    value
+                }
+                Err(ParseError::Mismatch(_)) => break Some(t), // No punctuation: `t` is the tail.
+                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
+            };
+            head.push((t, p)); // NOTE(review): never ends if `T` and `P` both match empty input
+        };
+        Ok(Parsed {
+            value: Punctuated { head, tail },
+            rest: input,
+            diagnostics: Diagnostics(warnings_vec),
+        })
+    }
+}
+
+/// Subcommands parsed from a command, in the order they were pushed.
+pub struct Subcommands<T>(Vec<T>);
+
+impl<T: Debug> Debug for Subcommands<T> {
+    /// Writes `Subcommands[`, each item in `{:?}` form separated by ",\n", then `]`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("Subcommands[")?;
+        for (position, subcommand) in self.0.iter().enumerate() {
+            if position > 0 {
+                f.write_str(",\n")?;
+            }
+            // A fresh `{:?}` per item, so an outer `{:#?}` does not pretty-print it.
+            write!(f, "{subcommand:?}")?;
+        }
+        f.write_str("]")
+    }
+}
+
+impl<'a, T> FromTokens<'a> for Subcommands<T> // Splits input at `/` tokens, parsing each piece.
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        let mut items = Vec::new();
+        let mut diagnostics = Vec::new();
+        loop { // One pass per subcommand.
+            let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash));
+            if start.is_empty() { // No tokens remain after the skip: done.
+                break;
+            }
+            let end = start.skip_to(&Token::Punct(Punct::Slash)); // presumably from the next `/`
+            let subcommand = start.subslice(0..start.len() - end.len()); // `start` without `end`
+            match T::from_tokens(subcommand) {
+                Ok(p) => {
+                    let (value, rest, mut d) = p.into_tuple();
+                    items.push(value);
+                    diagnostics.append(&mut d.0);
+                    if !rest.is_empty() { // Unparsed trailing tokens only produce a warning.
+                        diagnostics.push(rest.warning("Syntax error expecting end of subcommand."));
+                    }
+                }
+                Err(ParseError::Error(mut d) | ParseError::Mismatch(mut d)) => {
+                    diagnostics.append(&mut d.0); // Failed subcommand: diagnostics kept, no item.
+                }
+            }
+            input = end; // Continue with whatever follows this subcommand.
+        }
+        Ok(Parsed { // Always `Ok`: parse failures surface only as diagnostics.
+            value: Subcommands(items),
+            rest: input,
+            diagnostics: Diagnostics(diagnostics),
+        })
+    }
+}
+
 impl<'a, T> FromTokens<'a> for Vec<T>
 where
     T: FromTokens<'a>,
@@ -150,7 +313,7 @@ where
     {
         let mut values_vec = Vec::new();
         let mut warnings_vec = Vec::new();
-        loop {
+        while !input.is_empty() {
             match T::from_tokens(input) {
                 Ok(Parsed {
                     value,
@@ -233,6 +396,28 @@ where
     }
 }
 
+/// Parses one token: succeeds with `parse`'s result if a first token exists and
+/// `parse` accepts it, otherwise mismatches with no diagnostics.
+fn parse_token_if<'a, F, R>(input: TokenSlice<'a>, parse: F) -> ParseResult<'a, R>
+where
+    F: Fn(&Token) -> Option<R>,
+{
+    match input.get_token(0).and_then(|token| parse(token)) {
+        Some(result) => Ok(Parsed::ok(result, input.subslice(1..input.len()))),
+        None => Err(ParseError::Mismatch(Diagnostics::default())),
+    }
+}
+
+/// Matches `token` at the start of `input` and returns a clone of the first
+/// token; on failure, mismatches with an "expecting ..." diagnostic.
+fn _parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, Token> {
+    match input.skip(token) {
+        // presumably `skip` only succeeds when the first token equals `token` — confirm
+        Some(rest) => Ok(Parsed::ok(input.first().token.clone(), rest)),
+        None => Err(ParseError::Mismatch(input.error(format!("expecting {token}")).into())),
+    }
+}
+
 fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> {
     if let Some(rest) = input.skip(token) {
         Ok(Parsed::ok((), rest))
@@ -253,14 +438,52 @@ fn parse_keyword<'a>(input: TokenSlice<'a>, keyword: &str) -> ParseResult<'a, ()
     }
 }
 
+pub type VarList<'a> = Punctuated<VarRange<'a>>; // `VarRange`s with the default punctuation.
+
+#[derive(Debug)]
+pub struct Number(f64); // Value taken from a token via `Token::as_number`.
+
+impl<'a> FromTokens<'a> for Number { // One token accepted by `as_number`, else a mismatch.
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        parse_token_if(input, |token| token.as_number().map(Number))
+            .map_err(|_| ParseError::Mismatch(input.error(String::from("expecting number")).into()))
+    }
+}
+
 #[derive(Debug)]
-enum Vars<'a> {
+pub struct Integer(i64); // Value taken from a token via `Token::as_integer`.
+
+impl<'a> FromTokens<'a> for Integer { // One token accepted by `as_integer`, else a mismatch.
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        parse_token_if(input, |token| token.as_integer().map(Integer)).map_err(|_| {
+            ParseError::Mismatch(input.error(String::from("expecting integer")).into())
+        })
+    }
+}
+
+pub enum VarRange<'a> {
     Single(&'a Identifier),
     Range(&'a Identifier, &'a Identifier),
     All,
 }
 
-impl<'a> FromTokens<'a> for Vars<'a> {
+impl<'a> Debug for VarRange<'a> { // Hand-written so output omits the variant names.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Single(var) => write!(f, "{var:?}"), // Just the identifier.
+            Self::Range(from, to) => write!(f, "{from:?} TO {to:?}"), // Both ends, joined by TO.
+            Self::All => write!(f, "ALL"),
+        }
+    }
+}
+
+impl<'a> FromTokens<'a> for VarRange<'a> {
     fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
     where
         Self: Sized,
@@ -313,6 +536,7 @@ fn commands() -> &'static [Command] {
     fn new_commands() -> Vec<Command> {
         vec![
             descriptives_command(),
+            crosstabs_command(),
             Command {
                 allowed_states: FlagSet::full(),
                 enhanced_only: false,
index 6ffddc35adaef251003b0ac0bc79d5d5f1c4c291..7b125586c7f8d1e73701f850e10e0e52498d9a4b 100644 (file)
@@ -358,7 +358,7 @@ impl<'a> TokenSlice<'a> {
         }
     }
 
-    fn first(&self) -> &LexToken {
+    pub fn first(&self) -> &LexToken {
         self.tokens.first().unwrap()
     }
     fn last(&self) -> &LexToken {
index 2761f717eec29e9cf02c2708094a12698e218b8d..1f94f3be7b889b0b3ebe6b7133623c3f9cd69233 100644 (file)
@@ -34,6 +34,27 @@ impl Token {
     pub fn matches_keyword(&self, keyword: &str) -> bool {
         self.id().map_or(false, |id| id.matches_keyword(keyword))
     }
+
+    /// Returns the value if this token is a `Number`, otherwise `None`.
+    pub fn as_number(&self) -> Option<f64> {
+        match self {
+            Self::Number(value) => Some(*value),
+            _ => None,
+        }
+    }
+
+    /// Returns the value as an `i64` if this is a number token holding a
+    /// whole number within `i64` range, otherwise `None`.
+    pub fn as_integer(&self) -> Option<i64> {
+        // `i64::MAX as f64` rounds up to 2^63, which does not fit in `i64`, so
+        // the upper bound is exclusive (`<=` let 2^63 saturate to `i64::MAX`).
+        const MIN: f64 = i64::MIN as f64;
+        const END: f64 = i64::MAX as f64;
+        match self {
+            Self::Number(n) if (MIN..END).contains(n) && *n == n.floor() => Some(*n as i64),
+            _ => None,
+        }
+    }
 }
 
 fn is_printable(c: char) -> bool {