start on ctables
author: Ben Pfaff <blp@cs.stanford.edu>
Thu, 12 Dec 2024 20:06:27 +0000 (12:06 -0800)
committer: Ben Pfaff <blp@cs.stanford.edu>
Thu, 12 Dec 2024 20:06:27 +0000 (12:06 -0800)
rust/pspp/src/command/ctables.rs [new file with mode: 0644]
rust/pspp/src/command/mod.rs
rust/pspp/src/format.rs

diff --git a/rust/pspp/src/command/ctables.rs b/rust/pspp/src/command/ctables.rs
new file mode 100644 (file)
index 0000000..55da82b
--- /dev/null
@@ -0,0 +1,168 @@
+use flagset::FlagSet;
+
+use super::{
+    And, By, Command, Equals, Gt, InSquares, Number, Plus, Punctuated, Seq1, Subcommands, VarList
+};
+use crate::{
+    command::{FromTokens, InParens, MismatchToError, ParseError, ParseResult, Parsed, TokenSlice},
+    format::AbstractFormat,
+    identifier::Identifier,
+};
+
+pub(super) fn ctables_command() -> Command {
+    Command {
+        allowed_states: FlagSet::full(),
+        enhanced_only: false,
+        testing_only: false,
+        no_abbrev: false,
+        name: "CROSSTABS",
+        run: Box::new(|context| {
+            let input = context.lexer;
+            match <CTables>::from_tokens(input) {
+                Ok(Parsed {
+                    value,
+                    rest: _,
+                    diagnostics,
+                }) => {
+                    println!("\n{value:#?}");
+                    //println!("rest: {rest:?}");
+                    println!("warnings: {diagnostics:?}");
+                    //println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0));
+                }
+                Err(error) => {
+                    println!("{error:?}");
+                }
+            }
+        }),
+    }
+}
+
+/// Root of the parsed command: the sequence of its subcommands.
+///
+/// The parser is generated by `pspp_derive::FromTokens`.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct CTables<'a>(Subcommands<CTablesSubcommand<'a>>);
+
+/// One subcommand of the command.
+///
+/// NOTE(review): presumably the derived parser selects a variant by matching
+/// the variant's name as a keyword -- confirm against `pspp_derive`.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum CTablesSubcommand<'a> {
+    /// A table specification; see [`Table`].
+    Table(Table<'a>),
+    /// One or more [`Format`] settings.
+    Format(Seq1<Format<'a>>),
+    /// One or more [`VLabels`] settings.
+    VLabels(Seq1<VLabels<'a>>),
+    /// A single [`SMissing`] setting.
+    SMissing(SMissing),
+    /// `&`, an identifier, `=`, the `expr` keyword, then a parenthesized
+    /// [`Expression`].
+    PCompute(And, &'a Identifier, Equals, keyword::Expr, InParens<Expression>),
+}
+
+/// A table specification made of up to three axes.
+///
+/// Every part is optional.  The second and third parts are each introduced
+/// by a [`By`] token, and the axis following that token may itself be absent.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct Table<'a> {
+    /// First axis, not preceded by `BY`.
+    rows: Option<Axis<'a>>,
+    /// `BY` followed by an optional second axis.
+    columns: Option<(By, Option<Axis<'a>>)>,
+    /// `BY` followed by an optional third axis.
+    layers: Option<(By, Option<Axis<'a>>)>,
+}
+
+/// Recursive expression describing one axis of a table.
+///
+/// NOTE(review): `Nest` and `Stack` both begin with a nested `Axis`, i.e. the
+/// grammar is left-recursive as written -- confirm that the derived parser
+/// copes with this rather than recursing without consuming input.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum Axis<'a> {
+    /// An identifier, optionally followed by a [`Measurement`] in square
+    /// brackets.
+    Variable(&'a Identifier, Option<InSquares<Measurement>>),
+    /// Two axes joined by `>`.
+    Nest(Box<Axis<'a>>, Gt, Box<Axis<'a>>),
+    /// Two axes joined by `+`.
+    Stack(Box<Axis<'a>>, Plus, Box<Axis<'a>>),
+    /// An axis wrapped in parentheses.
+    Parens(InParens<Box<Axis<'a>>>),
+    /// A square-bracketed, punctuated list of [`Annotation`]s.
+    Annotate(InSquares<Punctuated<Annotation<'a>>>),
+}
+
+/// Marker that may follow a variable inside square brackets (see
+/// `Axis::Variable`).
+///
+/// NOTE(review): presumably `C` = categorical and `S` = scale -- confirm.
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Measurement {
+    C,
+    S,
+}
+
+/// One entry of the bracketed list attached to an axis (see
+/// `Axis::Annotate`).
+///
+/// Only `function` is mandatory; the remaining fields are parsed in
+/// declaration order if present.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct Annotation<'a> {
+    /// Identifier naming the function.
+    function: &'a Identifier,
+    /// Optional number following the function name.
+    percentile: Option<Number>,
+    /// Optional string following the function (and number, if any).
+    label: Option<&'a String>,
+    /// Optional format specification; see `AbstractFormat`.
+    format: Option<AbstractFormat>,
+}
+
+/// One setting accepted by `CTablesSubcommand::Format`.
+///
+/// Each variant is followed by `=` and a value of the listed type.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum Format<'a> {
+    /// `=` then a [`Width`].
+    MinColWidth(Equals, Width),
+    /// `=` then a [`Width`].
+    MaxColWidth(Equals, Width),
+    /// `=` then a [`Unit`].
+    Units(Equals, Unit),
+    /// `=` then an [`Empty`] value.
+    Empty(Equals, Empty<'a>),
+    /// `=` then a string.
+    Missing(Equals, &'a String),
+}
+
+/// A column width: either the `default` keyword or an explicit number.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(no_selector)]
+enum Width {
+    /// The `default` keyword.
+    Default(keyword::Default),
+    /// An explicit numeric width.
+    Width(Number),
+}
+
+/// Value accepted by `Format::Empty`: the `zero` keyword, the `blank`
+/// keyword, or a string.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(no_selector, add_lifetime)]
+enum Empty<'a> {
+    /// The `zero` keyword.
+    Zero(keyword::Zero),
+    /// The `blank` keyword.
+    Blank(keyword::Blank),
+    /// A string.
+    Value(&'a String),
+}
+
+/// Measurement unit accepted by `Format::Units`.
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Unit {
+    Points,
+    Inches,
+    Cm,
+}
+
+/// One setting accepted by `CTablesSubcommand::VLabels`.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum VLabels<'a> {
+    /// `=` then a variable list.
+    Variables(Equals, VarList<'a>),
+    /// A [`Display`] choice.
+    Display(Display),
+}
+
+/// Choice accepted by `VLabels::Display`.
+///
+/// NOTE(review): presumably selects whether variable names, labels, both, or
+/// neither are shown -- confirm.
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Display {
+    Default,
+    Name,
+    Label,
+    Both,
+    None,
+}
+
+/// Choice accepted by `CTablesSubcommand::SMissing`.
+#[derive(Debug, pspp_derive::FromTokens)]
+enum SMissing {
+    Variable,
+    Listwise,
+}
+
+/// Contents of the parentheses in `CTablesSubcommand::PCompute`.
+///
+/// Incomplete: only two variants exist so far.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(no_selector)]
+enum Expression {
+    // TODO: not yet implemented; no `Category` type exists in this file.
+    //Category(InSquares<Category<'a>>),
+    Missing,
+    OtherNm,
+    
+}
+
+/// Unit structs that each match one fixed keyword, given by their
+/// `#[pspp(syntax = ...)]` attribute.
+mod keyword {
+    // These names are not referenced explicitly in this module; presumably
+    // the code generated by the derive macro needs them in scope.
+    use crate::command::{FromTokens, ParseResult, TokenSlice};
+
+    /// The `default` keyword.
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "default")]
+    pub struct Default;
+
+    /// The `expr` keyword.
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "expr")]
+    pub struct Expr;
+
+    /// The `zero` keyword.
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "zero")]
+    pub struct Zero;
+
+    /// The `blank` keyword.
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "blank")]
+    pub struct Blank;
+}
index 3c6871bd4facb63dff66ac6a6337f7dffeffbee1..6fe65d5cf16d324dc389f3935aedb2601dcf6011 100644 (file)
@@ -6,12 +6,14 @@ use std::{
 };
 
 use crosstabs::crosstabs_command;
+use ctables::ctables_command;
 use data_list::data_list_command;
 use descriptives::descriptives_command;
 use flagset::{flags, FlagSet};
 use pspp_derive::FromTokens;
 
 use crate::{
+    format::AbstractFormat,
     identifier::Identifier,
     integer::ToInteger,
     lex::{
@@ -23,6 +25,7 @@ use crate::{
 };
 
 pub mod crosstabs;
+pub mod ctables;
 pub mod data_list;
 pub mod descriptives;
 
@@ -208,17 +211,21 @@ impl<'a> FromTokens<'a> for Comma {
 #[pspp(syntax = "=")]
 pub struct Equals;
 
-#[derive(Debug)]
-struct By(Token);
+/// The `&` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "&")]
+pub struct And;
 
-impl<'a> FromTokens<'a> for By {
-    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
-    where
-        Self: Sized,
-    {
-        _parse_token(input, &Token::Punct(Punct::By)).map(|p| p.map(|token| By(token)))
-    }
-}
+/// The `>` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = ">")]
+pub struct Gt;
+
+/// The `+` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "+")]
+pub struct Plus;
+
+/// The `BY` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "BY")]
+struct By;
 
 pub struct Punctuated<T, P = Option<Comma>> {
     head: Vec<(T, P)>,
@@ -295,6 +302,18 @@ where
     }
 }
 
+impl<'a, T> FromTokens<'a> for Box<T>
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        T::from_tokens(input).map(|p| p.map(|value| Box::new(value)))
+    }
+}
+
 pub struct Subcommands<T>(Vec<T>);
 
 impl<T> Debug for Subcommands<T>
@@ -529,6 +548,28 @@ where
     }
 }
 
+/// Wrapper for a `T` that is parsed between `[` and `]`.
+#[derive(Debug)]
+struct InSquares<T>(pub T);
+
+impl<'a, T> FromTokens<'a> for InSquares<T>
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LSquare))?.into_tuple();
+        let (value, rest, warnings) = T::from_tokens(rest)?.into_tuple();
+        let ((), rest, _) = parse_token(rest, &Token::Punct(Punct::RSquare))?.into_tuple();
+        Ok(Parsed {
+            value: Self(value),
+            rest,
+            diagnostics: warnings,
+        })
+    }
+}
+
 fn parse_token_if<'a, F, R>(input: TokenSlice<'a>, parse: F) -> ParseResult<'a, R>
 where
     F: Fn(&Token) -> Option<R>,
@@ -650,6 +691,22 @@ fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> {
     }
 }
 
+fn parse_format<'a>(input: TokenSlice<'a>) -> ParseResult<'a, AbstractFormat> {
+    let mut iter = input.iter();
+    if let Some(LexToken {
+        token: Token::Id(id),
+        ..
+    }) = iter.next()
+    {
+        if let Ok(format) = id.0.as_ref().parse() {
+            return Ok(Parsed::ok(format, iter.remainder()));
+        }
+    }
+    Err(ParseError::Mismatch(
+        input.error("Syntax error expecting identifier.").into(),
+    ))
+}
+
 fn parse_string<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a String> {
     let mut iter = input.iter();
     if let Some(LexToken {
@@ -683,6 +740,15 @@ impl<'a> FromTokens<'a> for &'a String {
     }
 }
 
+/// Lets `AbstractFormat` be used as a field type in derived parsers by
+/// delegating to `parse_format`.
+impl<'a> FromTokens<'a> for AbstractFormat {
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        parse_format(input)
+    }
+}
+
 fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec<TokenSlice<'a>> {
     src.split(|token| token.token == Token::Punct(Punct::Slash))
         .filter(|slice| !slice.is_empty())
@@ -694,6 +760,7 @@ fn commands() -> &'static [Command] {
         vec![
             descriptives_command(),
             crosstabs_command(),
+            ctables_command(),
             data_list_command(),
             Command {
                 allowed_states: FlagSet::full(),
index 956e13949504d25df200c46adab389635ec3e0c5..72790b9f357605b4046db8ab6200070d87b9471d 100644 (file)
@@ -2,6 +2,7 @@
 use std::{
     fmt::{Display, Formatter, Result as FmtResult},
     ops::RangeInclusive,
+    str::FromStr,
 };
 
 use enum_map::{Enum, EnumMap};
@@ -365,6 +366,57 @@ fn max_digits_for_bytes(bytes: usize) -> usize {
     *[0, 3, 5, 8, 10, 13, 15, 17].get(bytes).unwrap_or(&20)
 }
 
+/// A format specification split into its textual parts: a type name, a
+/// width, and a number of decimals.
+///
+/// The name is stored exactly as written; the `FromStr` implementation only
+/// requires it to consist of ASCII letters and does not check it against
+/// the known format types.
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct AbstractFormat {
+    /// Alphabetic type name, as given.
+    pub name: String,
+    /// Width.
+    w: Width,
+    /// Number of decimals; 0 when parsed from text without a `.` part.
+    d: Decimals,
+}
+
+fn split<F>(s: &str, predicate: F) -> (&str, &str)
+where
+    F: Fn(&char) -> bool,
+{
+    let rest = s.trim_start_matches(|c| predicate(&c));
+    let start = &s[..s.len() - rest.len()];
+    (start, rest)
+}
+
+impl FromStr for AbstractFormat {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let (name, s) = split(s, char::is_ascii_alphabetic);
+        if name.is_empty() {
+            return Err(());
+        }
+
+        let (w, s) = split(s, char::is_ascii_digit);
+        let Ok(w) = w.parse() else {
+            return Err(());
+        };
+
+        let (d, rest) = if let Some(s) = s.strip_prefix('.') {
+            let (d, rest) = split(s, char::is_ascii_digit);
+            let Ok(d) = d.parse() else {
+                return Err(());
+            };
+            (d, rest)
+        } else {
+            (0, s)
+        };
+
+        if !rest.is_empty() {
+            return Err(());
+        }
+        Ok(Self {
+            name: name.into(),
+            w,
+            d,
+        })
+    }
+}
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub struct Format {
     type_: Type,