From 35d335c40777bfe86031bc3665771cc991cb8c59 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Thu, 12 Dec 2024 12:06:27 -0800
Subject: [PATCH] start on ctables

---
 rust/pspp/src/command/ctables.rs | 176 +++++++++++++++++++++++++++++++
 rust/pspp/src/command/mod.rs     |  98 +++++++++++++++--
 rust/pspp/src/format.rs          |  60 +++++++++++
 3 files changed, 324 insertions(+), 10 deletions(-)
 create mode 100644 rust/pspp/src/command/ctables.rs

diff --git a/rust/pspp/src/command/ctables.rs b/rust/pspp/src/command/ctables.rs
new file mode 100644
index 0000000000..55da82bbd8
--- /dev/null
+++ b/rust/pspp/src/command/ctables.rs
@@ -0,0 +1,176 @@
+use flagset::FlagSet;
+
+use super::{
+    And, By, Command, Equals, Gt, InSquares, Number, Plus, Punctuated, Seq1, Subcommands, VarList
+};
+use crate::{
+    command::{FromTokens, InParens, MismatchToError, ParseError, ParseResult, Parsed, TokenSlice},
+    format::AbstractFormat,
+    identifier::Identifier,
+};
+
+/// Returns the [`Command`] definition for `CTABLES`.
+///
+/// Only parsing is wired up so far: running the command prints the parsed
+/// syntax and its diagnostics, or the parse error, to stdout.
+pub(super) fn ctables_command() -> Command {
+    Command {
+        allowed_states: FlagSet::full(),
+        enhanced_only: false,
+        testing_only: false,
+        no_abbrev: false,
+        name: "CTABLES",
+        run: Box::new(|context| {
+            let input = context.lexer;
+            match <CTables>::from_tokens(input) {
+                Ok(Parsed {
+                    value,
+                    rest: _,
+                    diagnostics,
+                }) => {
+                    println!("\n{value:#?}");
+                    //println!("rest: {rest:?}");
+                    println!("warnings: {diagnostics:?}");
+                    //println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0));
+                }
+                Err(error) => {
+                    println!("{error:?}");
+                }
+            }
+        }),
+    }
+}
+
+/// Top-level `CTABLES` syntax: the list of its subcommands.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct CTables<'a>(Subcommands<CTablesSubcommand<'a>>);
+
+/// A single `CTABLES` subcommand.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum CTablesSubcommand<'a> {
+    Table(Table<'a>),
+    Format(Seq1<Format<'a>>),
+    VLabels(Seq1<VLabels<'a>>),
+    SMissing(SMissing),
+    PCompute(And, &'a Identifier, Equals, keyword::Expr, InParens<Expression>),
+}
+
+/// Body of the `TABLE` subcommand: rows, columns, and layers, separated by `BY`.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct Table<'a> {
+    rows: Option<Axis<'a>>,
+    columns: Option<(By, Option<Axis<'a>>)>,
+    layers: Option<(By, Option<Axis<'a>>)>,
+}
+
+/// Axis expression used for the rows, columns, and layers of a [`Table`].
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum Axis<'a> {
+    Variable(&'a Identifier, Option<InSquares<Measurement>>),
+    Nest(Box<Axis<'a>>, Gt, Box<Axis<'a>>),
+    Stack(Box<Axis<'a>>, Plus, Box<Axis<'a>>),
+    Parens(InParens<Box<Axis<'a>>>),
+    Annotate(InSquares<Punctuated<Annotation<'a>>>),
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Measurement {
+    C,
+    S,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+struct Annotation<'a> {
+    function: &'a Identifier,
+    percentile: Option<Number>,
+    label: Option<&'a String>,
+    format: Option<AbstractFormat>,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum Format<'a> {
+    MinColWidth(Equals, Width),
+    MaxColWidth(Equals, Width),
+    Units(Equals, Unit),
+    Empty(Equals, Empty<'a>),
+    Missing(Equals, &'a String),
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(no_selector)]
+enum Width {
+    Default(keyword::Default),
+    Width(Number),
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(no_selector, add_lifetime)]
+enum Empty<'a> {
+    Zero(keyword::Zero),
+    Blank(keyword::Blank),
+    Value(&'a String),
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Unit {
+    Points,
+    Inches,
+    Cm,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(add_lifetime)]
+enum VLabels<'a> {
+    Variables(Equals, VarList<'a>),
+    Display(Display),
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum Display {
+    Default,
+    Name,
+    Label,
+    Both,
+    None,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+enum SMissing {
+    Variable,
+    Listwise,
+}
+
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(no_selector)]
+enum Expression {
+    //Category(InSquares>),
+    Missing,
+    OtherNm,
+
+}
+
+mod keyword {
+    use crate::command::{FromTokens, ParseResult, TokenSlice};
+
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "default")]
+    pub struct Default;
+
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "expr")]
+    pub struct Expr;
+
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "zero")]
+    pub struct Zero;
+
+    #[derive(Debug, pspp_derive::FromTokens)]
+    #[pspp(syntax = "blank")]
+    pub struct Blank;
+}
diff --git a/rust/pspp/src/command/mod.rs b/rust/pspp/src/command/mod.rs
index 3c6871bd4f..6fe65d5cf1 100644
--- a/rust/pspp/src/command/mod.rs
+++ b/rust/pspp/src/command/mod.rs
@@ -6,12 +6,14 @@ use std::{
 };
 
 use crosstabs::crosstabs_command;
+use ctables::ctables_command;
 use data_list::data_list_command;
 use descriptives::descriptives_command;
 use flagset::{flags, FlagSet};
 use pspp_derive::FromTokens;
 
 use crate::{
+    format::AbstractFormat,
     identifier::Identifier,
     integer::ToInteger,
     lex::{
@@ -23,6 +25,7 @@ use crate::{
 };
 
 pub mod crosstabs;
+pub mod ctables;
 pub mod data_list;
 pub mod descriptives;
 
@@ -208,17 +211,25 @@ impl<'a> FromTokens<'a> for Comma {
 #[pspp(syntax = "=")]
 pub struct Equals;
 
-#[derive(Debug)]
-struct By(Token);
+/// Matches the `&` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "&")]
+pub struct And;
 
-impl<'a> FromTokens<'a> for By {
-    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
-    where
-        Self: Sized,
-    {
-        _parse_token(input, &Token::Punct(Punct::By)).map(|p| p.map(|token| By(token)))
-    }
-}
+/// Matches the `>` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = ">")]
+pub struct Gt;
+
+/// Matches the `+` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "+")]
+pub struct Plus;
+
+/// Matches the `BY` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "BY")]
+struct By;
 
 pub struct Punctuated<T, P = Option<Comma>> {
     head: Vec<(T, P)>,
@@ -295,6 +306,20 @@ where
     }
 }
 
+/// Parses `Box<T>` by parsing `T` and boxing it, which lets recursive syntax
+/// types such as `ctables::Axis` contain themselves.
+impl<'a, T> FromTokens<'a> for Box<T>
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        T::from_tokens(input).map(|p| p.map(Box::new))
+    }
+}
+
 pub struct Subcommands<T>(Vec<T>);
 
 impl<T> Debug for Subcommands<T>
@@ -529,6 +554,29 @@ where
     }
 }
 
+/// A `T` enclosed in square brackets.
+#[derive(Debug)]
+struct InSquares<T>(pub T);
+
+impl<'a, T> FromTokens<'a> for InSquares<T>
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LSquare))?.into_tuple();
+        let (value, rest, warnings) = T::from_tokens(rest)?.into_tuple();
+        let ((), rest, _) = parse_token(rest, &Token::Punct(Punct::RSquare))?.into_tuple();
+        Ok(Parsed {
+            value: Self(value),
+            rest,
+            diagnostics: warnings,
+        })
+    }
+}
+
 fn parse_token_if<'a, F, R>(input: TokenSlice<'a>, parse: F) -> ParseResult<'a, R>
 where
     F: Fn(&Token) -> Option<R>,
@@ -650,6 +698,26 @@ fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> {
     }
 }
 
+/// Parses one identifier token as an [`AbstractFormat`].
+///
+/// Returns a mismatch error if the next token is not an identifier or if its
+/// text does not parse as a format specifier.
+fn parse_format<'a>(input: TokenSlice<'a>) -> ParseResult<'a, AbstractFormat> {
+    let mut iter = input.iter();
+    if let Some(LexToken {
+        token: Token::Id(id),
+        ..
+    }) = iter.next()
+    {
+        if let Ok(format) = id.0.as_ref().parse() {
+            return Ok(Parsed::ok(format, iter.remainder()));
+        }
+    }
+    Err(ParseError::Mismatch(
+        input.error("Syntax error expecting format.").into(),
+    ))
+}
+
 fn parse_string<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a String> {
     let mut iter = input.iter();
     if let Some(LexToken {
@@ -683,6 +751,15 @@ impl<'a> FromTokens<'a> for &'a String {
     }
 }
 
+impl<'a> FromTokens<'a> for AbstractFormat {
+    fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self>
+    where
+        Self: Sized,
+    {
+        parse_format(input)
+    }
+}
+
 fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec<TokenSlice<'a>> {
     src.split(|token| token.token == Token::Punct(Punct::Slash))
         .filter(|slice| !slice.is_empty())
@@ -694,6 +771,7 @@ fn commands() -> &'static [Command] {
         vec![
             descriptives_command(),
             crosstabs_command(),
+            ctables_command(),
             data_list_command(),
             Command {
                 allowed_states: FlagSet::full(),
diff --git a/rust/pspp/src/format.rs b/rust/pspp/src/format.rs
index 956e139495..72790b9f35 100644
--- a/rust/pspp/src/format.rs
+++ b/rust/pspp/src/format.rs
@@ -2,6 +2,7 @@
 use std::{
     fmt::{Display, Formatter, Result as FmtResult},
     ops::RangeInclusive,
+    str::FromStr,
 };
 
 use enum_map::{Enum, EnumMap};
@@ -365,6 +366,65 @@ fn max_digits_for_bytes(bytes: usize) -> usize {
     *[0, 3, 5, 8, 10, 13, 15, 17].get(bytes).unwrap_or(&20)
 }
 
+/// A format specifier split into a name, a width, and decimals, without
+/// checking that the name is a known format type.
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct AbstractFormat {
+    pub name: String,
+    w: Width,
+    d: Decimals,
+}
+
+/// Splits `s` into its longest prefix whose characters all satisfy `predicate`
+/// and the rest of the string.
+fn split<F>(s: &str, predicate: F) -> (&str, &str)
+where
+    F: Fn(&char) -> bool,
+{
+    let rest = s.trim_start_matches(|c| predicate(&c));
+    let start = &s[..s.len() - rest.len()];
+    (start, rest)
+}
+
+/// Parses a name, a width, and optional decimals: ASCII letters, then ASCII
+/// digits for the width, then optionally `.` followed by ASCII digits for the
+/// decimals (default 0).  Anything else, including trailing text, is an error.
+/// NOTE(review): an empty width is rejected only if `Width`'s parser does so.
+impl FromStr for AbstractFormat {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let (name, s) = split(s, char::is_ascii_alphabetic);
+        if name.is_empty() {
+            return Err(());
+        }
+
+        let (w, s) = split(s, char::is_ascii_digit);
+        let Ok(w) = w.parse() else {
+            return Err(());
+        };
+
+        let (d, rest) = if let Some(s) = s.strip_prefix('.') {
+            let (d, rest) = split(s, char::is_ascii_digit);
+            let Ok(d) = d.parse() else {
+                return Err(());
+            };
+            (d, rest)
+        } else {
+            (0, s)
+        };
+
+        if !rest.is_empty() {
+            return Err(());
+        }
+        Ok(Self {
+            name: name.into(),
+            w,
+            d,
+        })
+    }
+}
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub struct Format {
     type_: Type,
-- 
2.30.2