From 814fb0ff37cccd8a27c105932a81ec5716b0a640 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 9 Mar 2025 12:05:32 -0700 Subject: [PATCH] Add tablelook parser. --- rust/Cargo.lock | 34 +- rust/pspp/Cargo.toml | 3 + rust/pspp/src/output/pivot/mod.rs | 529 ++++++++++++++++++++++++++- rust/pspp/src/output/pivot/output.rs | 8 +- 4 files changed, 555 insertions(+), 19 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 10d655058b..e4741043c8 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -255,6 +255,15 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "color" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c387f6cef110ee8eaf12fca5586d3d303c07c594f4a5f02c768b6470b70dbd" +dependencies = [ + "serde", +] + [[package]] name = "colorchoice" version = "1.0.2" @@ -898,6 +907,7 @@ dependencies = [ "chardetng", "chrono", "clap", + "color", "diff", "either", "encoding_rs", @@ -918,6 +928,8 @@ dependencies = [ "num-traits", "ordered-float", "pspp-derive", + "quick-xml", + "serde", "smallstr", "smallvec", "thiserror", @@ -948,6 +960,16 @@ dependencies = [ "tower-lsp", ] +[[package]] +name = "quick-xml" +version = "0.37.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quote" version = "1.0.37" @@ -1028,18 +1050,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.208" +version = "1.0.218" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" +checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" dependencies = [ "serde_derive", ] 
[[package]] name = "serde_derive" -version = "1.0.208" +version = "1.0.218" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" +checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" dependencies = [ "proc-macro2", "quote", @@ -1126,9 +1148,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.77" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 526073e6bf..a4805378fe 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -36,6 +36,9 @@ libm = "0.2.11" smallstr = "0.3.0" itertools = "0.14.0" unicode-linebreak = "0.1.5" +quick-xml = { version = "0.37.2", features = ["serialize"] } +serde = { version = "1.0.218", features = ["derive"] } +color = { version = "0.2.3", features = ["serde"] } [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 0ffa7baaee..b08d83d8dc 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -57,17 +57,20 @@ use std::{ collections::HashMap, - fmt::{Display, Write}, + fmt::{Debug, Display, Write}, iter::{once, repeat}, ops::{Index, IndexMut, Not, Range, RangeInclusive}, - str::from_utf8, + str::{from_utf8, FromStr}, sync::{Arc, OnceLock, Weak}, }; use chrono::NaiveDateTime; +pub use color::ParseError as ParseColorError; +use color::{palette::css::TRANSPARENT, AlphaColor, Rgba8, Srgb}; use encoding_rs::UTF_8; use enum_iterator::Sequence; use enum_map::{enum_map, Enum, EnumMap}; +use 
serde::{de::Visitor, Deserialize}; use smallstr::SmallString; use smallvec::{smallvec, SmallVec}; @@ -144,7 +147,7 @@ impl Area { } /// Table borders for styling purposes. -#[derive(Copy, Clone, Debug, Enum)] +#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)] pub enum Border { Title, OuterFrame(BoxBorder), @@ -185,7 +188,7 @@ impl Border { } /// The borders on a box. -#[derive(Copy, Clone, Debug, Enum)] +#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)] pub enum BoxBorder { Left, Top, @@ -508,7 +511,8 @@ pub struct Look { pub name: Option, pub omit_empty: bool, - pub row_labels_in_corner: bool, + + pub row_label_position: RowLabelPosition, /// Ranges of column widths in the two heading regions, in 1/96" units. pub heading_widths: EnumMap>, @@ -545,7 +549,7 @@ impl Default for Look { Self { name: None, omit_empty: true, - row_labels_in_corner: true, + row_label_position: RowLabelPosition::default(), heading_widths: EnumMap::from_fn(|region| match region { HeadingRegion::RowHeadings => 36..=72, HeadingRegion::ColumnHeadings => 36..=120, @@ -572,6 +576,418 @@ impl Look { } } +#[derive(Copy, Clone, Debug, Default, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub enum RowLabelPosition { + Nested, + + #[default] + InCorner, +} + +mod look_xml { + use std::{fmt::Debug, num::ParseFloatError, str::FromStr}; + + use serde::{de::Visitor, Deserialize}; + + use crate::output::pivot::{ + Color, FootnoteMarkerPosition, FootnoteMarkerType, RowLabelPosition, Stroke, + }; + use thiserror::Error as ThisError; + + #[derive(Deserialize, Debug)] + #[serde(rename_all = "camelCase")] + struct TableProperties { + #[serde(rename = "@name")] + name: Option, + general_properties: GeneralProperties, + footnote_properties: FootnoteProperties, + cell_format_properties: CellFormatProperties, + border_properties: BorderProperties, + printing_properties: PrintingProperties, + } + + #[derive(Deserialize, Debug)] + struct GeneralProperties { + #[serde(rename = 
"@hideEmptyRows")] + hide_empty_rows: bool, + + #[serde(rename = "@maximumColumnWidth")] + maximum_column_width: i64, + + #[serde(rename = "@minimumColumnWidth")] + minimum_column_width: i64, + + #[serde(rename = "@maximumRowWidth")] + maximum_row_width: i64, + + #[serde(rename = "@minimumRowWidth")] + minimum_row_width: i64, + + #[serde(rename = "@rowDimensionLabels")] + row_dimension_labels: RowLabelPosition, + } + + #[derive(Deserialize, Debug)] + #[serde(rename_all = "camelCase")] + struct FootnoteProperties { + #[serde(rename = "@markerPosition")] + marker_position: FootnoteMarkerPosition, + + #[serde(rename = "@numberFormat")] + number_format: FootnoteMarkerType, + } + + #[derive(Deserialize, Debug)] + #[serde(rename_all = "camelCase")] + struct CellFormatProperties { + caption: CellStyleHolder, + column_labels: CellStyleHolder, + corner_labels: CellStyleHolder, + data: CellStyleHolder, + footnotes: CellStyleHolder, + layers: CellStyleHolder, + row_labels: CellStyleHolder, + title: CellStyleHolder, + } + + #[derive(Deserialize, Debug)] + #[serde(rename_all = "camelCase")] + struct CellStyleHolder { + style: CellStyle, + } + + #[derive(Deserialize, Debug, Default)] + #[serde(default)] + struct CellStyle { + #[serde(rename = "@alternatingColor")] + alternating_color: Option, + #[serde(rename = "@alternatingTextColor")] + alternating_text_color: Option, + #[serde(rename = "@color")] + color: Option, + #[serde(rename = "@color2")] + color2: Option, + #[serde(rename = "@font-family")] + font_family: String, + #[serde(rename = "@font-size")] + font_size: Dimension, + #[serde(rename = "@font-style")] + font_style: FontStyle, + #[serde(rename = "@font-weight")] + font_weight: FontWeight, + #[serde(rename = "@font-underline")] + font_underline: FontUnderline, + #[serde(rename = "@labelLocationVertical")] + label_location_vertical: LabelLocationVertical, + #[serde(rename = "@margin-bottom")] + margin_bottom: Dimension, + #[serde(rename = "@margin-left")] + margin_left: 
Dimension, + #[serde(rename = "@margin-right")] + margin_right: Dimension, + #[serde(rename = "@margin-top")] + margin_top: Dimension, + #[serde(rename = "@textAlignment", default)] + text_alignment: TextAlignment, + #[serde(rename = "@decimal-offset")] + decimal_offset: Dimension, + } + + #[derive(Deserialize, Debug, Default)] + #[serde(rename_all = "camelCase")] + enum FontStyle { + #[default] + Regular, + Italic, + } + + #[derive(Deserialize, Debug, Default)] + #[serde(rename_all = "camelCase")] + enum FontWeight { + #[default] + Regular, + Bold, + } + + #[derive(Deserialize, Debug, Default)] + #[serde(rename_all = "camelCase")] + enum FontUnderline { + #[default] + None, + Underline, + } + + #[derive(Deserialize, Debug, Default)] + #[serde(rename_all = "camelCase")] + enum TextAlignment { + Left, + Right, + Center, + Decimal, + #[default] + Mixed, + } + + #[derive(Deserialize, Debug, Default)] + #[serde(rename_all = "camelCase")] + enum LabelLocationVertical { + /// Top. + #[default] + Positive, + + /// Bottom. + Negative, + + /// Center. 
+ Center, + } + + #[derive(Deserialize, Debug)] + #[serde(rename_all = "camelCase")] + struct BorderProperties { + bottom_inner_frame: BorderStyle, + bottom_outer_frame: BorderStyle, + data_area_left: BorderStyle, + data_area_top: BorderStyle, + horizontal_category_border_columns: BorderStyle, + horizontal_category_border_rows: BorderStyle, + horizontal_dimension_border_columns: BorderStyle, + horizontal_dimension_border_rows: BorderStyle, + left_inner_frame: BorderStyle, + left_outer_frame: BorderStyle, + right_inner_frame: BorderStyle, + right_outer_frame: BorderStyle, + title_layer_separator: BorderStyle, + top_inner_frame: BorderStyle, + top_outer_frame: BorderStyle, + vertical_category_border_columns: BorderStyle, + vertical_category_border_rows: BorderStyle, + vertical_dimension_border_rows: BorderStyle, + vertical_dimension_border_columns: BorderStyle, + } + + #[derive(Deserialize, Debug)] + struct BorderStyle { + #[serde(rename = "@borderStyleType")] + border_style_type: Stroke, + + #[serde(rename = "@color")] + color: Color, + } + + #[derive(Deserialize, Debug, Default)] + #[serde(rename_all = "camelCase", default)] + struct PrintingProperties { + #[serde(rename = "@printAllLayers")] + print_all_layers: bool, + + #[serde(rename = "@printEachLayerOnSeparatePage")] + print_each_layer_on_separate_page: bool, + + #[serde(rename = "@rescaleWideTableToFitPage")] + rescale_wide_table_to_fit_page: bool, + + #[serde(rename = "@rescaleLongTableToFitPage")] + rescale_long_table_to_fit_page: bool, + + #[serde(rename = "@windowOrphanLines")] + window_orphan_lines: i64, + + #[serde(rename = "@continuationText")] + continuation_text: String, + + #[serde(rename = "@continuationTextAtBottom")] + continuation_text_at_bottom: bool, + + #[serde(rename = "@continuationTextAtTop")] + continuation_text_at_top: bool, + } + + #[derive(Default, PartialEq)] + struct Dimension( + /// In inches. 
+ f64, + ); + + impl Debug for Dimension { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:.2}in", self.0) + } + } + + impl FromStr for Dimension { + type Err = DimensionParseError; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let s = s.trim_start(); + let unit = s.trim_start_matches(|c: char| c.is_ascii_digit() || c == '.'); + let number: f64 = s[..s.len() - unit.len()] + .parse() + .map_err(DimensionParseError::ParseFloatError)?; + let divisor = match unit.trim() { + // Inches. + "in" | "인치" | "pol." | "cala" | "cali" => 1.0, + + // Device-independent pixels. + "px" => 96.0, + + // Points. + "pt" | "пт" | "" => 72.0, + + // Centimeters. + "cm" | "см" => 2.54, + + other => return Err(DimensionParseError::InvalidUnit(other.into())), + }; + Ok(Dimension(number / divisor)) + } + } + + #[derive(ThisError, Debug, PartialEq, Eq)] + enum DimensionParseError { + /// Invalid number. + #[error("{0}")] + ParseFloatError(ParseFloatError), + + /// Unknown unit. + #[error("Unknown unit {0:?}")] + InvalidUnit(String), + } + + impl<'de> Deserialize<'de> for Dimension { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + struct DimensionVisitor; + + impl<'de> Visitor<'de> for DimensionVisitor { + type Value = Dimension; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a string") + } + + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + v.parse().map_err(E::custom) + } + } + + deserializer.deserialize_str(DimensionVisitor) + } + } + + #[cfg(test)] + mod test { + use std::str::FromStr; + + use quick_xml::de::from_str; + + use crate::output::pivot::look_xml::{Dimension, DimensionParseError, TableProperties}; + + #[test] + fn dimension() { + assert_eq!(Dimension::from_str("1"), Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str("1pt"), Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str("1пт"), 
Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str("1.0"), Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str(" 1.0"), Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str(" 1.0 "), Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str("1.0 pt"), Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str("1.0pt "), Ok(Dimension(1.0 / 72.0))); + assert_eq!(Dimension::from_str(" 1.0pt "), Ok(Dimension(1.0 / 72.0))); + + assert_eq!(Dimension::from_str("1in"), Ok(Dimension(1.0))); + + assert_eq!(Dimension::from_str("96px"), Ok(Dimension(1.0))); + + assert_eq!(Dimension::from_str("2.54cm"), Ok(Dimension(1.0))); + + assert_eq!( + Dimension::from_str(""), + Err(DimensionParseError::ParseFloatError( + "".parse::().unwrap_err() + )) + ); + assert_eq!( + Dimension::from_str("1.2.3"), + Err(DimensionParseError::ParseFloatError( + "1.2.3".parse::().unwrap_err() + )) + ); + assert_eq!( + Dimension::from_str("1asdf"), + Err(DimensionParseError::InvalidUnit("asdf".into())) + ); + } + + #[test] + fn look() { + const XML: &str = r##" + + + + + + + <vizml:style color="#000000" color2="#ffffff" font-family="Sans Serif" font-size="9pt" font-weight="bold" font-underline="none" labelLocationVertical="center" margin-bottom="6pt" margin-left="6pt" margin-right="8pt" margin-top="0pt" textAlignment="left"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +"##; + let table_properties: TableProperties = from_str(XML).unwrap(); + dbg!(&table_properties); + } + } +} + /// The heading region of a rendered pivot table: /// /// ```text @@ -643,7 +1059,7 @@ impl HorzAlign { } } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum VertAlign { /// Top alignment. 
Top, @@ -669,7 +1085,7 @@ pub struct FontStyle { size: i32, } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Color { alpha: u8, r: u8, @@ -680,6 +1096,7 @@ pub struct Color { impl Color { const BLACK: Color = Color::new(0, 0, 0); const WHITE: Color = Color::new(255, 255, 255); + const TRANSPARENT: Color = Color::new(0, 0, 0).with_alpha(0); const fn new(r: u8, g: u8, b: u8) -> Self { Self { @@ -689,6 +1106,93 @@ impl Color { b, } } + + const fn with_alpha(self, alpha: u8) -> Self { + Self { alpha, ..self } + } +} + +impl Debug for Color { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Color { alpha, r, g, b } = *self; + match alpha { + 255 => write!(f, "#{r:02x}{g:02x}{b:02x}"), + _ => write!(f, "rgb({r}, {g}, {b}, {:.2})", alpha as f64 / 255.0), + } + } +} + +impl From<Rgba8> for Color { + fn from(Rgba8 { r, g, b, a }: Rgba8) -> Self { + Self::new(r, g, b).with_alpha(a) + } +} + +impl FromStr for Color { + type Err = ParseColorError; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + fn is_bare_hex(s: &str) -> bool { + let s = s.trim(); + s.chars().count() == 6 && s.chars().all(|c| c.is_ascii_hexdigit()) + } + let color: AlphaColor<Srgb> = match s.parse() { + Err(ParseColorError::UnknownColorSyntax) if is_bare_hex(s) => { + ("#".to_owned() + s).parse() + } + Err(ParseColorError::UnknownColorSyntax) + if s.trim().eq_ignore_ascii_case("transparent") => + { + Ok(TRANSPARENT) + } + other => other, + }?; + Ok(color.to_rgba8().into()) + } +} + +#[cfg(test)] +mod test { + use crate::output::pivot::Color; + + #[test] + fn color() { + assert_eq!("#112233".parse(), Ok(Color::new(0x11, 0x22, 0x33))); + assert_eq!("112233".parse(), Ok(Color::new(0x11, 0x22, 0x33))); + assert_eq!("rgb(11,22,33)".parse(), Ok(Color::new(11, 22, 33))); + assert_eq!( + "rgba(11,22,33, 0.25)".parse(), + Ok(Color::new(11, 22, 33).with_alpha(64)) + ); + assert_eq!("lavender".parse(), Ok(Color::new(230, 230, 250))); + assert_eq!("transparent".parse(), 
Ok(Color::new(0, 0, 0).with_alpha(0))); + } +} + +impl<'de> Deserialize<'de> for Color { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + struct ColorVisitor; + + impl<'de> Visitor<'de> for ColorVisitor { + type Value = Color; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("\"#rrggbb\" or \"rrggbb\" or web color name") + } + + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + v.parse().map_err(E::custom) + } + } + + deserializer.deserialize_str(ColorVisitor) + } } #[derive(Copy, Clone, Debug)] @@ -720,7 +1224,8 @@ impl BorderStyle { } } -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Enum)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Enum, Deserialize)] +#[serde(rename_all = "camelCase")] pub enum Stroke { None, Solid, @@ -882,7 +1387,8 @@ impl IndexMut for Rect2 { } } -#[derive(Copy, Clone, Debug, Default)] +#[derive(Copy, Clone, Debug, Default, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] pub enum FootnoteMarkerType { /// a, b, c, ... #[default] @@ -892,7 +1398,8 @@ pub enum FootnoteMarkerType { Numeric, } -#[derive(Copy, Clone, Debug, Default)] +#[derive(Copy, Clone, Debug, Default, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] pub enum FootnoteMarkerPosition { /// Subscripts. 
#[default] diff --git a/rust/pspp/src/output/pivot/output.rs b/rust/pspp/src/output/pivot/output.rs index 6f8bb82ed6..c15b569d01 100644 --- a/rust/pspp/src/output/pivot/output.rs +++ b/rust/pspp/src/output/pivot/output.rs @@ -3,7 +3,10 @@ use std::sync::Arc; use enum_map::{enum_map, EnumMap}; use smallvec::{SmallVec, ToSmallVec}; -use crate::output::table::{CellInner, Table}; +use crate::output::{ + pivot::RowLabelPosition, + table::{CellInner, Table}, +}; use super::{ Area, AsValueOptions, Axis, Axis2, Axis3, Border, BorderStyle, BoxBorder, Category, @@ -216,7 +219,8 @@ impl PivotTable { ); } } - if (self.corner_text.is_some() || self.look.row_labels_in_corner) + if (self.corner_text.is_some() + || self.look.row_label_position == RowLabelPosition::InCorner) && stub.x() > 0 && stub.y() > 0 { -- 2.30.2