work on parsing tlo files
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 9 Mar 2025 23:58:34 +0000 (16:58 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 9 Mar 2025 23:58:34 +0000 (16:58 -0700)
rust/Cargo.lock
rust/pspp/Cargo.toml
rust/pspp/src/output/pivot/mod.rs
rust/pspp/src/output/pivot/test1.tlo [new file with mode: 0644]
rust/pspp/src/output/pivot/tlo.rs [new file with mode: 0644]

index e4741043c8372546676c0a82c4d1a59a635770a6..8495ff462eb48932a885ec4ce3f6c4cf73bec019 100644 (file)
@@ -96,6 +96,12 @@ version = "1.0.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
 
+[[package]]
+name = "array-init"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc"
+
 [[package]]
 name = "async-trait"
 version = "0.1.81"
@@ -104,7 +110,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -126,7 +132,7 @@ checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -150,6 +156,30 @@ dependencies = [
  "rustc-demangle",
 ]
 
+[[package]]
+name = "binrw"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d4bca59c20d6f40c2cc0802afbe1e788b89096f61bdf7aeea6bf00f10c2909b"
+dependencies = [
+ "array-init",
+ "binrw_derive",
+ "bytemuck",
+]
+
+[[package]]
+name = "binrw_derive"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8ba42866ce5bced2645bfa15e97eef2c62d2bdb530510538de8dd3d04efff3c"
+dependencies = [
+ "either",
+ "owo-colors",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -168,6 +198,12 @@ version = "3.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
 
+[[package]]
+name = "bytemuck"
+version = "1.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540"
+
 [[package]]
 name = "bytes"
 version = "1.7.1"
@@ -246,7 +282,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -336,7 +372,7 @@ checksum = "a1ab991c1362ac86c61ab6f556cff143daa22e5a15e4e189df818b2fd19fe65b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -356,7 +392,7 @@ checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -479,7 +515,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -761,7 +797,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -828,6 +864,12 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "owo-colors"
+version = "3.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
+
 [[package]]
 name = "parking_lot"
 version = "0.12.3"
@@ -874,7 +916,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -903,6 +945,7 @@ name = "pspp"
 version = "1.0.0"
 dependencies = [
  "anyhow",
+ "binrw",
  "bitflags 2.6.0",
  "chardetng",
  "chrono",
@@ -946,7 +989,7 @@ version = "0.1.0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1065,7 +1108,7 @@ checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1088,7 +1131,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1146,6 +1189,17 @@ version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.87"
@@ -1193,7 +1247,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1237,7 +1291,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1304,7 +1358,7 @@ checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1332,7 +1386,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1444,7 +1498,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
  "wasm-bindgen-shared",
 ]
 
@@ -1466,7 +1520,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.87",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
index a4805378fea618d24b1ec66a1a0abaa2b700d0ef..cba39b6ad6e678f479a8bbe7f4b3cc2ae417a18f 100644 (file)
@@ -39,6 +39,7 @@ unicode-linebreak = "0.1.5"
 quick-xml = { version = "0.37.2", features = ["serialize"] }
 serde = { version = "1.0.218", features = ["derive"] }
 color = { version = "0.2.3", features = ["serde"] }
+binrw = "0.14.1"
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
index fd51041e20b8f9537b00e7bc4fd56b29be1da63c..0978751d486f4d64dbe7479c3c59fd277d537917 100644 (file)
@@ -85,6 +85,7 @@ use crate::{
 pub mod output;
 
 mod look_xml;
+mod tlo;
 pub use look_xml::TableProperties;
 
 /// Areas of a pivot table for styling purposes.
diff --git a/rust/pspp/src/output/pivot/test1.tlo b/rust/pspp/src/output/pivot/test1.tlo
new file mode 100644 (file)
index 0000000..a53af34
Binary files /dev/null and b/rust/pspp/src/output/pivot/test1.tlo differ
diff --git a/rust/pspp/src/output/pivot/tlo.rs b/rust/pspp/src/output/pivot/tlo.rs
new file mode 100644 (file)
index 0000000..b97be96
--- /dev/null
@@ -0,0 +1,214 @@
+use std::fmt::Debug;
+
+use binrw::{binread, BinRead, BinResult, Error as BinError};
+
+#[binread]
+#[br(little)] // integers in this record are read little-endian
+#[derive(Debug)]
+struct TableLook { // root record (the unit test parses a whole .tlo file as one of these)
+    pt_table_look: PtTableLook, // the four sections are read in declaration order
+    pv_separator_style: PvSeparatorStyle,
+    pv_cell_style: PvCellStyle,
+    pv_text_style: PvTextStyle,
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+struct PtTableLook { // first section, introduced by a "PTTableLook" tag
+    #[br(temp)] // read and checked, but not stored in the struct
+    #[br(assert(&tag.string == b"PTTableLook"))]
+    tag: Tag,
+
+    #[br(assert(version == 0 || version == 2, "PTTableLook version {version} not supported (expected 0 or 2)."))]
+    #[br(temp)]
+    version: u8, // only 0 and 2 are accepted; the value itself is discarded
+
+    flags: u16, // NOTE(review): individual bits are not decoded here
+
+    #[br(magic = b"\0\0")] // two zero bytes must precede this flag
+    #[br(parse_with = parse_bool)]
+    nested_row_labels: bool,
+
+    #[br(magic = b"\0")] // one zero byte must precede this flag
+    #[br(parse_with = parse_bool)]
+    footnote_marker_subscripts: bool,
+
+    #[br(temp, magic = b"\0\x36\0\0\0\x12\0\0\0")] // fixed 9-byte trailer; checked, nothing stored
+    _empty: (),
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+struct PvSeparatorStyle { // section tagged "PVSeparatorStyle": two groups of four separators
+    #[br(assert(&tag.string == b"PVSeparatorStyle"))]
+    tag: Tag, // checked and also kept in the struct (PtTableLook marks its tag `temp` instead)
+
+    #[br(magic = b"\0")] // one zero byte precedes the first group
+    horizontal_dimension_rows: Separator,
+    vertical_dimension_rows: Separator,
+    horizontal_category_rows: Separator,
+    vertical_category_rows: Separator,
+
+    #[br(magic = b"\x03\x80\0")] // fixed bytes between the groups; NOTE(review): meaning unknown
+    horizontal_dimension_columns: Separator,
+    vertical_dimension_columns: Separator,
+    horizontal_category_columns: Separator,
+    vertical_category_columns: Separator,
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+enum Separator { // a leading u16 selects the variant; values other than 0 or 1 fail the parse
+    #[br(magic = 0u16)]
+    None, // no payload follows the 0 marker
+    #[br(magic = 1u16)]
+    Some { color: u32, style: u16, width: u16 }, // NOTE(review): encodings/units not shown here
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+struct PvCellStyle { // section tagged "PVCellStyle"; currently holds a single area color
+    #[br(assert(&tag.string == b"PVCellStyle"))]
+    tag: Tag, // checked against the expected name and kept in the struct
+
+    title_color: AreaColor,
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+struct AreaColor { // 3 fixed bytes, two u32 colors, a shading byte, then a zero byte
+    #[br(magic = b"\0\x01\0")]
+    color10: u32, // NOTE(review): how color10 and color0 differ is not evident from this file
+    color0: u32,
+    shading: u8,
+    #[br(temp, magic = 0u8)] // trailing zero byte is checked; nothing is stored
+    _empty: (),
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+struct PvTextStyle { // section tagged "PVTextStyle": a title style plus seven color+style areas
+    #[br(assert(&tag.string == b"PVTextStyle"))]
+    tag: Tag,
+
+    #[br(magic = 0u8)] // one zero byte precedes the title style
+    title_style: AreaStyle, // no color here; presumably pairs with PvCellStyle::title_color (TODO confirm)
+    layers: MostAreas,
+    corner: MostAreas,
+    row_labels: MostAreas,
+    column_labels: MostAreas,
+    data: MostAreas,
+    caption: MostAreas,
+    footer: MostAreas,
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+struct MostAreas { // color + style pair; PvTextStyle uses it for every area except the title
+    #[br(magic = b"\x06\x80")] // fixed bytes that must precede the color record
+    color: AreaColor,
+
+    #[br(magic = b"\x08\x80\0")] // fixed bytes that must precede the style record
+    style: AreaStyle,
+}
+
+#[binread]
+#[br(little)]
+#[derive(Debug)]
+struct AreaStyle {
+    valign: u16,
+    halign: u16,
+    decimal_offset: u16,
+    left_margin: u16,
+    right_margin: u16,
+    top_margin: u16,
+    bottom_margin: u16,
+    #[br(magic = b"\0\0\x01\0")]
+    font_size: i32,
+    stretch: u16,
+    #[br(magic = 0u16)]
+    rotation_angle: u32,
+    #[br(magic = 0u32)]
+    weight: u16,
+    #[br(magic = 0u16)]
+    #[br(parse_with = parse_bool)]
+    italic: bool,
+    #[br(parse_with = parse_bool)]
+    underline: bool,
+    #[br(parse_with = parse_bool)]
+    strike_through: bool,
+    rtf_charset_number: u32,
+    x: u8,
+    font_name: U8String,
+    text_color: u32,
+    #[br(magic = 0u16)]
+    _empty: (),
+}
+
+#[binrw::parser(reader, endian)]
+fn parse_bool() -> BinResult<bool> {
+    let byte = <u8>::read_options(reader, endian, ())?;
+    match byte {
+        0 => Ok(false),
+        1 => Ok(true),
+        _ => Err(BinError::NoVariantMatch {
+            pos: reader.stream_position()? - 1,
+        }),
+    }
+}
+
+#[binread]
+#[br(little)]
+struct Tag { // section marker: bytes FF FF 00 00, a u16 length, then that many name bytes
+    #[br(magic = b"\xff\xff\0\0")]
+    #[br(temp)]
+    length: u16, // used only as the element count for `string`; not stored
+
+    #[br(count = length)]
+    string: Vec<u8>, // raw bytes; callers compare them against ASCII section names
+}
+
+impl Debug for Tag {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", String::from_utf8_lossy(&self.string))
+    }
+}
+
+#[binread]
+#[br(little)]
+struct U8String { // byte string prefixed by a single length byte (so at most 255 bytes)
+    #[br(temp)]
+    length: u8, // used only as the element count for `string`; not stored
+
+    #[br(count = length)]
+    string: Vec<u8>, // raw bytes; NOTE(review): text encoding is not established here
+}
+
+impl Debug for U8String {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", String::from_utf8_lossy(&self.string))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::io::Cursor;
+
+    use binrw::BinRead;
+
+    use crate::output::pivot::tlo::TableLook;
+
+    #[test]
+    fn parse() {
+        let bytes = include_bytes!("test1.tlo");
+        let tlo = TableLook::read(&mut Cursor::new(bytes)).unwrap();
+        println!("{tlo:#?}");
+    }
+}