seems to work rust
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 18 Oct 2025 16:02:10 +0000 (09:02 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 18 Oct 2025 16:02:10 +0000 (09:02 -0700)
rust/pspp/src/output/spv.rs
rust/pspp/src/output/spv/legacy_bin.rs
rust/pspp/src/output/spv/light.rs

index cff19a88a293289c96b6c358bc254000c33982a5..c249ca547c61436d7d32593e741bcf80ac3957fc 100644 (file)
@@ -32,6 +32,7 @@ use crate::output::{
     page::PageSetup,
     pivot::{PivotTable, TableProperties, Value},
     spv::{
+        legacy_bin::LegacyBin,
         legacy_xml::Visualization,
         light::{LightError, LightTable},
     },
@@ -39,8 +40,8 @@ use crate::output::{
 
 mod css;
 pub mod html;
-mod legacy_xml;
 mod legacy_bin;
+mod legacy_xml;
 mod light;
 
 #[derive(Debug, Display, thiserror::Error)]
@@ -429,7 +430,19 @@ impl Table {
                     Ok(result) => result,
                     Err(error) => panic!("{error:?}"),
                 };
-                //dbg!(_visualization);
+
+                let bin_member_name = &self.table_structure.data_path;
+                let mut bin_member = archive.by_name(bin_member_name)?;
+                let mut bin_data = Vec::with_capacity(bin_member.size() as usize);
+                bin_member.read_to_end(&mut bin_data)?;
+                let mut cursor = Cursor::new(bin_data);
+                let _legacy_bin = LegacyBin::read(&mut cursor).map_err(|e| {
+                    e.with_message(format!(
+                        "While parsing {bin_member_name:?} as legacy binary SPV member"
+                    ))
+                })?;
+                //dbg!(&_legacy_bin);
+
                 Ok(PivotTable::new([]).into_item())
             }
         }
index bcaabdd4043d765b0abe68870c1ad2f49c9452ba..c306a09cabef1b47f3fef466971839c41f45de71 100644 (file)
@@ -1,6 +1,8 @@
 use std::io::{Read, Seek, SeekFrom};
 
-use binrw::{BinRead, binread};
+use binrw::{BinRead, BinResult, binread};
+
+use crate::output::spv::light::{U32String, parse_vec};
 
 #[binread]
 #[br(little)]
@@ -13,8 +15,10 @@ pub struct LegacyBin {
     member_size: u32,
     #[br(count(n_sources), args { inner: (version,) })]
     metadata: Vec<Metadata>,
-    #[br(count(n_sources), args { inner: metadata.as_slice() })]
+    #[br(parse_with(parse_data), args(metadata.as_slice()))]
     data: Vec<Data>,
+    #[br(parse_with(parse_strings))]
+    strings: Option<Strings>,
 }
 
 #[binread]
@@ -45,6 +49,24 @@ struct Data {
     variables: Vec<Variable>,
 }
 
+#[binrw::parser(reader, endian)] // custom binrw parser; the macro binds `reader` and `endian`
+fn parse_data(metadata: &[Metadata]) -> BinResult<Vec<Data>> { // yields one `Data` per `Metadata`, in order
+    let mut data = Vec::with_capacity(metadata.len());
+    for metadata in metadata {
+        reader.seek(SeekFrom::Start(metadata.data_offset as u64))?; // seek from stream start to this source's recorded offset
+        let mut variables = Vec::with_capacity(metadata.n_variables as usize); // NOTE(review): capacity is taken from file contents -- confirm it is bounded for corrupt input
+        for _ in 0..metadata.n_variables {
+            variables.push(Variable::read_options(
+                reader,
+                endian, // same endianness the enclosing parser was invoked with
+                (metadata.n_values,), // passed as `Variable`'s read argument
+            )?); // the first read error aborts the whole parse
+        }
+        data.push(Data { variables });
+    }
+    Ok(data)
+}
+
 impl BinRead for Data {
     type Args<'a> = &'a [Metadata];
 
@@ -74,3 +96,59 @@ struct Variable {
     #[br(count(n_values))]
     values: Vec<f64>,
 }
+
+#[binrw::parser(reader, endian)] // custom binrw parser; the macro binds `reader` and `endian`
+fn parse_strings() -> BinResult<Option<Strings>> { // `Some` if the current position is not the end of the stream, else `None`
+    let position = reader.stream_position()?; // remember where we are before probing for the end
+    let length = reader.seek(SeekFrom::End(0))?; // seeking to the end returns the new position, i.e. the stream length
+    if position != length {
+        reader.seek(SeekFrom::Start(position))?; // undo the probe so `Strings` is read from the original position
+        Ok(Some(Strings::read_options(reader, endian, ())?))
+    } else {
+        Ok(None) // already at end of stream: nothing left to parse
+    }
+}
+
+#[binread]
+#[br(little)] // fields are read as little-endian
+#[derive(Debug)]
+struct Strings { // read by `parse_strings` when bytes remain in the stream
+    #[br(parse_with(parse_vec))]
+    source_maps: Vec<SourceMap>, // read first
+    #[br(parse_with(parse_vec))]
+    labels: Vec<Label>, // read immediately after `source_maps`
+}
+
+#[binread]
+#[br(little)] // fields are read as little-endian
+#[derive(Debug)]
+struct SourceMap { // one element of `Strings::source_maps`
+    source_name: U32String, // stored undecoded; see `U32String::decode`
+    #[br(parse_with(parse_vec))]
+    variable_maps: Vec<VariableMap>, // follows `source_name` in the stream
+}
+
+#[binread]
+#[br(little)] // fields are read as little-endian
+#[derive(Debug)]
+struct VariableMap { // one element of `SourceMap::variable_maps`
+    variable_name: U32String, // stored undecoded; see `U32String::decode`
+    #[br(parse_with(parse_vec))]
+    datum_maps: Vec<DatumMap>, // follows `variable_name` in the stream
+}
+
+#[binread]
+#[br(little)] // fields are read as little-endian
+#[derive(Debug)]
+struct DatumMap { // one element of `VariableMap::datum_maps`
+    value_idx: u32, // presumably an index into the variable's values -- TODO confirm
+    label_idx: u32, // presumably an index into `Strings::labels` -- TODO confirm
+}
+
+#[binread]
+#[br(little)] // fields are read as little-endian
+#[derive(Debug)]
+struct Label { // one element of `Strings::labels`
+    frequency: u32, // read before `label`; meaning not established here -- TODO confirm
+    label: U32String, // stored undecoded; see `U32String::decode`
+}
index 1bea946d022f62274eb26bec3cbdb41aad03cdf2..fa5a119022fd64d40daca491f30bb48e85e9ba38 100644 (file)
@@ -300,7 +300,7 @@ where
 }
 
 #[binrw::parser(reader, endian)]
-fn parse_vec<T, A>(inner: A, ...) -> BinResult<Vec<T>>
+pub(super) fn parse_vec<T, A>(inner: A, ...) -> BinResult<Vec<T>>
 where
     for<'a> T: BinRead<Args<'a> = A>,
     A: Clone,
@@ -637,13 +637,13 @@ impl Sizing {
 
 #[binread]
 #[derive(Default)]
-struct U32String {
+pub(super) struct U32String {
     #[br(parse_with(parse_vec))]
     string: Vec<u8>,
 }
 
 impl U32String {
-    fn decode(&self, encoding: &'static Encoding) -> String {
+    pub(super) fn decode(&self, encoding: &'static Encoding) -> String {
         if let Ok(string) = str::from_utf8(&self.string) {
             string.into()
         } else {
@@ -653,7 +653,7 @@ impl U32String {
                 .into_owned()
         }
     }
-    fn decode_optional(&self, encoding: &'static Encoding) -> Option<String> {
+    pub(super) fn decode_optional(&self, encoding: &'static Encoding) -> Option<String> {
         let string = self.decode(encoding);
         if !string.is_empty() {
             Some(string)