work
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 15 Oct 2025 21:51:19 +0000 (14:51 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 15 Oct 2025 21:51:19 +0000 (14:51 -0700)
rust/doc/src/spv/structure.md
rust/pspp/src/output.rs
rust/pspp/src/output/spv.rs
rust/pspp/src/output/spv/light.rs

index c1299b6cef137c7ab29e9c64e229998e37a52a46..1241c61ce770e52949dd69cc72397ced4cc9fc4c 100644 (file)
@@ -306,7 +306,7 @@ anyway.  The user cannot edit it.
 ```
 container
    :visibility=(visible | hidden)
-   :page-break-before=(always)?
+   :page-break-before=(always | auto | avoid | left | right | inherit)?
    :text-align=(left | center)?
    :width=dimension
 => label (table | container_text | graph | model | object | image | tree)
@@ -321,6 +321,11 @@ This element has the following attributes.
   Whether the container's content is displayed.  "Notes" tables are
   often hidden; other data is usually visible.
 
+* `page-break-before`  
+  Whether to start the element at the beginning of a new page.  This
+  attribute is usually not present.  The only value seen in the corpus
+  is `always`.
+
 * `text-align`  
   Alignment of text within the container.  Observed with nested
   `table` and `text` elements.
@@ -431,7 +436,7 @@ table
    :orphanTolerance=int?
    :rowBreakNumber=int?
    :subType
-   :tableId
+   :tableId?
    :tableLookId?
    :type[table_type]=(table | note | warning)
 => tableProperties? tableStructure
@@ -457,6 +462,9 @@ This element has the following attributes.
 * `tableId`  
   A number that uniquely identifies the table within the SPV file,
   typically a large negative number such as `-4147135649387905023`.
+  It is usually present.  For light binary members, this is the same
+  as `table-id` in the [light detail member
+  header](light-detail.md#header).
 
 * `creator-version`  
   As on the `heading` element.  In the corpus, this is only present
index b275f797ced9b8ff4c1b46c16619c6fbcc52b677..9c14affa47cb81df9dad3919eecea5414d76d265 100644 (file)
@@ -534,6 +534,13 @@ impl SpvInfo {
         }
     }
 
+    pub fn with_error(self) -> Self {
+        Self {
+            error: true,
+            ..self
+        }
+    }
+
     pub fn member_names(&self) -> Vec<&str> {
         let mut member_names = vec![self.structure_member.as_str()];
         if let Some(members) = &self.members {
index 3ab1d0fdb674a8bed0182d20e815c646a6047747..d1cdc1a695d61622f8b37e22b2c9812eea177621 100644 (file)
@@ -20,6 +20,7 @@ use std::{
     path::Path,
 };
 
+use anyhow::Context;
 use binrw::{BinRead, error::ContextExt};
 use displaydoc::Display;
 use serde::Deserialize;
@@ -28,7 +29,7 @@ use zip::{ZipArchive, result::ZipError};
 use crate::output::{
     Details, Item, SpvInfo, SpvMembers, Text,
     page::PageSetup,
-    pivot::{TableProperties, Value},
+    pivot::{PivotTable, TableProperties, Value},
     spv::light::{LightError, LightTable},
 };
 
@@ -106,6 +107,10 @@ impl Item {
     }
 }
 
+fn new_error_item(message: impl Into<Value>) -> Item {
+    Text::new_log(message).into_item().with_label("Error")
+}
+
 fn read_heading<R>(
     archive: &mut ZipArchive<R>,
     file_number: usize,
@@ -117,9 +122,11 @@ where
     let member = BufReader::new(archive.by_index(file_number)?);
     let mut heading: Heading = match serde_path_to_error::deserialize(
         &mut quick_xml::de::Deserializer::from_reader(member),
-    ) {
+    )
+    .with_context(|| format!("Failed to parse {structure_member}"))
+    {
         Ok(result) => result,
-        Err(error) => panic!("{error}"),
+        Err(error) => panic!("{error:?}"),
     };
     let page_setup = heading.page_setup.take();
     Ok((heading.decode(archive, structure_member)?, page_setup))
@@ -153,40 +160,33 @@ impl Heading {
         for child in self.children {
             match child {
                 HeadingContent::Container(container) => {
-                    if container.page_break_before {
+                    if container.page_break_before == PageBreakBefore::Always {
                         items.push(
                             Details::PageBreak
                                 .into_item()
                                 .with_spv_info(SpvInfo::new(structure_member)),
                         );
                     }
-                    match container.content {
+                    let item = match container.content {
                         ContainerContent::Table(table) => {
-                            items.push(
-                                table.decode(archive, structure_member).unwrap(), /* XXX*/
-                            );
-                        }
-                        ContainerContent::Graph(graph) => {
-                            items.push(graph.decode(structure_member));
+                            table.decode(archive, structure_member).unwrap() /* XXX*/
                         }
-                        ContainerContent::Text(container_text) => {
-                            items.push(
-                                Text::new(
-                                    match container_text.text_type {
-                                        TextType::Title => crate::output::TextType::Title,
-                                        TextType::Log | TextType::Text => {
-                                            crate::output::TextType::Log
-                                        }
-                                        TextType::PageTitle => crate::output::TextType::PageTitle,
-                                    },
-                                    container_text.decode(),
-                                )
-                                .into_item()
-                                .with_command_name(container_text.command_name)
-                                .with_spv_info(SpvInfo::new(structure_member)),
-                            );
-                        }
-                    }
+                        ContainerContent::Graph(graph) => graph.decode(structure_member),
+                        ContainerContent::Text(container_text) => Text::new(
+                            match container_text.text_type {
+                                TextType::Title => crate::output::TextType::Title,
+                                TextType::Log | TextType::Text => crate::output::TextType::Log,
+                                TextType::PageTitle => crate::output::TextType::PageTitle,
+                            },
+                            container_text.decode(),
+                        )
+                        .into_item()
+                        .with_command_name(container_text.command_name)
+                        .with_spv_info(SpvInfo::new(structure_member)),
+                        ContainerContent::Tree => new_error_item("trees not yet implemented")
+                            .with_spv_info(SpvInfo::new(structure_member).with_error()),
+                    };
+                    items.push(item);
                 }
                 HeadingContent::Heading(mut heading) => {
                     let show = !heading.visibility.is_some();
@@ -230,7 +230,7 @@ struct Container {
     visibility: Visibility,
     #[serde(rename = "@page-break-before")]
     #[serde(default)]
-    page_break_before: bool,
+    page_break_before: PageBreakBefore,
     #[serde(rename = "@text-align")]
     text_align: Option<TextAlign>,
     #[serde(rename = "@width")]
@@ -241,6 +241,18 @@ struct Container {
     content: ContainerContent,
 }
 
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Deserialize)]
+#[serde(rename_all = "camelCase")]
+enum PageBreakBefore {
+    #[default]
+    Auto,
+    Always,
+    Avoid,
+    Left,
+    Right,
+    Inherit,
+}
+
 #[derive(Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 enum Visibility {
@@ -262,12 +274,17 @@ enum ContainerContent {
     Table(Table),
     Text(ContainerText),
     Graph(Graph),
-    /*    Model(Model),
+    /*
+    Model(Model),
     Object(Object),
-    Image(Image),
-    Tree(Tree),*/
+    Image(Image),*/
+    Tree,
 }
 
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "camelCase")]
+struct Tree;
+
 #[derive(Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 struct Graph {
@@ -300,7 +317,7 @@ struct Table {
     #[serde(rename = "@subType")]
     sub_type: String,
     #[serde(rename = "@tableId")]
-    table_id: i64,
+    table_id: Option<i64>,
     #[serde(rename = "@type")]
     table_type: TableType,
     properties: Option<TableProperties>,
@@ -329,7 +346,7 @@ impl Table {
                     .with_members(SpvMembers::Light(self.table_structure.data_path.clone())),
             ))
         } else {
-            todo!()
+            Ok(PivotTable::new([]).into_item())
         }
     }
 }
index fdc2864aa507d7bf5a59ab507901d3243da5f5aa..4f36e639c0a770e6b446540ada8fed8bc5fd849f 100644 (file)
@@ -1,5 +1,4 @@
 use std::{
-    any::type_name,
     fmt::Debug,
     io::{Cursor, Read, Seek},
     ops::Deref,
@@ -51,7 +50,6 @@ pub enum LightError {
 #[br(little)]
 #[derive(Debug)]
 pub struct LightTable {
-    #[br(dbg)]
     header: Header,
     #[br(args(header.version))]
     titles: Titles,
@@ -63,7 +61,7 @@ pub struct LightTable {
     borders: Borders,
     #[br(parse_with(parse_counted))]
     print_settings: PrintSettings,
-    #[br(dbg, if(header.version == Version::V3), parse_with(parse_counted))]
+    #[br(if(header.version == Version::V3), parse_with(parse_counted))]
     table_settings: TableSettings,
     #[br(if(header.version == Version::V1), temp)]
     _ts: Option<Counted<Sponge>>,
@@ -774,7 +772,6 @@ where
         let start = reader.stream_position()?;
         let result = <T>::read_options(reader, endian, args).ok();
         if result.is_none() {
-            dbg!((start, type_name::<T>()));
             reader.seek(std::io::SeekFrom::Start(start))?;
         }
         Ok(Self(result))
@@ -796,9 +793,7 @@ struct Formats {
     _x8: bool,
     #[br(temp, parse_with(parse_bool))]
     _x9: bool,
-    #[br(dbg)]
     y0: Y0,
-    #[br(dbg)]
     custom_currency: CustomCurrency,
     #[br(if(version == Version::V1))]
     v1: Optional<Counted<X0>>,
@@ -851,7 +846,6 @@ impl Formats {
 #[br(little)]
 #[derive(Debug)]
 struct FormatsV3 {
-    #[br(dbg)]
     #[br(parse_with(parse_counted))]
     x1_x2: X1X2,
     #[br(parse_with(parse_counted))]
@@ -873,7 +867,6 @@ struct X1X2 {
 struct X0 {
     #[br(temp)]
     _bytes: [u8; 14],
-    #[br(dbg)]
     y1: Y1,
     y2: Y2,
 }
@@ -902,7 +895,6 @@ struct Y1 {
 #[br(little)]
 #[derive(Debug)]
 struct Y2 {
-    #[br(dbg)]
     custom_currency: CustomCurrency,
     missing: u8,
     #[br(temp, parse_with(parse_bool))]
@@ -966,15 +958,13 @@ struct X2 {
 #[br(little)]
 #[derive(Debug)]
 struct X3 {
-    #[br(dbg, temp, magic = b"\x01\0")]
+    #[br(temp, magic = b"\x01\0")]
     _x21: u8,
-    #[br(dbg, magic = b"\0\0\0")]
+    #[br(magic = b"\0\0\0")]
     y1: Y1,
-    #[br(dbg)]
     small: f64,
     #[br(magic = 1u8, temp)]
     _one: (),
-    #[br(dbg)]
     inner: Optional<X3Inner>,
     y2: Y2,
     #[br(temp)]
@@ -1132,9 +1122,8 @@ struct ValueFixedText {
 #[br(little, import(version: Version))]
 #[derive(Debug)]
 struct ValueTemplate {
-    #[br(dbg, parse_with(parse_explicit_optional), args(version))]
+    #[br(parse_with(parse_explicit_optional), args(version))]
     mods: Option<ValueMods>,
-    #[br(dbg)]
     template: U32String,
     #[br(parse_with(parse_vec), args(version))]
     args: Vec<Argument>,
@@ -1189,12 +1178,6 @@ impl BinRead for Value {
             }
             _ => Err(BinError::NoVariantMatch { pos: start }),
         }
-        .inspect(|result| {
-            println!(
-                "{start:#x}..{:#x}: {result:?}",
-                reader.stream_position().unwrap()
-            )
-        })
         .map_err(|e| e.with_message(format!("while parsing Value starting at offset {start:#x}")))
     }
 }
@@ -1329,7 +1312,6 @@ impl BinRead for Argument {
         (version,): (Version,),
     ) -> BinResult<Self> {
         let count = u32::read_options(reader, endian, ())? as usize;
-        dbg!(count);
         if count == 0 {
             Ok(Self(vec![Value::read_options(reader, endian, (version,))?]))
         } else {
@@ -1365,9 +1347,9 @@ impl Argument {
 #[br(little, import(version: Version))]
 #[derive(Debug)]
 struct ValueMods {
-    #[br(dbg, parse_with(parse_vec))]
+    #[br(parse_with(parse_vec))]
     refs: Vec<i16>,
-    #[br(dbg, parse_with(parse_vec))]
+    #[br(parse_with(parse_vec))]
     subscripts: Vec<U32String>,
     #[br(if(version == Version::V1))]
     v1: Option<Sponge>,
@@ -1665,7 +1647,6 @@ impl Axes {
 #[br(little, import(version: Version))]
 #[derive(Debug)]
 struct Cell {
-    #[br(dbg)]
     index: u64,
     #[br(if(version == Version::V1), temp)]
     _zero: Optional<Zero>,