From 9defaa299ad0a546feb8fb84bc64541e4709d7f7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 18 Oct 2025 07:41:16 -0700 Subject: [PATCH] all files in submissions/spv parse --- rust/doc/src/spv/legacy-detail-binary.md | 57 ++++++++++--------- rust/doc/src/spv/legacy-detail-xml.md | 3 +- rust/pspp/src/output/spv.rs | 6 +- .../output/spv/{legacy.rs => legacy_xml.rs} | 50 +++++++++------- 4 files changed, 64 insertions(+), 52 deletions(-) rename rust/pspp/src/output/spv/{legacy.rs => legacy_xml.rs} (96%) diff --git a/rust/doc/src/spv/legacy-detail-binary.md b/rust/doc/src/spv/legacy-detail-binary.md index e5edf4227a..b8812b4f22 100644 --- a/rust/doc/src/spv/legacy-detail-binary.md +++ b/rust/doc/src/spv/legacy-detail-binary.md @@ -31,13 +31,14 @@ of the other data in the member. Versions 0xaf and 0xb0 are known. We will refer to "version 0xaf" and "version 0xb0" members later on. A legacy member consists of `n-sources` data sources, each of which -has Metadata and Data. +has `Metadata` and `Data`. `member-size` is the size of the legacy binary member, in bytes. -The Data and Strings above are commented out because the Metadata has -some oddities that mean that the Data sometimes seems to start at an -unexpected place. The following section goes into detail. +The `Data` and `Strings` above are commented out because the +`Metadata` has some oddities that mean that the `Data` sometimes seems +to start at an unexpected place. The following section goes into +detail. @@ -57,21 +58,21 @@ values. 0-bytes. The names that appear in the corpus are very generic: usually `tableData` for pivot table data or `source0` for chart data. -A given Metadata's `data-offset` is the offset, in bytes, from the -beginning of the member to the start of the corresponding Data. This -allows programs to skip to the beginning of the data for a particular -source. In every case in the corpus, the Data follow the Metadata in -the same order, but it is important to use `data-offset` instead of -reading sequentially through the file because of the exception described -below. - -One SPV file in the corpus has legacy binary members with version -0xb0 but a 28-byte `source-name` field (and only a single source). In -practice, this means that the 64-byte `source-name` used in version 0xb0 -has a lot of 0-bytes in the middle followed by the `variable-name` of -the following Data. As long as a reader treats the first 0-byte in the -`source-name` as terminating the string, it can properly interpret these -members. +A given `Metadata`'s `data-offset` is the offset, in bytes, from the +beginning of the member to the start of the corresponding `Data`. +This allows programs to skip to the beginning of the data for a +particular source. In every case in the corpus, the `Data` follow the +`Metadata` in the same order, but it is important to use `data-offset` +instead of reading sequentially through the file because of the +exception described below. + +One SPV file in the corpus has legacy binary members with version 0xb0 +but a 28-byte `source-name` field (and only a single source). In +practice, this means that the 64-byte `source-name` used in version +0xb0 has a lot of 0-bytes in the middle followed by the +`variable-name` of the following `Data`. As long as a reader treats +the first 0-byte in the `source-name` as terminating the string, it +can properly interpret these members. The meaning of `x` in version 0xb0 is unknown. @@ -82,14 +83,14 @@ Data => Variable*[n-variables] Variable => byte*288[variable-name] double*[n-values] ``` -Data follow the `Metadata` in the legacy binary format, with sources +`Data` follow the `Metadata` in the legacy binary format, with sources in the same order (but readers should use the `data-offset` in -`Metadata` records, rather than reading sequentially). Each Variable -begins with a `variable-name` that generally indicates its role in the -pivot table, e.g. "cell", "cellFormat", "dimension0categories", -"dimension0group0", followed by the numeric data, one double per -datum. A double with the maximum negative double `-DBL_MAX` -represents the system-missing value `SYSMIS`. +`Metadata` records, rather than reading sequentially). Each +`Variable` begins with a `variable-name` that generally indicates its +role in the pivot table, e.g. `cell`, `cellFormat`, +`dimension0categories`, `dimension0group0`, followed by the numeric +data, one double per datum. A double with the maximum negative double +`-DBL_MAX` represents the system-missing value `SYSMIS`. ## String Data @@ -108,10 +109,10 @@ Label => int32[frequency] string[label] Each variable may include a mix of numeric and string data values. If a legacy binary member contains any string data, `Strings` is present; -otherwise, it ends just after the last Data element. +otherwise, it ends just after the last `Data` element. The string data overlays the numeric data. When a variable includes -any string data, its Variable represents the string values with a +any string data, its `Variable` represents the string values with a `SYSMIS` or NaN placeholder. (Not all such values need be placeholders.) diff --git a/rust/doc/src/spv/legacy-detail-xml.md b/rust/doc/src/spv/legacy-detail-xml.md index 8d5edf3f87..a8871120f0 100644 --- a/rust/doc/src/spv/legacy-detail-xml.md +++ b/rust/doc/src/spv/legacy-detail-xml.md @@ -568,8 +568,7 @@ Each `layer` element represents a dimension, e.g.: ## The `facetLayout` Element ``` -facetLayout => tableLayout setCellProperties[scp1]* - facetLevel+ setCellProperties[scp2]* +facetLayout => tableLayout (setCellProperties | facetLevel)+ tableLayout :verticalTitlesInCorner=bool diff --git a/rust/pspp/src/output/spv.rs b/rust/pspp/src/output/spv.rs index a20ca1eabc..1e9d6d377c 100644 --- a/rust/pspp/src/output/spv.rs +++ b/rust/pspp/src/output/spv.rs @@ -32,14 +32,14 @@ use crate::output::{ page::PageSetup, pivot::{PivotTable, TableProperties, Value}, spv::{ - legacy::Visualization, + legacy_xml::Visualization, light::{LightError, LightTable}, }, }; mod css; pub mod html; -mod legacy; +mod legacy_xml; mod light; #[derive(Debug, Display, thiserror::Error)] @@ -428,7 +428,7 @@ impl Table { Ok(result) => result, Err(error) => panic!("{error:?}"), }; - dbg!(_visualization); + //dbg!(_visualization); Ok(PivotTable::new([]).into_item()) } } diff --git a/rust/pspp/src/output/spv/legacy.rs b/rust/pspp/src/output/spv/legacy_xml.rs similarity index 96% rename from rust/pspp/src/output/spv/legacy.rs rename to rust/pspp/src/output/spv/legacy_xml.rs index f6fab20c67..2ea4eeaa83 100644 --- a/rust/pspp/src/output/spv/legacy.rs +++ b/rust/pspp/src/output/spv/legacy_xml.rs @@ -54,18 +54,23 @@ pub struct Visualization { #[serde(rename = "@style")] style: Ref