From: Ben Pfaff Date: Sat, 18 Oct 2025 14:41:16 +0000 (-0700) Subject: all files in submissions/spv parse X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9defaa299ad0a546feb8fb84bc64541e4709d7f7;p=pspp all files in submissions/spv parse --- diff --git a/rust/doc/src/spv/legacy-detail-binary.md b/rust/doc/src/spv/legacy-detail-binary.md index e5edf4227a..b8812b4f22 100644 --- a/rust/doc/src/spv/legacy-detail-binary.md +++ b/rust/doc/src/spv/legacy-detail-binary.md @@ -31,13 +31,14 @@ of the other data in the member. Versions 0xaf and 0xb0 are known. We will refer to "version 0xaf" and "version 0xb0" members later on. A legacy member consists of `n-sources` data sources, each of which -has Metadata and Data. +has `Metadata` and `Data`. `member-size` is the size of the legacy binary member, in bytes. -The Data and Strings above are commented out because the Metadata has -some oddities that mean that the Data sometimes seems to start at an -unexpected place. The following section goes into detail. +The `Data` and `Strings` above are commented out because the +`Metadata` has some oddities that mean that the `Data` sometimes seems +to start at an unexpected place. The following section goes into +detail. @@ -57,21 +58,21 @@ values. 0-bytes. The names that appear in the corpus are very generic: usually `tableData` for pivot table data or `source0` for chart data. -A given Metadata's `data-offset` is the offset, in bytes, from the -beginning of the member to the start of the corresponding Data. This -allows programs to skip to the beginning of the data for a particular -source. In every case in the corpus, the Data follow the Metadata in -the same order, but it is important to use `data-offset` instead of -reading sequentially through the file because of the exception described -below. - -One SPV file in the corpus has legacy binary members with version -0xb0 but a 28-byte `source-name` field (and only a single source). 
In -practice, this means that the 64-byte `source-name` used in version 0xb0 -has a lot of 0-bytes in the middle followed by the `variable-name` of -the following Data. As long as a reader treats the first 0-byte in the -`source-name` as terminating the string, it can properly interpret these -members. +A given `Metadata`'s `data-offset` is the offset, in bytes, from the +beginning of the member to the start of the corresponding `Data`. +This allows programs to skip to the beginning of the data for a +particular source. In every case in the corpus, the `Data` follow the +`Metadata` in the same order, but it is important to use `data-offset` +instead of reading sequentially through the file because of the +exception described below. + +One SPV file in the corpus has legacy binary members with version 0xb0 +but a 28-byte `source-name` field (and only a single source). In +practice, this means that the 64-byte `source-name` used in version +0xb0 has a lot of 0-bytes in the middle followed by the +`variable-name` of the following `Data`. As long as a reader treats +the first 0-byte in the `source-name` as terminating the string, it +can properly interpret these members. The meaning of `x` in version 0xb0 is unknown. @@ -82,14 +83,14 @@ Data => Variable*[n-variables] Variable => byte*288[variable-name] double*[n-values] ``` -Data follow the `Metadata` in the legacy binary format, with sources +`Data` follow the `Metadata` in the legacy binary format, with sources in the same order (but readers should use the `data-offset` in -`Metadata` records, rather than reading sequentially). Each Variable -begins with a `variable-name` that generally indicates its role in the -pivot table, e.g. "cell", "cellFormat", "dimension0categories", -"dimension0group0", followed by the numeric data, one double per -datum. A double with the maximum negative double `-DBL_MAX` -represents the system-missing value `SYSMIS`. +`Metadata` records, rather than reading sequentially). 
Each +`Variable` begins with a `variable-name` that generally indicates its +role in the pivot table, e.g. `cell`, `cellFormat`, +`dimension0categories`, `dimension0group0`, followed by the numeric +data, one double per datum. A double with the maximum negative double +`-DBL_MAX` represents the system-missing value `SYSMIS`. ## String Data @@ -108,10 +109,10 @@ Label => int32[frequency] string[label] Each variable may include a mix of numeric and string data values. If a legacy binary member contains any string data, `Strings` is present; -otherwise, it ends just after the last Data element. +otherwise, it ends just after the last `Data` element. The string data overlays the numeric data. When a variable includes -any string data, its Variable represents the string values with a +any string data, its `Variable` represents the string values with a `SYSMIS` or NaN placeholder. (Not all such values need be placeholders.) diff --git a/rust/doc/src/spv/legacy-detail-xml.md b/rust/doc/src/spv/legacy-detail-xml.md index 8d5edf3f87..a8871120f0 100644 --- a/rust/doc/src/spv/legacy-detail-xml.md +++ b/rust/doc/src/spv/legacy-detail-xml.md @@ -568,8 +568,7 @@ Each `layer` element represents a dimension, e.g.: ## The `facetLayout` Element ``` -facetLayout => tableLayout setCellProperties[scp1]* - facetLevel+ setCellProperties[scp2]* +facetLayout => tableLayout (setCellProperties | facetLevel)+ tableLayout :verticalTitlesInCorner=bool diff --git a/rust/pspp/src/output/spv.rs b/rust/pspp/src/output/spv.rs index a20ca1eabc..1e9d6d377c 100644 --- a/rust/pspp/src/output/spv.rs +++ b/rust/pspp/src/output/spv.rs @@ -32,14 +32,14 @@ use crate::output::{ page::PageSetup, pivot::{PivotTable, TableProperties, Value}, spv::{ - legacy::Visualization, + legacy_xml::Visualization, light::{LightError, LightTable}, }, }; mod css; pub mod html; -mod legacy; +mod legacy_xml; mod light; #[derive(Debug, Display, thiserror::Error)] @@ -428,7 +428,7 @@ impl Table { Ok(result) => result, Err(error) 
=> panic!("{error:?}"), }; - dbg!(_visualization); + //dbg!(_visualization); Ok(PivotTable::new([]).into_item()) } } diff --git a/rust/pspp/src/output/spv/legacy.rs b/rust/pspp/src/output/spv/legacy.rs deleted file mode 100644 index f6fab20c67..0000000000 --- a/rust/pspp/src/output/spv/legacy.rs +++ /dev/null @@ -1,1153 +0,0 @@ -// PSPP - a program for statistical analysis. -// Copyright (C) 2025 Free Software Foundation, Inc. -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -// details. -// -// You should have received a copy of the GNU General Public License along with -// this program. If not, see <https://www.gnu.org/licenses/>. - -use std::marker::PhantomData; - -use serde::Deserialize; - -use crate::output::pivot::Color; - -#[derive(Debug)] -struct Ref<T> { - references: String, - _phantom: PhantomData<T>, -} - -impl<'de, T> Deserialize<'de> for Ref<T> { - fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> - where - D: serde::Deserializer<'de>, - { - Ok(Self { - references: String::deserialize(deserializer)?, - _phantom: PhantomData, - }) - } -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct Visualization { - /// In format `YYYY-MM-DD`. - #[serde(rename = "@date")] - date: String, - // Locale used for output, e.g. `en-US`. - #[serde(rename = "@lang")] - lang: String, - /// Localized title of the pivot table. - #[serde(rename = "@name")] - name: String, - /// Base style for the pivot table. - #[serde(rename = "@style")] - style: Ref