From f8514777d0be9c26d1f781cc97a38ce90579ea30 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 14 Jul 2025 10:38:23 -0700 Subject: [PATCH] introduce SystemFile --- rust/pspp/src/main.rs | 14 +++++++---- rust/pspp/src/sys/cooked.rs | 48 +++++++++++++++++++++++++++++++++---- rust/pspp/src/sys/raw.rs | 8 ++++--- rust/pspp/src/sys/test.rs | 9 ++++--- 4 files changed, 63 insertions(+), 16 deletions(-) diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index f14b44570c..484b538485 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -18,7 +18,7 @@ use anyhow::{anyhow, Result}; use clap::{Args, Parser, Subcommand, ValueEnum}; use encoding_rs::Encoding; use pspp::crypto::EncryptedFile; -use pspp::sys::cooked::{Error, Headers}; +use pspp::sys::cooked::{Error, Headers, SystemFile}; use pspp::sys::raw::{infer_encoding, Decoder, Magic, Reader, Record, Warning}; use std::fs::File; use std::io::{stdout, BufReader, Write}; @@ -94,8 +94,9 @@ impl Convert { decoded_records.push(header.decode(&mut decoder)?); } let headers = Headers::new(decoded_records, &mut |e| Self::err(e))?; - let (dictionary, _metadata, cases) = - headers.decode(reader.cases(), decoder.encoding, |e| Self::err(e))?; + let SystemFile { + dictionary, cases, .. + } = headers.decode(reader.cases(), decoder.encoding, |e| Self::err(e)); let writer = match self.output { Some(path) => Box::new(File::create(path)?) as Box, None => Box::new(stdout()), @@ -290,8 +291,11 @@ fn dissect( decoded_records.push(header.decode(&mut decoder)?); } let headers = Headers::new(decoded_records, &mut |e| eprintln!("{e}"))?; - let (dictionary, metadata, _cases) = - headers.decode(reader.cases(), encoding, |e| eprintln!("{e}"))?; + let SystemFile { + dictionary, + metadata, + cases: _, + } = headers.decode(reader.cases(), encoding, |e| eprintln!("{e}")); println!("{dictionary:#?}"); println!("{metadata:#?}"); } diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 972741b35d..ff78c14ea5 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -61,7 +61,7 @@ pub enum Error { #[error("Using default encoding {0}.")] UsingDefaultEncoding(String), - #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)] + #[error("Variable record from offset {:#x} to {:#x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)] InvalidVariableWidth { offsets: Range, width: i32 }, #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")] @@ -321,6 +321,18 @@ pub enum Error { WrongNumberOfVarDisplay { expected: usize, actual: usize }, } +/// The content of an SPSS system file. +pub struct SystemFile { + /// The system file dictionary. + pub dictionary: Dictionary, + + /// System file metadata that is not part of the dictionary. + pub metadata: Metadata, + + /// Data in the system file. + pub cases: Cases, +} + #[derive(Clone, Debug)] pub struct Headers { pub header: HeaderRecord, @@ -492,7 +504,7 @@ impl Headers { mut cases: Cases, encoding: &'static Encoding, mut warn: impl FnMut(Error), - ) -> Result<(Dictionary, Metadata, Cases), Error> { + ) -> SystemFile { let mut dictionary = Dictionary::new(encoding); let file_label = fix_line_ends(self.header.file_label.trim_end_matches(' ')); @@ -799,7 +811,7 @@ impl Headers { if n_segments == 1 { warn(Error::ShortVeryLongString { short_name: record.short_name.clone(), - width: record.length + width: record.length, }); continue; } @@ -809,7 +821,7 @@ impl Headers { width: record.length, index, n_segments, - len: dictionary.variables.len() + len: dictionary.variables.len(), }); continue; } @@ -1012,18 +1024,44 @@ impl Headers { if let Some(n_cases) = metadata.n_cases { cases = cases.with_expected_cases(n_cases); } - Ok((dictionary, metadata, cases)) + SystemFile { + dictionary, + metadata, + cases, + } } } +/// System file metadata that is not part of [Dictionary]. +/// +/// [Dictionary]: crate::dictionary::Dictionary #[derive(Clone, Debug, PartialEq, Eq)] pub struct Metadata { + /// Creation date and time. + /// + /// This comes from the file header, not from the file system. pub creation: NaiveDateTime, + + /// Endianness of integers and floating-point numbers in the file. pub endian: Endian, + + /// Compression type (if any). pub compression: Option, + + /// Number of cases in the file, if it says. + /// + /// This is not trustworthy: there can be more or fewer. pub n_cases: Option, + + /// Name of the product that wrote the file. pub product: String, + + /// Extended name of the product that wrote the file. pub product_ext: Option, + + /// Version number of the product that wrote the file. + /// + /// For example, `(1,2,3)` is version 1.2.3. pub version: Option<(i32, i32, i32)>, } diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 12d224586b..97fe776d61 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -1398,9 +1398,11 @@ impl CaseVar { /// are divided into multiple, adjacent string variables, approximately one /// variable for each 252 bytes. /// -/// - [Headers::decode](super::cooked::Headers::decode) returns [Cases] in -/// which each [Dictionary](crate::dictionary::Dictionary) variable -/// corresponds to one [Datum], even for long string variables. +/// - In the [Cases] in [SystemFile], each [Dictionary] variable corresponds to +/// one [Datum], even for long string variables. +/// +/// [Dictionary]: crate::dictionary::Dictionary +/// [SystemFile]: crate::sys::cooked::SystemFile pub struct Cases { reader: Box, case_vars: Vec, diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index 6f55a23f76..e0d0b90439 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -29,7 +29,7 @@ use crate::{ Details, Item, Text, }, sys::{ - cooked::Headers, + cooked::{Headers, SystemFile}, raw::{infer_encoding, Decoder, Reader}, sack::sack, }, @@ -616,8 +616,11 @@ where let mut errors = Vec::new(); let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); - let (dictionary, metadata, cases) = - headers.decode(cases, encoding, |e| errors.push(e)).unwrap(); + let SystemFile { + dictionary, + metadata, + cases, + } = headers.decode(cases, encoding, |e| errors.push(e)); let (group, data) = metadata.to_pivot_rows(); let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data( data.into_iter() -- 2.30.2