From 5903ed89c4c4bcd6a7e4ccb7ed975e8a27b65dee Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 19 Jul 2025 14:04:10 -0700
Subject: [PATCH] work on writer

---
 rust/pspp/src/main.rs       |   4 +-
 rust/pspp/src/sys/cooked.rs |   6 +--
 rust/pspp/src/sys/mod.rs    |   3 +-
 rust/pspp/src/sys/test.rs   |   6 +--
 rust/pspp/src/sys/write.rs  | 116 +++++++++++++++++++++++++++++++++---
 5 files changed, 119 insertions(+), 16 deletions(-)

diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs
index 458dd01839..9cb88419c3 100644
--- a/rust/pspp/src/main.rs
+++ b/rust/pspp/src/main.rs
@@ -22,7 +22,7 @@ use pspp::{
     sys::{
         self,
         raw::{infer_encoding, records::Compression, Decoder, Magic, Reader, Record},
-        ReaderOptions, Records,
+        ReadOptions, Records,
     },
 };
 use std::{
@@ -126,7 +126,7 @@ impl Convert {
             eprintln!("warning: {warning}");
         }
 
-        let (dictionary, _, cases) = ReaderOptions::new()
+        let (dictionary, _, cases) = ReadOptions::new()
             .with_encoding(self.encoding)
             .with_password(self.password.clone())
             .open_file(&self.input, warn)?
diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs
index 802cd87d3d..11a6c59a9e 100644
--- a/rust/pspp/src/sys/cooked.rs
+++ b/rust/pspp/src/sys/cooked.rs
@@ -478,7 +478,7 @@ pub enum Error {
 
 /// Options for reading a system file.
 #[derive(Default, Clone, Debug)]
-pub struct ReaderOptions {
+pub struct ReadOptions {
     /// Character encoding for text in the system file.
     ///
     /// If not set, the character encoding will be determined from reading the
@@ -494,8 +494,8 @@ pub struct ReaderOptions {
     pub password: Option<String>,
 }
 
-impl ReaderOptions {
-    /// Construct a new `ReaderOptions` that initially does not specify an
+impl ReadOptions {
+    /// Construct a new `ReadOptions` that initially does not specify an
     /// encoding or password.
     pub fn new() -> Self {
         Self::default()
diff --git a/rust/pspp/src/sys/mod.rs b/rust/pspp/src/sys/mod.rs
index c3549ed358..911cc0f62f 100644
--- a/rust/pspp/src/sys/mod.rs
+++ b/rust/pspp/src/sys/mod.rs
@@ -22,7 +22,8 @@
 //! facilitate interchange between even the oldest and newest versions of
 //! software.
 //!
-//! To read a system file in the simplest way, use [ReaderOptions].
+//! Use [ReadOptions] to read a system file in the simplest way.
+//! Use [WriteOptions] to write a system file.
 
 // Warn about missing docs, but not for items declared with `#[cfg(test)]`.
 #![cfg_attr(not(test), warn(missing_docs))]
diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs
index 48e95a0f6b..a5fd40b92b 100644
--- a/rust/pspp/src/sys/test.rs
+++ b/rust/pspp/src/sys/test.rs
@@ -29,7 +29,7 @@ use crate::{
         Details, Item, Text,
     },
     sys::{
-        cooked::ReaderOptions,
+        cooked::ReadOptions,
         raw::{self, ErrorDetails},
         sack::sack,
     },
@@ -553,7 +553,7 @@ fn encrypted_file() {
 
 #[test]
 fn encrypted_file_without_password() {
-    let error = ReaderOptions::new()
+    let error = ReadOptions::new()
         .open_file("src/crypto/testdata/test-encrypted.sav", |_| {
             panic!();
         })
@@ -615,7 +615,7 @@ where
     R: Read + Seek + 'static,
 {
     let mut warnings = Vec::new();
-    let output = match ReaderOptions::new().open_reader(sysfile, |warning| warnings.push(warning)) {
+    let output = match ReadOptions::new().open_reader(sysfile, |warning| warnings.push(warning)) {
         Ok(system_file) => {
             let (dictionary, metadata, cases) = system_file.into_parts();
             let (group, data) = metadata.to_pivot_rows();
diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs
index 53391f670b..2ba55c2926 100644
--- a/rust/pspp/src/sys/write.rs
+++ b/rust/pspp/src/sys/write.rs
@@ -1,11 +1,11 @@
-#![allow(dead_code, missing_docs)]
+#![allow(dead_code)]
 use core::f64;
 use std::{
     borrow::Cow,
     collections::HashMap,
     fmt::Write as _,
     fs::File,
-    io::{Cursor, Seek, Write},
+    io::{BufWriter, Cursor, Seek, Write},
     path::Path,
 };
 
@@ -48,6 +48,7 @@ pub enum Version {
     V3,
 }
 
+/// Options for writing a system file.
 #[derive(Copy, Clone, Debug)]
 pub struct WriteOptions {
     /// How to compress (if at all) data in the system file.
@@ -67,25 +68,36 @@ impl Default for WriteOptions {
 }
 
 impl WriteOptions {
+    /// Constructs a new set of default options.
     pub fn new() -> Self {
         Self::default()
     }
+
+    /// Returns `self` with the compression format set to `compression`.
     pub fn with_compression(self, compression: Option<Compression>) -> Self {
         Self {
             compression,
             ..self
         }
     }
+
+    /// Returns `self` with the version set to `version`.
     pub fn with_version(self, version: Version) -> Self {
         Self { version, ..self }
     }
+
+    /// Writes `dictionary` to `path` in system file format. Returns a [Writer]
+    /// that can be used for writing cases to the new file.
     pub fn write_file(
         self,
         dictionary: &Dictionary,
         path: impl AsRef<Path>,
-    ) -> Result<Writer<File>, BinError> {
-        self.write_writer(dictionary, File::create(path)?)
+    ) -> Result<Writer<BufWriter<File>>, BinError> {
+        self.write_writer(dictionary, BufWriter::new(File::create(path)?))
     }
+
+    /// Writes `dictionary` to `writer` in system file format. Returns a
+    /// [Writer] that can be used for writing cases to the new file.
     pub fn write_writer(
         self,
         dictionary: &Dictionary,
@@ -123,6 +135,10 @@ fn put_attributes(attributes: &Attributes, s: &mut String) {
     }
 }
 
+/// Compression bias: stored in the file header, and added to a small integer
+/// value to form the opcode that represents it in compressed data.
+const BIAS: f64 = 100.0;
+
 impl<'a, W> DictionaryWriter<'a, W>
 where
     W: Write + Seek,
@@ -146,6 +162,7 @@ where
         self.write_header()?;
         self.write_variables()?;
         self.write_value_labels()?;
+        self.write_documents()?;
         self.write_integer_record()?;
         self.write_float_record()?;
         self.write_var_sets()?;
@@ -196,7 +213,7 @@ where
                 0
             },
             n_cases: u32::MAX,
-            bias: 100.0,
+            bias: BIAS,
             creation_date: as_byte_array(now.format("%d %b %y").to_string()),
             creation_time: as_byte_array(now.format("%H:%M:%S").to_string()),
             file_label: as_byte_array(self.dictionary.file_label.clone().unwrap_or_default()),
@@ -345,7 +362,7 @@ where
         Ok(())
     }
 
-    pub fn write_documents(&mut self) -> Result<(), BinError> {
+    fn write_documents(&mut self) -> Result<(), BinError> {
         if !self.dictionary.documents.is_empty() {
             (6u32, self.dictionary.documents.len() as u32).write_le(self.writer)?;
             for line in &self.dictionary.documents {
@@ -787,6 +804,8 @@ impl CaseVar {
 pub struct Writer<W> {
     compression: Option<Compression>,
     case_vars: Vec<CaseVar>,
+    opcodes: Vec<u8>,
+    data: Vec<u8>,
     inner: W,
 }
 
@@ -795,6 +814,8 @@ impl<W> Writer<W> {
         Self {
             compression: options.compression,
             case_vars,
+            opcodes: Vec::with_capacity(8),
+            data: Vec::with_capacity(64),
             inner,
         }
     }
@@ -804,9 +825,11 @@ impl<W> Writer<W>
 where
     W: Write + Seek,
 {
+    /// Writes `case` to the system file.
     pub fn write_case(&mut self, case: &Case) -> Result<(), BinError> {
         match self.compression {
-            Some(_) => todo!(),
+            Some(Compression::Simple) => self.write_case_compressed(case),
+            Some(Compression::ZLib) => todo!(),
             None => self.write_case_uncompressed(case),
         }
     }
@@ -830,4 +853,83 @@ where
         }
         Ok(())
     }
+
+    /// Writes out the pending block, if there is one: `opcodes`, zero-padded
+    /// to 8 bytes, followed by `data`. Both buffers are cleared afterward.
+    fn flush_compressed(
+        opcodes: &mut Vec<u8>,
+        data: &mut Vec<u8>,
+        inner: &mut W,
+    ) -> Result<(), BinError> {
+        if !opcodes.is_empty() {
+            opcodes.resize(8, 0);
+            inner.write_all(opcodes)?;
+            // `write_all`, not `write`, which may write only part of `data`.
+            inner.write_all(data)?;
+            opcodes.clear();
+            data.clear();
+        }
+        Ok(())
+    }
+
+    /// Appends `opcode` to the pending block, first writing out the block if
+    /// it already holds 8 opcodes.
+    fn put_opcode(
+        opcodes: &mut Vec<u8>,
+        data: &mut Vec<u8>,
+        inner: &mut W,
+        opcode: u8,
+    ) -> Result<(), BinError> {
+        if opcodes.len() >= 8 {
+            Self::flush_compressed(opcodes, data, inner)?;
+        }
+        opcodes.push(opcode);
+        Ok(())
+    }
+
+    /// Encodes `case` for [Compression::Simple].
+    ///
+    /// A numeric value that is `None` becomes opcode 255, an integer in
+    /// `1 - BIAS..=251 - BIAS` becomes opcode `number + BIAS`, and any other
+    /// number becomes opcode 253 plus 8 little-endian bytes of pending data.
+    /// A full block of 8 opcodes is written out only when the next opcode
+    /// arrives, so up to 8 opcodes can stay pending after this returns.
+    fn write_case_compressed(&mut self, case: &Case) -> Result<(), BinError> {
+        for (var, datum) in zip_eq(&self.case_vars, &case.0) {
+            match var {
+                CaseVar::Numeric => match datum.as_number().unwrap() {
+                    None => {
+                        Self::put_opcode(&mut self.opcodes, &mut self.data, &mut self.inner, 255)?
+                    }
+                    Some(number) => {
+                        if number >= 1.0 - BIAS
+                            && number <= 251.0 - BIAS
+                            && number == number.trunc()
+                        {
+                            Self::put_opcode(
+                                &mut self.opcodes,
+                                &mut self.data,
+                                &mut self.inner,
+                                (number + BIAS) as u8,
+                            )?
+                        } else {
+                            Self::put_opcode(
+                                &mut self.opcodes,
+                                &mut self.data,
+                                &mut self.inner,
+                                253,
+                            )?;
+
+                            // Append: a new `Cursor` would start at offset 0 and overwrite
+                            // data already pending in this block.
+                            self.data.extend_from_slice(&number.to_le_bytes());
+                        }
+                    }
+                },
+
+                CaseVar::String { width: _, encoding } => todo!(),
+            }
+        }
+        Ok(())
+    }
 }
-- 
2.30.2