From b0dc341103f18611986c29d98fe87e80baaf774d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 30 Jul 2025 15:26:05 -0700 Subject: [PATCH] First test for writing a system file. --- rust/pspp/src/crypto/mod.rs | 8 +- rust/pspp/src/format/display/test.rs | 10 +- rust/pspp/src/format/parse.rs | 2 +- rust/pspp/src/sys/mod.rs | 2 +- rust/pspp/src/sys/test.rs | 72 +++++++++-- .../testdata/write-numeric-simple.expected | 35 ++++++ .../write-numeric-uncompressed.expected | 35 ++++++ .../sys/testdata/write-numeric-zlib.expected | 35 ++++++ rust/pspp/src/sys/write.rs | 112 ++++++++++++------ 9 files changed, 250 insertions(+), 61 deletions(-) create mode 100644 rust/pspp/src/sys/testdata/write-numeric-simple.expected create mode 100644 rust/pspp/src/sys/testdata/write-numeric-uncompressed.expected create mode 100644 rust/pspp/src/sys/testdata/write-numeric-zlib.expected diff --git a/rust/pspp/src/crypto/mod.rs b/rust/pspp/src/crypto/mod.rs index 8401ef7b72..8685f938af 100644 --- a/rust/pspp/src/crypto/mod.rs +++ b/rust/pspp/src/crypto/mod.rs @@ -588,9 +588,7 @@ mod test { use crate::crypto::{EncodedPassword, EncryptedFile, FileType}; fn test_decrypt(input_name: &Path, expected_name: &Path, password: &str, file_type: FileType) { - let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/crypto/testdata") - .join(input_name); + let input_filename = Path::new("src/crypto/testdata").join(input_name); let input = std::fs::read(&input_filename).unwrap(); let mut cursor = Cursor::new(&input); let file = EncryptedFile::new(&mut cursor).unwrap(); @@ -600,9 +598,7 @@ mod test { let mut actual = Vec::new(); std::io::copy(&mut reader, &mut actual).unwrap(); - let expected_filename = Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/crypto/testdata") - .join(expected_name); + let expected_filename = Path::new("src/crypto/testdata").join(expected_name); let expected = std::fs::read(&expected_filename).unwrap(); if actual != expected { panic!(); diff --git a/rust/pspp/src/format/display/test.rs b/rust/pspp/src/format/display/test.rs index 5ff3f00165..4a4ae6a0a4 100644 --- a/rust/pspp/src/format/display/test.rs +++ b/rust/pspp/src/format/display/test.rs @@ -31,9 +31,7 @@ use crate::{ }; fn test(name: &str) { - let filename = Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/format/testdata/display") - .join(name); + let filename = Path::new("src/format/testdata/display").join(name); let input = BufReader::new(File::open(&filename).unwrap()); let settings = Settings::default() .with_cc(CC::A, ",,,".parse().unwrap()) @@ -233,9 +231,7 @@ fn non_ascii_cc() { } fn test_binhex(name: &str) { - let filename = Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/format/testdata/display") - .join(name); + let filename = Path::new("src/format/testdata/display").join(name); let input = BufReader::new(File::open(&filename).unwrap()); let mut value = None; let mut value_name = String::new(); @@ -1278,7 +1274,7 @@ fn ymdhms25_5() { } fn test_times(format: Format, name: &str) { - let directory = Path::new(env!("CARGO_MANIFEST_DIR")).join("src/format/testdata/display"); + let directory = Path::new("src/format/testdata/display"); let input_filename = directory.join("time-input.txt"); let input = BufReader::new(File::open(&input_filename).unwrap()); diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs index 8b84a4e7e3..eafddcf930 100644 --- a/rust/pspp/src/format/parse.rs +++ b/rust/pspp/src/format/parse.rs @@ -930,7 +930,7 @@ mod test { }; fn test(name: &str, type_: Type) { - let base = Path::new(env!("CARGO_MANIFEST_DIR")).join("src/format/testdata/parse"); + let base = Path::new("src/format/testdata/parse"); let input_stream = BufReader::new(File::open(base.join("num-in.txt")).unwrap()); let expected_stream = BufReader::new(File::open(base.join(name)).unwrap()); for ((input, expected), line_number) in input_stream diff --git a/rust/pspp/src/sys/mod.rs b/rust/pspp/src/sys/mod.rs index cfb64761e5..67217b4925 100644 --- a/rust/pspp/src/sys/mod.rs +++ b/rust/pspp/src/sys/mod.rs @@ -39,7 +39,7 @@ pub mod sack; mod write; use serde::Serializer; -pub use write::{Version, WriteOptions, Writer}; +pub use write::{SysfileVersion, WriteOptions, Writer}; #[cfg(test)] mod test; diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index f19924c2d8..7fa06d650b 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -15,23 +15,32 @@ // this program. If not, see . use std::{ + borrow::Cow, fs::File, io::{BufRead, BufReader, Cursor, Seek}, - path::Path, + path::{Path, PathBuf}, sync::Arc, }; +use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; +use encoding_rs::UTF_8; +use hexplay::HexView; + use crate::{ crypto::EncryptedFile, + data::{BorrowedDatum, Datum}, + dictionary::{self, Dictionary, VarWidth, Variable}, endian::Endian, + identifier::Identifier, output::{ pivot::{test::assert_lines_eq, Axis3, Dimension, Group, PivotTable, Value}, Details, Item, Text, }, sys::{ cooked::ReadOptions, - raw::{self, ErrorDetails}, + raw::{self, records::Compression, ErrorDetails}, sack::sack, + ProductVersion, WriteOptions, }, }; @@ -564,9 +573,58 @@ fn encrypted_file_without_password() { )); } +#[test] +fn write_numeric() { + for (compression, compression_string) in [ + (None, "uncompressed"), + (Some(Compression::Simple), "simple"), + (Some(Compression::ZLib), "zlib"), + ] { + let mut dictionary = Dictionary::new(UTF_8); + for i in 0..4 { + let name = Identifier::new(format!("variable{i}")).unwrap(); + dictionary + .add_var(Variable::new(name, VarWidth::Numeric, UTF_8)) + .unwrap(); + } + let mut cases = WriteOptions::new() + .with_compression(compression) + .with_timestamp(NaiveDateTime::new( + NaiveDate::from_ymd_opt(2025, 7, 30).unwrap(), + NaiveTime::from_hms_opt(15, 7, 55).unwrap(), + )) + .with_product_name(Cow::from("PSPP TEST DATA FILE")) + .with_product_version(ProductVersion(1, 2, 3)) + .write_writer(&dictionary, Cursor::new(Vec::new())) + .unwrap(); + for case in [ + [1, 1, 1, 2], + [1, 1, 2, 30], + [1, 2, 1, 8], + [1, 2, 2, 20], + [2, 1, 1, 2], + [2, 1, 2, 22], + [2, 2, 1, 1], + [2, 2, 2, 3], + ] { + cases + .write_case( + case.into_iter() + .map(|number| BorrowedDatum::Number(Some(number as f64))), + ) + .unwrap(); + } + let sysfile = cases.finish().unwrap().unwrap().into_inner(); + let expected_filename = PathBuf::from(&format!( + "src/sys/testdata/write-numeric-{compression_string}.expected" + )); + let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap(); + test_sysfile(Cursor::new(sysfile), &expected, &expected_filename); + } +} + fn test_raw_sysfile(name: &str) { - let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/sys/testdata") + let input_filename = Path::new("src/sys/testdata") .join(name) .with_extension("sav"); let sysfile = BufReader::new(File::open(&input_filename).unwrap()); @@ -576,8 +634,7 @@ fn test_raw_sysfile(name: &str) { } fn test_encrypted_sysfile(name: &str, password: &str) { - let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/sys/testdata") + let input_filename = Path::new("src/sys/testdata") .join(name) .with_extension("sav"); let sysfile = EncryptedFile::new(File::open(&input_filename).unwrap()) @@ -590,8 +647,7 @@ fn test_encrypted_sysfile(name: &str, password: &str) { } fn test_sack_sysfile(name: &str) { - let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/sys/testdata") + let input_filename = Path::new("src/sys/testdata") .join(name) .with_extension("sack"); let input = String::from_utf8(std::fs::read(&input_filename).unwrap()).unwrap(); diff --git a/rust/pspp/src/sys/testdata/write-numeric-simple.expected b/rust/pspp/src/sys/testdata/write-numeric-simple.expected new file mode 100644 index 0000000000..e4aaffd183 --- /dev/null +++ b/rust/pspp/src/sys/testdata/write-numeric-simple.expected @@ -0,0 +1,35 @@ +╭──────────────────────┬────────────────────╮ +│ Created │30-JUL-2025 15:07:55│ +├──────────────────────┼────────────────────┤ +│Writer Product │PSPP TEST DATA FILE │ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────┤ +│ Compression │SAV │ +│ Number of Cases│ 8│ +╰──────────────────────┴────────────────────╯ + +╭─────────┬─╮ +│Variables│4│ +╰─────────┴─╯ + +╭─────────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├─────────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│variable0│ 1│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable1│ 2│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable2│ 3│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable3│ 4│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +╰─────────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬─────────┬─────────┬─────────┬─────────╮ +│Case│variable0│variable1│variable2│variable3│ +├────┼─────────┼─────────┼─────────┼─────────┤ +│1 │ 1.00│ 1.00│ 1.00│ 2.00│ +│2 │ 1.00│ 1.00│ 2.00│ 30.00│ +│3 │ 1.00│ 2.00│ 1.00│ 8.00│ +│4 │ 1.00│ 2.00│ 2.00│ 20.00│ +│5 │ 2.00│ 1.00│ 1.00│ 2.00│ +│6 │ 2.00│ 1.00│ 2.00│ 22.00│ +│7 │ 2.00│ 2.00│ 1.00│ 1.00│ +│8 │ 2.00│ 2.00│ 2.00│ 3.00│ +╰────┴─────────┴─────────┴─────────┴─────────╯ diff --git a/rust/pspp/src/sys/testdata/write-numeric-uncompressed.expected b/rust/pspp/src/sys/testdata/write-numeric-uncompressed.expected new file mode 100644 index 0000000000..084ca95c27 --- /dev/null +++ b/rust/pspp/src/sys/testdata/write-numeric-uncompressed.expected @@ -0,0 +1,35 @@ +╭──────────────────────┬────────────────────╮ +│ Created │30-JUL-2025 15:07:55│ +├──────────────────────┼────────────────────┤ +│Writer Product │PSPP TEST DATA FILE │ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 8│ +╰──────────────────────┴────────────────────╯ + +╭─────────┬─╮ +│Variables│4│ +╰─────────┴─╯ + +╭─────────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├─────────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│variable0│ 1│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable1│ 2│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable2│ 3│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable3│ 4│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +╰─────────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬─────────┬─────────┬─────────┬─────────╮ +│Case│variable0│variable1│variable2│variable3│ +├────┼─────────┼─────────┼─────────┼─────────┤ +│1 │ 1.00│ 1.00│ 1.00│ 2.00│ +│2 │ 1.00│ 1.00│ 2.00│ 30.00│ +│3 │ 1.00│ 2.00│ 1.00│ 8.00│ +│4 │ 1.00│ 2.00│ 2.00│ 20.00│ +│5 │ 2.00│ 1.00│ 1.00│ 2.00│ +│6 │ 2.00│ 1.00│ 2.00│ 22.00│ +│7 │ 2.00│ 2.00│ 1.00│ 1.00│ +│8 │ 2.00│ 2.00│ 2.00│ 3.00│ +╰────┴─────────┴─────────┴─────────┴─────────╯ diff --git a/rust/pspp/src/sys/testdata/write-numeric-zlib.expected b/rust/pspp/src/sys/testdata/write-numeric-zlib.expected new file mode 100644 index 0000000000..40a0b02010 --- /dev/null +++ b/rust/pspp/src/sys/testdata/write-numeric-zlib.expected @@ -0,0 +1,35 @@ +╭──────────────────────┬────────────────────╮ +│ Created │30-JUL-2025 15:07:55│ +├──────────────────────┼────────────────────┤ +│Writer Product │PSPP TEST DATA FILE │ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────┤ +│ Compression │ZSAV │ +│ Number of Cases│ 8│ +╰──────────────────────┴────────────────────╯ + +╭─────────┬─╮ +│Variables│4│ +╰─────────┴─╯ + +╭─────────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├─────────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│variable0│ 1│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable1│ 2│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable2│ 3│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +│variable3│ 4│ │ │Input│ 8│Right │F8.2 │F8.2 │ │ +╰─────────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬─────────┬─────────┬─────────┬─────────╮ +│Case│variable0│variable1│variable2│variable3│ +├────┼─────────┼─────────┼─────────┼─────────┤ +│1 │ 1.00│ 1.00│ 1.00│ 2.00│ +│2 │ 1.00│ 1.00│ 2.00│ 30.00│ +│3 │ 1.00│ 2.00│ 1.00│ 8.00│ +│4 │ 1.00│ 2.00│ 2.00│ 20.00│ +│5 │ 2.00│ 1.00│ 1.00│ 2.00│ +│6 │ 2.00│ 1.00│ 2.00│ 22.00│ +│7 │ 2.00│ 2.00│ 1.00│ 1.00│ +│8 │ 2.00│ 2.00│ 2.00│ 3.00│ +╰────┴─────────┴─────────┴─────────┴─────────╯ diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index 8fa7d11410..cdbb5e6028 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -9,7 +9,7 @@ use std::{ }; use binrw::{BinWrite, Endian, Error as BinError}; -use chrono::Local; +use chrono::{Local, NaiveDateTime}; use either::Either; use encoding_rs::Encoding; use flate2::write::ZlibEncoder; @@ -40,7 +40,7 @@ use crate::{ /// System file format version. #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] -pub enum Version { +pub enum SysfileVersion { /// Obsolete version. V2, @@ -50,20 +50,36 @@ pub enum Version { } /// Options for writing a system file. -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] pub struct WriteOptions { /// How to compress (if at all) data in the system file. pub compression: Option, /// System file version to write. - pub version: Version, + pub sysfile_version: SysfileVersion, + + /// Date and time to write to the file. + pub timestamp: NaiveDateTime, + + /// Product name. + /// + /// Only the first 40 bytes are written. + pub product_name: Cow<'static, str>, + + /// Product version number. + /// + /// The default is taken from `CARGO_PKG_VERSION`. + pub product_version: ProductVersion, } impl Default for WriteOptions { fn default() -> Self { Self { compression: Some(Compression::Simple), - version: Default::default(), + sysfile_version: Default::default(), + timestamp: Local::now().naive_local(), + product_name: Cow::from(concat!("GNU PSPP (Rust) ", env!("CARGO_PKG_VERSION"))), + product_version: ProductVersion::VERSION, } } } @@ -82,9 +98,32 @@ impl WriteOptions { } } - /// Returns `self` with the version set to `version`. - pub fn with_version(self, version: Version) -> Self { - Self { version, ..self } + pub fn with_timestamp(self, timestamp: NaiveDateTime) -> Self { + Self { timestamp, ..self } + } + + /// Returns `self` with the system file version set to `sysfile_version`. + pub fn with_sysfile_version(self, sysfile_version: SysfileVersion) -> Self { + Self { + sysfile_version, + ..self + } + } + + /// Returns `self` with the product name set to `product_name`. + pub fn with_product_name(self, product_name: Cow<'static, str>) -> Self { + Self { + product_name, + ..self + } + } + + /// Returns `self` with the product version set to `product_version`. + pub fn with_product_version(self, product_version: ProductVersion) -> Self { + Self { + product_version, + ..self + } } /// Writes `dictionary` to `path` in system file format. Returns a [Writer] @@ -109,13 +148,13 @@ impl WriteOptions { { let mut dict_writer = DictionaryWriter::new(&self, &mut writer, dictionary); dict_writer.write()?; - Writer::new(self, dict_writer.case_vars, writer) + let DictionaryWriter { case_vars, .. } = dict_writer; + Writer::new(self, case_vars, writer) } } struct DictionaryWriter<'a, W> { - compression: Option, - version: Version, + options: &'a WriteOptions, short_names: Vec>, case_vars: Vec, writer: &'a mut W, @@ -138,14 +177,19 @@ fn put_attributes(attributes: &Attributes, s: &mut String) { const BIAS: f64 = 100.0; +fn encode_fixed_string(s: &str, encoding: &'static Encoding) -> [u8; N] { + let mut encoded = encoding.encode(s).0.into_owned(); + encoded.resize(N, b' '); + encoded.try_into().unwrap() +} + impl<'a, W> DictionaryWriter<'a, W> where W: Write + Seek, { - pub fn new(options: &WriteOptions, writer: &'a mut W, dictionary: &'a Dictionary) -> Self { + pub fn new(options: &'a WriteOptions, writer: &'a mut W, dictionary: &'a Dictionary) -> Self { Self { - compression: options.compression, - version: options.version, + options, short_names: dictionary.short_names(), case_vars: dictionary .variables @@ -185,23 +229,20 @@ where bytes.try_into().unwrap() } - let now = Local::now(); let header = RawHeader { - magic: if self.compression == Some(Compression::ZLib) { + magic: if self.options.compression == Some(Compression::ZLib) { Magic::Zsav } else { Magic::Sav } .into(), - eye_catcher: { - as_byte_array(format!( - "@(#) SPSS DATA FILE GNU pspp (Rust) {}", - env!("CARGO_PKG_VERSION") - )) - }, + eye_catcher: encode_fixed_string( + &format!("@(#) SPSS DATA FILE {}", &self.options.product_name), + self.dictionary.encoding(), + ), layout_code: 2, nominal_case_size: count_segments(&self.case_vars), - compression_code: match self.compression { + compression_code: match self.options.compression { Some(Compression::Simple) => 1, Some(Compression::ZLib) => 2, None => 0, @@ -213,8 +254,8 @@ where }, n_cases: u32::MAX, bias: BIAS, - creation_date: as_byte_array(now.format("%d %b %y").to_string()), - creation_time: as_byte_array(now.format("%H:%M:%S").to_string()), + creation_date: as_byte_array(self.options.timestamp.format("%d %b %y").to_string()), + creation_time: as_byte_array(self.options.timestamp.format("%H:%M:%S").to_string()), file_label: as_byte_array(self.dictionary.file_label.clone().unwrap_or_default()), }; header.write_le(self.writer) @@ -312,11 +353,6 @@ where Ok(()) } - fn encode_fixed_string(s: &str, encoding: &'static Encoding) -> [u8; N] { - let mut encoded = encoding.encode(s).0.into_owned(); - encoded.resize(N, b' '); - encoded.try_into().unwrap() - } fn to_raw_format(mut format: Format, width: VarWidth) -> RawFormat { format.resize(width); RawFormat::try_from(format).unwrap() @@ -379,7 +415,7 @@ where 4u32, 8u32, RawIntegerInfoRecord { - version: ProductVersion::VERSION, + version: self.options.product_version, machine_code: -1, floating_point_rep: 1, compression_code: 1, @@ -505,7 +541,7 @@ where } fn write_long_variable_names(&mut self) -> Result<(), BinError> { - if self.version == Version::V2 { + if self.options.sysfile_version == SysfileVersion::V2 { return Ok(()); } @@ -586,7 +622,7 @@ where } fn write_data_file_attributes(&mut self) -> Result<(), BinError> { - if self.version != Version::V2 { + if self.options.sysfile_version != SysfileVersion::V2 { return Ok(()); } let mut s = String::new(); @@ -595,7 +631,7 @@ where } fn write_variable_attributes(&mut self) -> Result<(), BinError> { - if self.version != Version::V2 { + if self.options.sysfile_version != SysfileVersion::V2 { return Ok(()); } let mut s = String::new(); @@ -932,7 +968,7 @@ where /// Finishes writing the file, flushing buffers and updating headers to /// match the final case counts. - pub fn finish(mut self) -> Result<(), BinError> { + pub fn finish(mut self) -> Result, BinError> { self.try_finish() } @@ -942,9 +978,9 @@ where /// # Panic /// /// Attempts to write more cases after calling this function may will panic. - pub fn try_finish(&mut self) -> Result<(), BinError> { + pub fn try_finish(&mut self) -> Result, BinError> { let Some(inner) = self.inner.take() else { - return Ok(()); + return Ok(None); }; let mut inner = match inner { @@ -974,7 +1010,7 @@ where let _ = inner.write_all(&n_cases.to_le_bytes()); } } - Ok(()) + Ok(Some(inner)) } /// Writes `case` to the system file. -- 2.30.2