From: Ben Pfaff Date: Sun, 3 Aug 2025 16:36:19 +0000 (-0700) Subject: Add more write tests. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d79acc03bda763585f908d3c48caadba4d28d95c;p=pspp Add more write tests. --- diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index d42abfafe2..509ef7f819 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -177,7 +177,7 @@ impl VarWidth { /// Returns true if this is a very long string width, meaning wider than 255 /// bytes, which was the limit for old versions of SPSS. - pub fn is_very_long(&self) -> bool { + pub fn is_very_long_string(&self) -> bool { match *self { VarWidth::Numeric => false, VarWidth::String(width) => width > 255, @@ -194,7 +194,7 @@ impl VarWidth { /// a PSPP user. Only very long string variables have more than one /// segment. pub fn n_segments(&self) -> usize { - if self.is_very_long() { + if self.is_very_long_string() { self.as_string_width().unwrap().div_ceil(Self::SEGMENT_SIZE) } else { 1 @@ -219,7 +219,7 @@ impl VarWidth { /// by a PSPP user. pub fn segment_alloc_width(&self, segment_idx: usize) -> usize { debug_assert!(segment_idx < self.n_segments()); - debug_assert!(self.is_very_long()); + debug_assert!(self.is_very_long_string()); if segment_idx < self.n_segments() - 1 { 255 @@ -712,7 +712,10 @@ impl Dictionary { 8, ) .or_else(|_| { - Identifier::new(format!("V{}", Display26Adic::new_uppercase(self.index))) + Identifier::new(format!( + "V{}", + Display26Adic::new_uppercase(self.index) + )) }) .unwrap() }; @@ -2006,6 +2009,8 @@ impl<'a> MissingValuesMut<'a> { Err(MissingValuesError::TooMany) } else if value.var_type() != VarType::from(self.width) { Err(MissingValuesError::MixedTypes) + } else if value == Datum::Number(None) { + Err(MissingValuesError::SystemMissing) } else if value.resize(self.width).is_err() { Err(MissingValuesError::TooWide) } else { @@ -2082,6 +2087,7 @@ pub enum MissingValuesError { TooMany, TooWide, MixedTypes, + SystemMissing, } impl From for MissingValuesError { @@ -2182,7 +2188,7 @@ impl MissingValues { } } -#[derive(Copy, Clone, Debug, Serialize)] +#[derive(Copy, Clone, Debug, Serialize, PartialEq)] pub enum MissingValueRange { In { low: f64, high: f64 }, From { low: f64 }, diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 63fca6744c..8609590e6c 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -1271,7 +1271,9 @@ impl Records { Err(MissingValuesError::TooWide) => { warn(Error::MissingValuesTooWide(record.var_name.clone())) } - Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => { + Err(MissingValuesError::TooMany) + | Err(MissingValuesError::MixedTypes) + | Err(MissingValuesError::SystemMissing) => { unreachable!() } } diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 9b88153a8f..7dce91626c 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -616,7 +616,7 @@ impl Record { { let rec_type: u32 = endian.parse(read_bytes(reader)?); match rec_type { - 2 => Ok(Some(VariableRecord::read(reader, endian, warn)?)), + 2 => Ok(Some(Record::Variable(VariableRecord::read(reader, endian, warn)?))), 3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?), 6 => Ok(Some(DocumentRecord::read(reader, endian)?)), 7 => Extension::read(reader, endian, var_types, warn), diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index d21a8383ae..2a76362e67 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -542,11 +542,7 @@ pub struct RawVariableRecord { impl VariableRecord { /// Reads a variable record from `r`. - pub fn read( - r: &mut R, - endian: Endian, - warn: &mut dyn FnMut(Warning), - ) -> Result + pub fn read(r: &mut R, endian: Endian, warn: &mut dyn FnMut(Warning)) -> Result where R: Read + Seek, { @@ -595,7 +591,7 @@ impl VariableRecord { let end_offset = r.stream_position()?; - Ok(Record::Variable(VariableRecord { + Ok(Self { offsets: start_offset..end_offset, width, name: raw_record.name.into(), @@ -603,7 +599,7 @@ impl VariableRecord { write_format: raw_record.write_format, missing_values, label, - })) + }) } /// Decodes a variable record using `decoder`. diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index b2a8da96b5..bd88012442 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -327,7 +327,13 @@ where ) .write_le(self.writer)?; } - variable.missing_values().values().write_le(self.writer)?; + let pad = variable + .width + .as_string_width() + .map_or(0, |width| 8 - width); + for value in variable.missing_values().values() { + (value, Zeros(pad)).write_le(self.writer)?; + } } write_variable_continuation_records(&mut self.writer, seg0_width)?; @@ -589,7 +595,7 @@ where fn write_very_long_strings(&mut self) -> Result<(), BinError> { let mut s = String::new(); for (index, variable) in self.dictionary.variables.iter().enumerate() { - if variable.width.is_very_long() { + if variable.width.is_very_long_string() { let width = variable.width.as_string_width().unwrap(); write!(&mut s, "{}={width:05}\0\t", &self.short_names[index][0],).unwrap(); } @@ -1223,20 +1229,24 @@ where mod tests { use std::io::Cursor; - use binrw::BinRead; + use binrw::{BinRead, Endian}; use encoding_rs::UTF_8; + use hexplay::HexView; use itertools::Itertools; use crate::{ - dictionary::{Dictionary, VarWidth, Variable}, + data::{ByteString, Datum}, + dictionary::{Dictionary, MissingValueRange, VarWidth, Variable}, identifier::Identifier, sys::{ - raw::records::{RawHeader, RawVariableRecord}, + raw::records::{RawHeader, RawVariableRecord, VariableRecord}, write::DictionaryWriter, WriteOptions, }, }; + /// Checks that the header record has the right nominal case size and weight + /// index, even with long and very long string variables. #[test] fn header() { for variables in [ @@ -1286,8 +1296,11 @@ mod tests { } } + /// Checks that variable records are followed by the right number of + /// continuation records, and that very long string variables have the right + /// number of segment variables. #[test] - fn variables() { + fn variables_widths() { let variables = [ (VarWidth::Numeric, vec![0]), (VarWidth::String(1), vec![1]), @@ -1355,4 +1368,130 @@ mod tests { } } } + + /// Checks that missing values are written correctly. + #[test] + fn variables_missing_values() { + let test_cases = [ + (VarWidth::Numeric, vec![Datum::Number(Some(1.0))], None), + ( + VarWidth::Numeric, + vec![Datum::Number(Some(1.0)), Datum::Number(Some(2.0))], + None, + ), + ( + VarWidth::Numeric, + vec![ + Datum::Number(Some(1.0)), + Datum::Number(Some(2.0)), + Datum::Number(Some(3.0)), + ], + None, + ), + ( + VarWidth::Numeric, + vec![], + Some(MissingValueRange::In { + low: 10.0, + high: 20.0, + }), + ), + ( + VarWidth::Numeric, + vec![], + Some(MissingValueRange::From { low: 100.0 }), + ), + ( + VarWidth::Numeric, + vec![], + Some(MissingValueRange::To { high: 200.0 }), + ), + ( + VarWidth::Numeric, + vec![Datum::Number(Some(1.0))], + Some(MissingValueRange::In { + low: 10.0, + high: 20.0, + }), + ), + ( + VarWidth::Numeric, + vec![Datum::Number(Some(1.0))], + Some(MissingValueRange::From { low: 100.0 }), + ), + ( + VarWidth::Numeric, + vec![Datum::Number(Some(1.0))], + Some(MissingValueRange::To { high: 200.0 }), + ), + ( + VarWidth::String(5), + vec![Datum::String(ByteString::from("abcde"))], + None, + ), + ( + VarWidth::String(5), + vec![ + Datum::String(ByteString::from("abcde")), + Datum::String(ByteString::from("qwioe")), + ], + None, + ), + ( + VarWidth::String(5), + vec![ + Datum::String(ByteString::from("abcde")), + Datum::String(ByteString::from("qwioe")), + Datum::String(ByteString::from("jksld")), + ], + None, + ), + ( + VarWidth::String(9), + vec![ + Datum::String(ByteString::from("abcdeasd")), + Datum::String(ByteString::from("qwioejdf")), + Datum::String(ByteString::from("jksldiwe")), + ], + None, + ), + ]; + + for (width, values, range) in test_cases { + let mut dictionary = Dictionary::new(UTF_8); + let mut variable = Variable::new(Identifier::new("var").unwrap(), width, UTF_8); + variable + .missing_values_mut() + .add_values(values.iter().map(|value| value.as_encoded(UTF_8).cloned())) + .unwrap(); + if let Some(range) = &range { + variable + .missing_values_mut() + .add_range(range.clone()) + .unwrap(); + } + dictionary.add_var(variable).unwrap(); + + let mut raw = Vec::new(); + DictionaryWriter::new( + &WriteOptions::reproducible(None), + &mut Cursor::new(&mut raw), + &dictionary, + ) + .write_variables() + .unwrap(); + println!("{}", HexView::new(&raw)); + + let mut cursor = Cursor::new(&raw[4..]); + let record = + VariableRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap(); + dbg!(&record); + if !width.is_long_string() { + assert_eq!(&record.missing_values.values, &values); + } else { + assert_eq!(&record.missing_values.values, &vec![]); + } + assert_eq!(&record.missing_values.range, &range); + } + } }