/// Returns true if this is a very long string width, meaning wider than 255
/// bytes, which was the limit for old versions of SPSS.
- pub fn is_very_long(&self) -> bool {
+ pub fn is_very_long_string(&self) -> bool {
match *self {
VarWidth::Numeric => false,
VarWidth::String(width) => width > 255,
/// a PSPP user. Only very long string variables have more than one
/// segment.
pub fn n_segments(&self) -> usize {
- if self.is_very_long() {
+ if self.is_very_long_string() {
self.as_string_width().unwrap().div_ceil(Self::SEGMENT_SIZE)
} else {
1
/// by a PSPP user.
pub fn segment_alloc_width(&self, segment_idx: usize) -> usize {
debug_assert!(segment_idx < self.n_segments());
- debug_assert!(self.is_very_long());
+ debug_assert!(self.is_very_long_string());
if segment_idx < self.n_segments() - 1 {
255
8,
)
.or_else(|_| {
- Identifier::new(format!("V{}", Display26Adic::new_uppercase(self.index)))
+ Identifier::new(format!(
+ "V{}",
+ Display26Adic::new_uppercase(self.index)
+ ))
})
.unwrap()
};
Err(MissingValuesError::TooMany)
} else if value.var_type() != VarType::from(self.width) {
Err(MissingValuesError::MixedTypes)
+ } else if value == Datum::Number(None) {
+ Err(MissingValuesError::SystemMissing)
} else if value.resize(self.width).is_err() {
Err(MissingValuesError::TooWide)
} else {
TooMany,
TooWide,
MixedTypes,
+ SystemMissing,
}
impl From<ResizeError> for MissingValuesError {
}
}
-#[derive(Copy, Clone, Debug, Serialize)]
+#[derive(Copy, Clone, Debug, Serialize, PartialEq)]
pub enum MissingValueRange {
In { low: f64, high: f64 },
From { low: f64 },
)
.write_le(self.writer)?;
}
- variable.missing_values().values().write_le(self.writer)?;
+ let pad = variable
+ .width
+ .as_string_width()
+ .map_or(0, |width| 8 - width);
+ for value in variable.missing_values().values() {
+ (value, Zeros(pad)).write_le(self.writer)?;
+ }
}
write_variable_continuation_records(&mut self.writer, seg0_width)?;
fn write_very_long_strings(&mut self) -> Result<(), BinError> {
let mut s = String::new();
for (index, variable) in self.dictionary.variables.iter().enumerate() {
- if variable.width.is_very_long() {
+ if variable.width.is_very_long_string() {
let width = variable.width.as_string_width().unwrap();
write!(&mut s, "{}={width:05}\0\t", &self.short_names[index][0],).unwrap();
}
mod tests {
use std::io::Cursor;
- use binrw::BinRead;
+ use binrw::{BinRead, Endian};
use encoding_rs::UTF_8;
+ use hexplay::HexView;
use itertools::Itertools;
use crate::{
- dictionary::{Dictionary, VarWidth, Variable},
+ data::{ByteString, Datum},
+ dictionary::{Dictionary, MissingValueRange, VarWidth, Variable},
identifier::Identifier,
sys::{
- raw::records::{RawHeader, RawVariableRecord},
+ raw::records::{RawHeader, RawVariableRecord, VariableRecord},
write::DictionaryWriter,
WriteOptions,
},
};
+ /// Checks that the header record has the right nominal case size and weight
+ /// index, even with long and very long string variables.
#[test]
fn header() {
for variables in [
}
}
+ /// Checks that variable records are followed by the right number of
+ /// continuation records, and that very long string variables have the right
+ /// number of segment variables.
#[test]
- fn variables() {
+ fn variables_widths() {
let variables = [
(VarWidth::Numeric, vec![0]),
(VarWidth::String(1), vec![1]),
}
}
}
+
+    /// Checks that missing values are written correctly.
+    ///
+    /// Each test case is a variable width, a list of discrete missing values,
+    /// and an optional missing value range.  For each case, the test builds a
+    /// dictionary that contains a single variable with those missing values,
+    /// writes its variable records, reads the first variable record back, and
+    /// compares the missing values in it against the inputs.
+    ///
+    /// For a width that reports `is_long_string()`, the variable record is
+    /// expected to contain no discrete missing values at all.
+    #[test]
+    fn variables_missing_values() {
+        let test_cases = [
+            // Numeric variables with 1 to 3 discrete values and no range.
+            (VarWidth::Numeric, vec![Datum::Number(Some(1.0))], None),
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0)), Datum::Number(Some(2.0))],
+                None,
+            ),
+            (
+                VarWidth::Numeric,
+                vec![
+                    Datum::Number(Some(1.0)),
+                    Datum::Number(Some(2.0)),
+                    Datum::Number(Some(3.0)),
+                ],
+                None,
+            ),
+            // Numeric variables with only a range.
+            (
+                VarWidth::Numeric,
+                vec![],
+                Some(MissingValueRange::In {
+                    low: 10.0,
+                    high: 20.0,
+                }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![],
+                Some(MissingValueRange::From { low: 100.0 }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![],
+                Some(MissingValueRange::To { high: 200.0 }),
+            ),
+            // Numeric variables with one discrete value plus a range.
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0))],
+                Some(MissingValueRange::In {
+                    low: 10.0,
+                    high: 20.0,
+                }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0))],
+                Some(MissingValueRange::From { low: 100.0 }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0))],
+                Some(MissingValueRange::To { high: 200.0 }),
+            ),
+            // String variables of width 5 with 1 to 3 discrete values.
+            (
+                VarWidth::String(5),
+                vec![Datum::String(ByteString::from("abcde"))],
+                None,
+            ),
+            (
+                VarWidth::String(5),
+                vec![
+                    Datum::String(ByteString::from("abcde")),
+                    Datum::String(ByteString::from("qwioe")),
+                ],
+                None,
+            ),
+            (
+                VarWidth::String(5),
+                vec![
+                    Datum::String(ByteString::from("abcde")),
+                    Datum::String(ByteString::from("qwioe")),
+                    Datum::String(ByteString::from("jksld")),
+                ],
+                None,
+            ),
+            // A string variable of width 9 with 8-byte values.
+            // NOTE(review): presumably this is the case meant to exercise the
+            // `is_long_string()` branch below -- confirm.
+            (
+                VarWidth::String(9),
+                vec![
+                    Datum::String(ByteString::from("abcdeasd")),
+                    Datum::String(ByteString::from("qwioejdf")),
+                    Datum::String(ByteString::from("jksldiwe")),
+                ],
+                None,
+            ),
+        ];
+
+        for (width, values, range) in test_cases {
+            let mut dictionary = Dictionary::new(UTF_8);
+            let mut variable = Variable::new(Identifier::new("var").unwrap(), width, UTF_8);
+            variable
+                .missing_values_mut()
+                .add_values(values.iter().map(|value| value.as_encoded(UTF_8).cloned()))
+                .unwrap();
+            // `MissingValueRange` is `Copy`, so the range can be passed by
+            // value here and still be compared against further down.
+            if let Some(range) = range {
+                variable.missing_values_mut().add_range(range).unwrap();
+            }
+            dictionary.add_var(variable).unwrap();
+
+            let mut raw = Vec::new();
+            DictionaryWriter::new(
+                &WriteOptions::reproducible(None),
+                &mut Cursor::new(&mut raw),
+                &dictionary,
+            )
+            .write_variables()
+            .unwrap();
+            // Hex dump of the written bytes, to help diagnose a failure.
+            println!("{}", HexView::new(&raw));
+
+            // Skip the first 4 bytes before parsing the record.
+            // NOTE(review): presumably those bytes are the record type tag --
+            // confirm against the reader.
+            let mut cursor = Cursor::new(&raw[4..]);
+            // The closure passed to `read` panics if it is ever invoked.
+            let record =
+                VariableRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap();
+            if !width.is_long_string() {
+                assert_eq!(&record.missing_values.values, &values);
+            } else {
+                assert_eq!(&record.missing_values.values, &vec![]);
+            }
+            assert_eq!(&record.missing_values.range, &range);
+        }
+    }
}