}
pub fn short_names(&self) -> Vec<SmallVec<[Identifier; 1]>> {
- fn pick_short_name(
- variable_name: &Identifier,
- used_names: &mut HashSet<Identifier>,
+ struct PickShortName<'a> { // resumable short-name picker: keeps its candidate index between calls
+ variable_name: &'a Identifier, // name that every candidate is derived from
+ used_names: &'a mut HashSet<Identifier>, // names already taken; `next` adds each name it returns
encoding: &'static Encoding,
- ) -> Identifier {
- for index in 0.. {
- let name = if index == 0 {
- variable_name.shortened(encoding)
- } else {
- variable_name
- .with_suffix(
- &format!("_{}", Display26Adic::new_uppercase(index)),
- encoding,
- 8,
- )
- .or_else(|_| {
- Identifier::new(format!("V{}", Display26Adic::new_uppercase(index)))
- })
- .unwrap()
- };
- if !used_names.contains(&name) {
- used_names.insert(name.clone());
- return name;
+ index: usize, // number of the next candidate to try; 0 selects the plain shortened name
+ }
+ impl<'a> PickShortName<'a> {
+ fn new(
+ variable_name: &'a Identifier,
+ used_names: &'a mut HashSet<Identifier>,
+ encoding: &'static Encoding,
+ ) -> Self {
+ Self {
+ variable_name,
+ used_names,
+ encoding,
+ index: 0, // a fresh picker always begins with the plain shortened name
+ }
+ }
+
+ fn next(&mut self) -> Identifier { // returns a candidate not yet in `used_names` and records it there
+ loop { // only exits through the `return` below
+ let name = if self.index == 0 {
+ self.variable_name.shortened(self.encoding) // candidate 0: the shortened name, no suffix
+ } else {
+ self.variable_name
+ .with_suffix(
+ &format!("_{}", Display26Adic::new_uppercase(self.index)), // "_" plus the index in `Display26Adic` uppercase form
+ self.encoding,
+ 8, // presumably the maximum short-name length — TODO confirm against `with_suffix`
+ )
+ .or_else(|_| { // suffixed form was rejected: fall back to a "V"-prefixed name
+ Identifier::new(format!("V{}", Display26Adic::new_uppercase(self.index)))
+ })
+ .unwrap() // panics if the fallback name is rejected as well
+ };
+ if !self.used_names.contains(&name) {
+ self.used_names.insert(name.clone()); // reserve it so later picks cannot return it again
+ return name; // `index` is left as-is; the next call re-checks this (now used) candidate and moves on
+ }
+ self.index += 1; // candidate already taken: advance to the next one
}
}
- unreachable!()
}
let mut used_names = HashSet::new();
// then similarly for additional segments.
for (variable, short_names) in self.variables.iter().zip(short_names.iter_mut()) {
if short_names[0].is_none() {
- short_names[0] = Some(pick_short_name(
- &variable.name,
- &mut used_names,
- self.encoding,
- ));
+ short_names[0] =
+ Some(PickShortName::new(&variable.name, &mut used_names, self.encoding).next());
}
}
for (variable, short_names) in self.variables.iter().zip(short_names.iter_mut()) {
+ let mut picker = PickShortName::new(&variable.name, &mut used_names, self.encoding);
for assigned_short_name in short_names.iter_mut().skip(1) {
if assigned_short_name.is_none() {
- *assigned_short_name = Some(pick_short_name(
- &variable.name,
- &mut used_names,
- self.encoding,
- ));
+ *assigned_short_name = Some(picker.next());
}
}
}
// this program. If not, see <http://www.gnu.org/licenses/>.
use std::{
- borrow::Cow,
fs::File,
io::{BufRead, BufReader, Cursor, Seek},
path::{Path, PathBuf},
};
use binrw::Endian;
-use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::UTF_8;
-use hexplay::HexView;
use crate::{
crypto::EncryptedFile,
cooked::ReadOptions,
raw::{self, records::Compression, ErrorDetails},
sack::sack,
- ProductVersion, WriteOptions,
+ WriteOptions,
},
};
));
}
-impl WriteOptions {
- /// Returns a [WriteOptions] with the given `compression` and the other
- /// members set to fixed values so that running at different times or with
- /// different crate names or versions won't change what's written to the
- /// file.
- fn reproducible(compression: Option<Compression>) -> Self {
- WriteOptions::new()
- .with_compression(compression)
- .with_timestamp(NaiveDateTime::new(
- NaiveDate::from_ymd_opt(2025, 7, 30).unwrap(),
- NaiveTime::from_hms_opt(15, 7, 55).unwrap(),
- ))
- .with_product_name(Cow::from("PSPP TEST DATA FILE"))
- .with_product_version(ProductVersion(1, 2, 3))
- }
-}
-
/// Tests the most basic kind of writing a system file, just writing a few
/// numeric variables and cases.
#[test]
let DictionaryWriter { case_vars, .. } = dict_writer;
Writer::new(self, case_vars, writer)
}
+
+ /// Returns a [WriteOptions] with the given `compression` and the other
+ /// members set to fixed values (timestamp 2025-07-30 15:07:55, product name
+ /// "PSPP TEST DATA FILE", product version 1.2.3), so that running at different
+ /// times or with different crate names or versions won't change what's written.
+ #[cfg(test)] // compiled only in test builds
+ pub(super) fn reproducible(compression: Option<Compression>) -> Self {
+ use chrono::{NaiveDate, NaiveTime}; // imported locally, inside this test-only helper
+ WriteOptions::new()
+ .with_compression(compression) // the only caller-controlled setting
+ .with_timestamp(NaiveDateTime::new( // fixed timestamp instead of anything run-dependent
+ NaiveDate::from_ymd_opt(2025, 7, 30).unwrap(),
+ NaiveTime::from_hms_opt(15, 7, 55).unwrap(),
+ ))
+ .with_product_name(Cow::from("PSPP TEST DATA FILE")) // fixed product name
+ .with_product_version(ProductVersion(1, 2, 3)) // fixed product version
+ }
}
struct DictionaryWriter<'a, W> {
},
)
.write_le(self.writer)?;
+ write_variable_continuation_records(&mut self.writer, width)?;
}
}
Err(IoError::from(ErrorKind::NotSeekable))
}
}
+
+#[cfg(test)]
+mod tests {
+ //! Tests that write dictionary records with [DictionaryWriter] and then
+ //! parse the written bytes back with the raw record readers.
+
+ use std::io::Cursor;
+
+ use binrw::BinRead;
+ use encoding_rs::UTF_8;
+ use itertools::Itertools;
+
+ use crate::{
+ dictionary::{Dictionary, VarWidth, Variable},
+ identifier::Identifier,
+ sys::{
+ raw::records::{RawHeader, RawVariableRecord},
+ write::DictionaryWriter,
+ WriteOptions,
+ },
+ };
+
+ /// Checks the `weight_index` and `nominal_case_size` header fields written
+ /// by `write_header`, for every 4-variable combination (with repetition) of
+ /// a set of variable widths and for every weighting choice: unweighted, or
+ /// weighted by any one of the numeric variables.
+ #[test]
+ fn header() {
+ // Each pair is a variable width plus the amount (`n_chunks`) that a
+ // variable of that width is expected to add to `nominal_case_size`.
+ for variables in [
+ (VarWidth::Numeric, 1),
+ (VarWidth::String(1), 1),
+ (VarWidth::String(8), 1),
+ (VarWidth::String(15), 2),
+ (VarWidth::String(255), 32),
+ (VarWidth::String(256), 33),
+ (VarWidth::String(20000), 79 * 32 + 12),
+ ]
+ .iter()
+ .copied()
+ .combinations_with_replacement(4)
+ {
+ let mut dictionary = Dictionary::new(UTF_8);
+ let mut expected_case_size = 0;
+ // Weighting choices to try, each paired with the `weight_index`
+ // the header should then contain.  Unweighted expects 0.
+ let mut weight_indexes = vec![(None, 0)];
+ for (index, (width, n_chunks)) in variables.into_iter().enumerate() {
+ let index = dictionary
+ .add_var(Variable::new(
+ Identifier::new(format!("v{index}")).unwrap(),
+ width,
+ UTF_8,
+ ))
+ .unwrap();
+ if width.is_numeric() {
+ // Weighting by this variable expects 1 plus the case size
+ // accumulated by the variables added before it.
+ weight_indexes.push((Some(index), expected_case_size + 1));
+ }
+ expected_case_size += n_chunks;
+ }
+ for (weight_index, expected_weight_index) in weight_indexes {
+ dictionary.set_weight(weight_index).unwrap();
+
+ // Write just the header into memory, then parse it back.
+ let mut raw = Vec::new();
+ DictionaryWriter::new(
+ &WriteOptions::reproducible(None),
+ &mut Cursor::new(&mut raw),
+ &dictionary,
+ )
+ .write_header()
+ .unwrap();
+ let header = RawHeader::read_le(&mut Cursor::new(&raw)).unwrap();
+ assert_eq!(header.weight_index, expected_weight_index as u32);
+ assert_eq!(header.nominal_case_size, expected_case_size as u32);
+ }
+ }
+ }
+
+ /// Checks the `width` field of every variable record written by
+ /// `write_variables`, for every 4-variable combination (with repetition) of
+ /// a set of variable widths.
+ #[test]
+ fn variables() {
+ // Each pair is a variable width plus the `width` fields expected, in
+ // order, in the records written for a variable of that width.  The
+ // `-1` entries are presumably the continuation records that follow a
+ // string variable's first record.
+ let variables = [
+ (VarWidth::Numeric, vec![0]),
+ (VarWidth::String(1), vec![1]),
+ (VarWidth::String(8), vec![8]),
+ (VarWidth::String(15), vec![15, -1]),
+ (
+ VarWidth::String(255),
+ std::iter::once(255)
+ .chain(std::iter::repeat_n(-1, 31))
+ .collect(),
+ ),
+ (
+ VarWidth::String(256),
+ std::iter::once(255)
+ .chain(std::iter::repeat_n(-1, 31))
+ .chain(std::iter::once(4))
+ .collect(),
+ ),
+ (
+ VarWidth::String(20000),
+ std::iter::once(255)
+ .chain(std::iter::repeat_n(-1, 31))
+ .cycle()
+ .take(32 * 79)
+ .chain(std::iter::once(92))
+ .chain(std::iter::repeat_n(-1, 11))
+ .collect(),
+ ),
+ ];
+ for variables in variables.iter().combinations_with_replacement(4) {
+ let mut dictionary = Dictionary::new(UTF_8);
+ for (index, (width, _)) in variables.iter().enumerate() {
+ dictionary
+ .add_var(Variable::new(
+ Identifier::new(format!("v{index}")).unwrap(),
+ *width,
+ UTF_8,
+ ))
+ .unwrap();
+ }
+
+ // Expected `width` fields for the whole dictionary: the
+ // per-variable expectations concatenated in variable order.
+ let widths = variables
+ .into_iter()
+ .flat_map(|(_, w)| w.iter())
+ .copied();
+
+ // Write just the variable records into memory.
+ let mut raw = Vec::new();
+ DictionaryWriter::new(
+ &WriteOptions::reproducible(None),
+ &mut Cursor::new(&mut raw),
+ &dictionary,
+ )
+ .write_variables()
+ .unwrap();
+
+ // Parse the output back: each record must begin with the value 2,
+ // followed by the variable record itself.
+ let mut cursor = Cursor::new(&raw);
+ let mut records = Vec::new();
+ while cursor.position() < raw.len() as u64 {
+ assert_eq!(u32::read_le(&mut cursor).unwrap(), 2);
+ records.push(RawVariableRecord::read_le(&mut cursor).unwrap());
+ }
+ // `zip_eq` panics on a length mismatch, so this also checks that
+ // the number of records written is the number expected.
+ for (record, expected_width) in records.iter().zip_eq(widths) {
+ assert_eq!(record.width, expected_width);
+ }
+ }
+ }
+}