From 9748e7fa134153a30442d3afa168d93d1dcc5ac7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 19 Jul 2025 10:29:25 -0700 Subject: [PATCH] bulk of sav writer --- rust/pspp/build.rs | 14 ++ rust/pspp/src/dictionary.rs | 50 ++++- rust/pspp/src/output/spv.rs | 4 +- rust/pspp/src/sys/cooked.rs | 8 +- rust/pspp/src/sys/encoding.rs | 60 ++++- rust/pspp/src/sys/raw.rs | 2 +- rust/pspp/src/sys/raw/records.rs | 52 ++--- rust/pspp/src/sys/sack.rs | 2 +- rust/pspp/src/sys/write.rs | 362 +++++++++++++++++++++++++++++-- 9 files changed, 497 insertions(+), 57 deletions(-) diff --git a/rust/pspp/build.rs b/rust/pspp/build.rs index 0535380377..13e9534d16 100644 --- a/rust/pspp/build.rs +++ b/rust/pspp/build.rs @@ -188,6 +188,20 @@ fn main() -> AnyResult<()> { } process_converter(&converter, &mut codepages); + for (codepage, source, name) in [ + (20932, Source::Codepage, "EUC-JP"), + (50220, Source::Codepage, "ISO-2022-JP"), + (28600, Source::Windows, "ISO-8859-10"), + (28604, Source::Windows, "ISO-8859-14"), + (28606, Source::Windows, "ISO-8859-16"), + (99998, Source::Codepage, "replacement"), + (99999, Source::Codepage, "x-user-defined"), + ] { + assert!(codepages + .insert(codepage, [(source, vec![name])].into_iter().collect()) + .is_none()); + } + let output_file_name = Path::new(&var_os("OUT_DIR").unwrap()).join("encodings.rs"); write_output(&codepages, &output_file_name) diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 4a4e9f9d9e..ee5b15f0e7 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -1074,6 +1074,19 @@ impl TryFrom for Role { } } +impl From for i32 { + fn from(value: Role) -> Self { + match value { + Role::Input => 0, + Role::Target => 1, + Role::Both => 2, + Role::None => 3, + Role::Partition => 4, + Role::Split => 5, + } + } +} + #[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct Attributes(pub BTreeMap>); @@ -1354,6 +1367,37 @@ pub enum MultipleResponseType { MultipleCategory, } +impl MultipleResponseType { + pub fn supported_before_v14(&self) -> bool { + match self { + MultipleResponseType::MultipleDichotomy { + labels: CategoryLabels::CountedValues { .. }, + datum: _, + } => false, + _ => true, + } + } + + pub fn label_from_var_label(&self) -> bool { + match self { + MultipleResponseType::MultipleDichotomy { + labels: + CategoryLabels::CountedValues { + use_var_label_as_mrset_label: true, + }, + .. + } => true, + _ => false, + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum CategoryLabels { + VarLabels, + CountedValues { use_var_label_as_mrset_label: bool }, +} + #[derive(Clone, Debug)] pub struct VariableSet { pub name: String, @@ -1683,12 +1727,6 @@ impl Measure { } } -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum CategoryLabels { - VarLabels, - CountedValues, -} - #[cfg(test)] mod test { use std::collections::HashSet; diff --git a/rust/pspp/src/output/spv.rs b/rust/pspp/src/output/spv.rs index 43ed553d6d..378a598e44 100644 --- a/rust/pspp/src/output/spv.rs +++ b/rust/pspp/src/output/spv.rs @@ -1175,7 +1175,7 @@ impl BinWrite for Value { 2u8, ValueMod::new(self), format, - number.value.unwrap_or(-f64::MAX), + number.value.unwrap_or(f64::MIN), SpvString::optional(&number.var_name), SpvString::optional(&number.value_label), Show::as_spv(&number.show), @@ -1186,7 +1186,7 @@ impl BinWrite for Value { 1u8, ValueMod::new(self), format, - number.value.unwrap_or(-f64::MAX), + number.value.unwrap_or(f64::MIN), ) .write_options(writer, endian, args)?; } diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 9c5b7e0ab5..05dc637fb3 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -792,7 +792,7 @@ impl Records { .collect(); if let Some(integer_info) = self.integer_info.first() { - let floating_point_rep = integer_info.floating_point_rep; + let floating_point_rep = integer_info.inner.floating_point_rep; if floating_point_rep != 1 { warn(Error::UnexpectedFloatFormat(floating_point_rep)) } @@ -801,7 +801,7 @@ impl Records { Endian::Big => 1, Endian::Little => 2, }; - let actual = integer_info.endianness; + let actual = integer_info.inner.endianness; if actual != expected { warn(Error::UnexpectedEndianess { actual, expected }); } @@ -834,7 +834,7 @@ impl Records { && self .integer_info .get(0) - .is_none_or(|info| info.version.0 != 13) + .is_none_or(|info| info.inner.version.0 != 13) { warn(Error::WrongVariablePositions { actual: n_vars, @@ -1417,7 +1417,7 @@ impl Metadata { .or_else(|| header.n_cases.map(|n| n as u64)), product, product_ext: headers.product_info.first().map(|pe| fix_line_ends(&pe.0)), - version: headers.integer_info.first().map(|ii| ii.version), + version: headers.integer_info.first().map(|ii| ii.inner.version), } } } diff --git a/rust/pspp/src/sys/encoding.rs b/rust/pspp/src/sys/encoding.rs index f4fecbab62..0a2bffcf9b 100644 --- a/rust/pspp/src/sys/encoding.rs +++ b/rust/pspp/src/sys/encoding.rs @@ -20,18 +20,24 @@ use std::sync::LazyLock; use crate::locale_charset::locale_charset; use encoding_rs::{Encoding, UTF_8}; +use thiserror::Error as ThisError; include!(concat!(env!("OUT_DIR"), "/encodings.rs")); /// Returns the code page number corresponding to `encoding`, or `None` if /// unknown. -pub fn codepage_from_encoding(encoding: &str) -> Option { +pub fn codepage_from_encoding_name(encoding: &str) -> Option { CODEPAGE_NAME_TO_NUMBER .get(encoding.to_ascii_lowercase().as_str()) .copied() } -use thiserror::Error as ThisError; +/// Returns the code page number for `encoding`. +pub fn codepage_from_encoding(encoding: &'static Encoding) -> u32 { + // This `unwrap()` is tested against all the actual [Encoding]s in a + // #[test]. + codepage_from_encoding_name(encoding.name()).unwrap() +} /// An error or warning related to encodings. #[derive(Clone, ThisError, Debug, PartialEq, Eq)] @@ -104,3 +110,53 @@ pub fn get_encoding( Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into())) } + +#[cfg(test)] +mod tests { + use crate::sys::encoding::codepage_from_encoding; + + /// Test that every `Encoding` has a codepage. + #[test] + fn codepages() { + codepage_from_encoding(&encoding_rs::BIG5); + codepage_from_encoding(&encoding_rs::EUC_JP); + codepage_from_encoding(&encoding_rs::EUC_KR); + codepage_from_encoding(&encoding_rs::GB18030); + codepage_from_encoding(&encoding_rs::GBK); + codepage_from_encoding(&encoding_rs::IBM866); + codepage_from_encoding(&encoding_rs::ISO_2022_JP); + codepage_from_encoding(&encoding_rs::ISO_8859_2); + codepage_from_encoding(&encoding_rs::ISO_8859_3); + codepage_from_encoding(&encoding_rs::ISO_8859_4); + codepage_from_encoding(&encoding_rs::ISO_8859_5); + codepage_from_encoding(&encoding_rs::ISO_8859_6); + codepage_from_encoding(&encoding_rs::ISO_8859_7); + codepage_from_encoding(&encoding_rs::ISO_8859_8); + codepage_from_encoding(&encoding_rs::ISO_8859_8_I); + codepage_from_encoding(&encoding_rs::ISO_8859_10); + codepage_from_encoding(&encoding_rs::ISO_8859_13); + codepage_from_encoding(&encoding_rs::ISO_8859_14); + codepage_from_encoding(&encoding_rs::ISO_8859_15); + codepage_from_encoding(&encoding_rs::ISO_8859_16); + codepage_from_encoding(&encoding_rs::KOI8_R); + codepage_from_encoding(&encoding_rs::KOI8_U); + codepage_from_encoding(&encoding_rs::MACINTOSH); + codepage_from_encoding(&encoding_rs::REPLACEMENT); + codepage_from_encoding(&encoding_rs::SHIFT_JIS); + codepage_from_encoding(&encoding_rs::UTF_8); + codepage_from_encoding(&encoding_rs::UTF_16BE); + codepage_from_encoding(&encoding_rs::UTF_16LE); + codepage_from_encoding(&encoding_rs::WINDOWS_874); + codepage_from_encoding(&encoding_rs::WINDOWS_1250); + codepage_from_encoding(&encoding_rs::WINDOWS_1251); + codepage_from_encoding(&encoding_rs::WINDOWS_1252); + codepage_from_encoding(&encoding_rs::WINDOWS_1253); + codepage_from_encoding(&encoding_rs::WINDOWS_1254); + codepage_from_encoding(&encoding_rs::WINDOWS_1255); + codepage_from_encoding(&encoding_rs::WINDOWS_1256); + codepage_from_encoding(&encoding_rs::WINDOWS_1257); + codepage_from_encoding(&encoding_rs::WINDOWS_1258); + codepage_from_encoding(&encoding_rs::X_MAC_CYRILLIC); + codepage_from_encoding(&encoding_rs::X_USER_DEFINED); + } +} diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index dca752f034..23dd35d94b 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -808,7 +808,7 @@ pub fn infer_encoding( let character_code = records .iter() .filter_map(|record| match record { - Record::IntegerInfo(record) => Some(record.character_code), + Record::IntegerInfo(record) => Some(record.inner.character_code), _ => None, }) .next(); diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index 606bb4ef21..a246f137c3 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -254,7 +254,7 @@ impl FileHeader { } } -/// [Format](crate::format::Format) as represented in a system file. +/// [Format] as represented in a system file. #[derive(Copy, Clone, PartialEq, Eq, Hash, BinRead, BinWrite)] pub struct RawFormat( /// The most-significant 16 bits are the type, the next 8 bytes are the @@ -910,6 +910,13 @@ pub struct IntegerInfoRecord { /// File offsets occupied by the record. pub offsets: Range, + /// Details. + pub inner: RawIntegerInfoRecord, +} + +/// Machine integer info record in [binrw] format. +#[derive(Clone, Debug, BinRead, BinWrite)] +pub struct RawIntegerInfoRecord { /// Version number. /// /// e.g. `(1,2,3)` for version 1.2.3. @@ -938,18 +945,12 @@ impl IntegerInfoRecord { pub fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(Some(4), Some(8), "integer record")?; - let mut input = &ext.data[..]; - let data: Vec = (0..8) - .map(|_| endian.parse(read_bytes(&mut input).unwrap())) - .collect(); + let inner = + RawIntegerInfoRecord::read_options(&mut Cursor::new(ext.data.as_slice()), endian, ()) + .unwrap(); Ok(Record::IntegerInfo(IntegerInfoRecord { offsets: ext.offsets.clone(), - version: (data[0], data[1], data[2]), - machine_code: data[3], - floating_point_rep: data[4], - compression_code: data[5], - endianness: data[6], - character_code: data[7], + inner, })) } } @@ -959,20 +960,14 @@ impl FloatInfoRecord { pub fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(Some(8), Some(3), "floating point record")?; - let mut input = &ext.data[..]; - let data: Vec = (0..3) - .map(|_| endian.parse(read_bytes(&mut input).unwrap())) - .collect(); - Ok(Record::FloatInfo(FloatInfoRecord { - sysmis: data[0], - highest: data[1], - lowest: data[2], - })) + let data = FloatInfoRecord::read_options(&mut Cursor::new(ext.data.as_slice()), endian, ()) + .unwrap(); + Ok(Record::FloatInfo(data)) } } /// A floating-point info record. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, BinRead, BinWrite)] pub struct FloatInfoRecord { /// Value used for system-missing values. pub sysmis: f64, @@ -1212,16 +1207,23 @@ impl MultipleResponseType { ) } Some((b'E', input)) => { - let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") { - (CategoryLabels::CountedValues, rest) + let (use_var_label_as_mrset_label, input) = if let Some(rest) = + input.strip_prefix(b" 1 ") + { + (false, rest) } else if let Some(rest) = input.strip_prefix(b" 11 ") { - (CategoryLabels::VarLabels, rest) + (true, rest) } else { return Err(MultipleResponseWarning::InvalidMultipleDichotomyLabelType.into()); }; let (value, input) = parse_counted_string(input)?; ( - MultipleResponseType::MultipleDichotomy { value, labels }, + MultipleResponseType::MultipleDichotomy { + value, + labels: CategoryLabels::CountedValues { + use_var_label_as_mrset_label, + }, + }, input, ) } diff --git a/rust/pspp/src/sys/sack.rs b/rust/pspp/src/sys/sack.rs index c6695bd1be..7a8108073d 100644 --- a/rust/pspp/src/sys/sack.rs +++ b/rust/pspp/src/sys/sack.rs @@ -552,7 +552,7 @@ impl<'a> Lexer<'a> { "i64" => Token::I64, "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)), "PCSYSMIS" => Token::PcSysmis, - "LOWEST" => Token::Float((-f64::MAX).next_up().into()), + "LOWEST" => Token::Float(f64::MIN.next_up().into()), "HIGHEST" => Token::Float(f64::MAX.into()), "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }), "COUNT" => Token::Count, diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index 83d5663737..a0869ead14 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -1,8 +1,10 @@ #![allow(dead_code, missing_docs)] use core::f64; use std::{ + borrow::Cow, collections::HashMap, - io::{Seek, Write}, + fmt::Write as _, + io::{Cursor, Seek, Write}, }; use binrw::{BinWrite, Endian, Error as BinError}; @@ -12,13 +14,22 @@ use smallvec::SmallVec; use crate::{ data::Datum, - dictionary::{Dictionary, ValueLabels, VarWidth}, - format::Format, + dictionary::{ + Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType, + ValueLabels, VarWidth, + }, + format::{DisplayPlainF64, Format}, identifier::Identifier, output::spv::Zeros, - sys::raw::{ - records::{Compression, RawFormat, RawHeader, RawVariableRecord}, - Magic, + sys::{ + encoding::codepage_from_encoding, + raw::{ + records::{ + Compression, FloatInfoRecord, RawFormat, RawHeader, RawIntegerInfoRecord, + RawVariableRecord, + }, + Magic, + }, }, }; @@ -60,6 +71,20 @@ struct DictionaryWriter<'a, W> { dictionary: &'a Dictionary, } +fn count_segments(case_vars: &[CaseVar]) -> u32 { + case_vars.iter().map(CaseVar::n_segments).sum::() as u32 +} + +fn put_attributes(attributes: &Attributes, s: &mut String) { + for (name, values) in attributes.iter(true) { + write!(s, "{name}(").unwrap(); + for value in values { + writeln!(s, "'{value}'").unwrap(); + } + write!(s, ")").unwrap() + } +} + impl<'a, W> DictionaryWriter<'a, W> where W: Write + Seek, @@ -82,7 +107,21 @@ where pub fn write(&mut self) -> Result<(), BinError> { self.write_header()?; self.write_variables()?; - self.write_value_labels() + self.write_value_labels()?; + self.write_integer_record()?; + self.write_float_record()?; + self.write_var_sets()?; + self.write_mrsets(true)?; + self.write_variable_display_parameters()?; + self.write_long_variable_names()?; + self.write_very_long_strings()?; + self.write_long_string_value_labels()?; + self.write_long_string_missing_values()?; + self.write_data_file_attributes()?; + self.write_variable_attributes()?; + self.write_mrsets(false)?; + self.write_encoding()?; + (999u32, 0u32).write_le(self.writer) } fn write_header(&mut self) -> Result<(), BinError> { @@ -92,10 +131,6 @@ where bytes.try_into().unwrap() } - fn count_segments(case_vars: &[CaseVar]) -> u32 { - case_vars.iter().map(CaseVar::n_segments).sum::() as u32 - } - let now = Local::now(); let header = RawHeader { magic: if self.compression == Some(Compression::ZLib) { @@ -172,7 +207,7 @@ where if !variable.width.is_long_string() { if let Some(range) = variable.missing_values.range() { ( - range.low().unwrap_or(-f64::MAX), + range.low().unwrap_or(f64::MIN), range.high().unwrap_or(f64::MAX), ) .write_le(self.writer)?; @@ -264,7 +299,7 @@ where // Variable record. (4u32, variables.len() as u32, variables).write_le(self.writer)?; } - todo!() + Ok(()) } pub fn write_documents(&mut self) -> Result<(), BinError> { @@ -277,6 +312,303 @@ where } Ok(()) } + + const fn version() -> (i32, i32, i32) { + const fn parse_integer(mut s: &[u8]) -> (i32, &[u8]) { + let mut value = 0; + let mut n = 0; + while let Some((c, rest)) = s.split_first() + && *c >= b'0' + && *c <= b'9' + { + value = value * 10 + (*c - b'0') as i32; + n += 1; + s = rest; + } + assert!(n > 0); + (value, s) + } + + const fn skip_dot(s: &[u8]) -> &[u8] { + let Some((c, rest)) = s.split_first() else { + unreachable!() + }; + assert!(*c == b'.'); + rest + } + + let s = env!("CARGO_PKG_VERSION").as_bytes(); + let (first, s) = parse_integer(s); + let s = skip_dot(s); + let (second, s) = parse_integer(s); + let s = skip_dot(s); + let (third, s) = parse_integer(s); + assert!(matches!(s.first(), None | Some(b'-' | b'+'))); + (first, second, third) + } + + fn write_integer_record(&mut self) -> Result<(), BinError> { + ( + 7u32, + 3u32, + 4u32, + 8u32, + RawIntegerInfoRecord { + version: Self::version(), + machine_code: -1, + floating_point_rep: 1, + compression_code: 1, + endianness: { + // We always write files in little-endian. + 2 + }, + character_code: codepage_from_encoding(self.dictionary.encoding) as i32, + }, + ) + .write_le(self.writer) + } + + fn write_float_record(&mut self) -> Result<(), BinError> { + ( + 7u32, + 4u32, + 8u32, + 3u32, + FloatInfoRecord { + sysmis: f64::MIN, + highest: f64::MAX, + lowest: f64::MIN.next_up(), + }, + ) + .write_le(self.writer) + } + + fn write_var_sets(&mut self) -> Result<(), BinError> { + let mut s = String::new(); + for set in &self.dictionary.variable_sets { + write!(&mut s, "{}= ", set.name).unwrap(); + for (index, variable) in set.variables.iter().enumerate() { + let prefix = if index > 0 { " " } else { "" }; + write!( + &mut s, + "{prefix}{}", + self.dictionary.variables[*variable].name + ) + .unwrap(); + } + writeln!(&mut s).unwrap(); + } + self.write_string_record(5, &s) + } + + /// If `pre_v14` is true, writes only sets supported by SPSS before release + /// 14, otherwise writes sets supported only by later versions. + fn write_mrsets(&mut self, pre_v14: bool) -> Result<(), BinError> { + let mut output = Vec::new(); + for set in self + .dictionary + .mrsets + .iter() + .filter(|set| set.mr_type.supported_before_v14() == pre_v14) + { + output.extend_from_slice(&self.dictionary.encoding.encode(&set.name).0[..]); + output.push(b'='); + match &set.mr_type { + MultipleResponseType::MultipleDichotomy { datum, labels } => { + let leader = match labels { + CategoryLabels::VarLabels => b"D".as_slice(), + CategoryLabels::CountedValues { + use_var_label_as_mrset_label: true, + } => b"E 11".as_slice(), + CategoryLabels::CountedValues { + use_var_label_as_mrset_label: false, + } => b"E 1".as_slice(), + }; + output.extend_from_slice(leader); + + let mut value = match datum { + Datum::Number(Some(number)) => { + DisplayPlainF64(*number).to_string().into_bytes() + } + Datum::Number(None) => vec![b'.'], + Datum::String(raw_string) => raw_string.0.clone(), + }; + write!(&mut output, "{} ", value.len()).unwrap(); + output.append(&mut value); + } + MultipleResponseType::MultipleCategory => write!(&mut output, "C").unwrap(), + } + + let label = if set.mr_type.label_from_var_label() { + Cow::from(&[]) + } else { + self.dictionary.encoding.encode(&set.label).0 + }; + write!(&mut output, "{} ", label.len()).unwrap(); + output.extend_from_slice(&label[..]); + + for variable in set.variables.iter().copied() { + // Only lowercase ASCII characters because other characters + // might expand upon lowercasing. + let short_name = self.short_names[variable][0].as_str().to_ascii_lowercase(); + output.push(b' '); + output.extend_from_slice(&self.dictionary.encoding.encode(&short_name).0); + } + output.push(b'\n'); + } + self.write_bytes_record(if pre_v14 { 7 } else { 19 }, &output) + } + + fn write_variable_display_parameters(&mut self) -> Result<(), BinError> { + (7u32, 11u32, 4u32, count_segments(&self.case_vars) * 3).write_le(self.writer)?; + for variable in &self.dictionary.variables { + let measure = match variable.measure { + None => 0, + Some(Measure::Nominal) => 1, + Some(Measure::Ordinal) => 2, + Some(Measure::Scale) => 3, + }; + let alignment = match variable.alignment { + Alignment::Left => 0, + Alignment::Right => 1, + Alignment::Center => 2, + }; + for (index, segment) in SegmentWidths::new(variable.width).enumerate() { + let display_width = match index { + 0 => variable.display_width, + _ => segment.default_display_width(), + }; + (measure, display_width, alignment).write_le(self.writer)?; + } + } + Ok(()) + } + + fn write_long_variable_names(&mut self) -> Result<(), BinError> { + if self.version == Version::V2 { + return Ok(()); + } + + let mut s = String::new(); + for (index, variable) in self.dictionary.variables.iter().enumerate() { + if index > 0 { + s.push('\t'); + } + write!(&mut s, "{}={}", &self.short_names[index][0], variable.name).unwrap(); + } + self.write_string_record(13, &s) + } + + fn write_very_long_strings(&mut self) -> Result<(), BinError> { + let mut s = String::new(); + for (index, variable) in self.dictionary.variables.iter().enumerate() { + if variable.width.is_very_long() { + let width = variable.width.as_string_width().unwrap(); + write!(&mut s, "{}={width:05}\0\t", &self.short_names[index][0],).unwrap(); + } + } + self.write_string_record(14, &s) + } + + fn write_long_string_value_labels(&mut self) -> Result<(), BinError> { + let mut body = Vec::new(); + let mut cursor = Cursor::new(&mut body); + for variable in &self.dictionary.variables { + if variable.value_labels.is_empty() || !variable.width.is_long_string() { + break; + } + let name = self.dictionary.encoding.encode(&variable.name).0; + ( + name.len() as u32, + &name[..], + variable.width.as_string_width().unwrap() as u32, + variable.value_labels.0.len() as u32, + ) + .write_le(&mut cursor)?; + + for (value, label) in &variable.value_labels.0 { + let value = value.as_string().unwrap(); + let label = self.dictionary.encoding.encode(&label).0; + ( + value.len() as u32, + value.as_bytes(), + label.len() as u32, + &label[..], + ) + .write_le(&mut cursor)?; + } + } + self.write_bytes_record(21, &body) + } + + fn write_long_string_missing_values(&mut self) -> Result<(), BinError> { + let mut body = Vec::new(); + let mut cursor = Cursor::new(&mut body); + for variable in &self.dictionary.variables { + if variable.missing_values.is_empty() || !variable.width.is_long_string() { + break; + } + let name = self.dictionary.encoding.encode(&variable.name).0; + ( + name.len() as u32, + &name[..], + variable.missing_values.values().len() as u32, + 8u32, + ) + .write_le(&mut cursor)?; + + for value in variable.missing_values.values() { + let value = value.as_string().unwrap(); + value.0[..8].write_le(&mut cursor).unwrap(); + } + } + self.write_bytes_record(22, &body) + } + + fn write_data_file_attributes(&mut self) -> Result<(), BinError> { + if self.version != Version::V2 { + return Ok(()); + } + let mut s = String::new(); + put_attributes(&self.dictionary.attributes, &mut s); + self.write_string_record(17, &s) + } + + fn write_variable_attributes(&mut self) -> Result<(), BinError> { + if self.version != Version::V2 { + return Ok(()); + } + let mut s = String::new(); + for (index, variable) in self.dictionary.variables.iter().enumerate() { + let mut attributes = variable.attributes.clone(); + attributes.0.insert( + Identifier::new("$@Role").unwrap(), + vec![i32::from(variable.role).to_string()], + ); + + if index > 0 { + s.push('/'); + } + put_attributes(&attributes, &mut s); + } + self.write_string_record(18, &s) + } + + fn write_encoding(&mut self) -> Result<(), BinError> { + self.write_string_record(20, self.dictionary.encoding.name()) + } + + fn write_bytes_record(&mut self, subtype: u32, bytes: &[u8]) -> Result<(), BinError> { + if !bytes.is_empty() { + (7u32, subtype, 1u32, bytes.len() as u32, bytes).write_le(self.writer) + } else { + Ok(()) + } + } + + fn write_string_record(&mut self, subtype: u32, s: &str) -> Result<(), BinError> { + self.write_bytes_record(subtype, &self.dictionary.encoding.encode(&s).0) + } } #[derive(BinWrite)] @@ -357,9 +689,7 @@ impl BinWrite for Datum { _: (), ) -> binrw::BinResult<()> { match self { - Datum::Number(number) => number - .unwrap_or(-f64::MAX) - .write_options(writer, endian, ()), + Datum::Number(number) => number.unwrap_or(f64::MIN).write_options(writer, endian, ()), Datum::String(raw_string) => raw_string.0.write_options(writer, endian, ()), } } -- 2.30.2