bulk of sav writer
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 19 Jul 2025 17:29:25 +0000 (10:29 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 19 Jul 2025 17:29:25 +0000 (10:29 -0700)
rust/pspp/build.rs
rust/pspp/src/dictionary.rs
rust/pspp/src/output/spv.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/encoding.rs
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/raw/records.rs
rust/pspp/src/sys/sack.rs
rust/pspp/src/sys/write.rs

index 05353803775f2f4e696556d8b8ce51b752e644da..13e9534d16e105b3380f9bfdcbb179e108834151 100644 (file)
@@ -188,6 +188,20 @@ fn main() -> AnyResult<()> {
     }
     process_converter(&converter, &mut codepages);
 
+    for (codepage, source, name) in [
+        (20932, Source::Codepage, "EUC-JP"),
+        (50220, Source::Codepage, "ISO-2022-JP"),
+        (28600, Source::Windows, "ISO-8859-10"),
+        (28604, Source::Windows, "ISO-8859-14"),
+        (28606, Source::Windows, "ISO-8859-16"),
+        (99998, Source::Codepage, "replacement"),
+        (99999, Source::Codepage, "x-user-defined"),
+    ] {
+        assert!(codepages
+            .insert(codepage, [(source, vec![name])].into_iter().collect())
+            .is_none());
+    }
+
     let output_file_name = Path::new(&var_os("OUT_DIR").unwrap()).join("encodings.rs");
 
     write_output(&codepages, &output_file_name)
index 4a4e9f9d9e6458486c2006c31069893248017efe..ee5b15f0e756c2e4f806e68c5d2c9fad698a4ef2 100644 (file)
@@ -1074,6 +1074,19 @@ impl TryFrom<i32> for Role {
     }
 }
 
+/// Converts a [Role] into its integer code.
+///
+/// NOTE(review): presumably the inverse of the `TryFrom<i32>` implementation
+/// just above; confirm that the two tables agree.
+impl From<Role> for i32 {
+    fn from(role: Role) -> i32 {
+        let code: i32 = match role {
+            Role::Input => 0,
+            Role::Target => 1,
+            Role::Both => 2,
+            Role::None => 3,
+            Role::Partition => 4,
+            Role::Split => 5,
+        };
+        code
+    }
+}
+
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Attributes(pub BTreeMap<Identifier, Vec<String>>);
 
@@ -1354,6 +1367,37 @@ pub enum MultipleResponseType {
     MultipleCategory,
 }
 
+impl MultipleResponseType {
+    /// Returns false for a multiple dichotomy set whose category labels are
+    /// [CategoryLabels::CountedValues], and true for every other kind of set.
+    pub fn supported_before_v14(&self) -> bool {
+        !matches!(
+            self,
+            Self::MultipleDichotomy {
+                labels: CategoryLabels::CountedValues { .. },
+                ..
+            }
+        )
+    }
+
+    /// Returns true only for a multiple dichotomy set whose category labels
+    /// are [CategoryLabels::CountedValues] with `use_var_label_as_mrset_label`
+    /// set.
+    pub fn label_from_var_label(&self) -> bool {
+        matches!(
+            self,
+            Self::MultipleDichotomy {
+                labels: CategoryLabels::CountedValues {
+                    use_var_label_as_mrset_label: true,
+                },
+                ..
+            }
+        )
+    }
+}
+
+/// Category-label setting of a [MultipleResponseType::MultipleDichotomy] set.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum CategoryLabels {
+    /// The system file writer emits type `D` for this setting.
+    VarLabels,
+    /// The system file writer emits type `E` for this setting, followed by
+    /// `11` when `use_var_label_as_mrset_label` is true and `1` otherwise.
+    CountedValues { use_var_label_as_mrset_label: bool },
+}
+
 #[derive(Clone, Debug)]
 pub struct VariableSet {
     pub name: String,
@@ -1683,12 +1727,6 @@ impl Measure {
     }
 }
 
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum CategoryLabels {
-    VarLabels,
-    CountedValues,
-}
-
 #[cfg(test)]
 mod test {
     use std::collections::HashSet;
index 43ed553d6dc09ee258489faf51cab5f9e4125efd..378a598e44ef6b35af5cf8cd8d9ddb3c90874366 100644 (file)
@@ -1175,7 +1175,7 @@ impl BinWrite for Value {
                         2u8,
                         ValueMod::new(self),
                         format,
-                        number.value.unwrap_or(-f64::MAX),
+                        number.value.unwrap_or(f64::MIN),
                         SpvString::optional(&number.var_name),
                         SpvString::optional(&number.value_label),
                         Show::as_spv(&number.show),
@@ -1186,7 +1186,7 @@ impl BinWrite for Value {
                         1u8,
                         ValueMod::new(self),
                         format,
-                        number.value.unwrap_or(-f64::MAX),
+                        number.value.unwrap_or(f64::MIN),
                     )
                         .write_options(writer, endian, args)?;
                 }
index 9c5b7e0ab547c31a6abd5fc5c50dea80eee8472f..05dc637fb36af05faa0373da9dc164b6d026c01d 100644 (file)
@@ -792,7 +792,7 @@ impl Records {
             .collect();
 
         if let Some(integer_info) = self.integer_info.first() {
-            let floating_point_rep = integer_info.floating_point_rep;
+            let floating_point_rep = integer_info.inner.floating_point_rep;
             if floating_point_rep != 1 {
                 warn(Error::UnexpectedFloatFormat(floating_point_rep))
             }
@@ -801,7 +801,7 @@ impl Records {
                 Endian::Big => 1,
                 Endian::Little => 2,
             };
-            let actual = integer_info.endianness;
+            let actual = integer_info.inner.endianness;
             if actual != expected {
                 warn(Error::UnexpectedEndianess { actual, expected });
             }
@@ -834,7 +834,7 @@ impl Records {
                 && self
                     .integer_info
                     .get(0)
-                    .is_none_or(|info| info.version.0 != 13)
+                    .is_none_or(|info| info.inner.version.0 != 13)
             {
                 warn(Error::WrongVariablePositions {
                     actual: n_vars,
@@ -1417,7 +1417,7 @@ impl Metadata {
                 .or_else(|| header.n_cases.map(|n| n as u64)),
             product,
             product_ext: headers.product_info.first().map(|pe| fix_line_ends(&pe.0)),
-            version: headers.integer_info.first().map(|ii| ii.version),
+            version: headers.integer_info.first().map(|ii| ii.inner.version),
         }
     }
 }
index f4fecbab62cd856cbefece2411e74c62690a9ff5..0a2bffcf9b466d36c82c8c43df21757ade4f2890 100644 (file)
@@ -20,18 +20,24 @@ use std::sync::LazyLock;
 
 use crate::locale_charset::locale_charset;
 use encoding_rs::{Encoding, UTF_8};
+use thiserror::Error as ThisError;
 
 include!(concat!(env!("OUT_DIR"), "/encodings.rs"));
 
 /// Returns the code page number corresponding to `encoding`, or `None` if
 /// unknown.
-pub fn codepage_from_encoding(encoding: &str) -> Option<u32> {
+pub fn codepage_from_encoding_name(encoding: &str) -> Option<u32> {
     CODEPAGE_NAME_TO_NUMBER
         .get(encoding.to_ascii_lowercase().as_str())
         .copied()
 }
 
-use thiserror::Error as ThisError;
+/// Looks up the code page number that corresponds to `encoding`.
+///
+/// # Panics
+///
+/// Panics if [codepage_from_encoding_name] does not know the encoding's name.
+pub fn codepage_from_encoding(encoding: &'static Encoding) -> u32 {
+    let name = encoding.name();
+    // A #[test] exercises this `unwrap()` against all the actual [Encoding]s.
+    codepage_from_encoding_name(name).unwrap()
+}
 
 /// An error or warning related to encodings.
 #[derive(Clone, ThisError, Debug, PartialEq, Eq)]
@@ -104,3 +110,53 @@ pub fn get_encoding(
 
     Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::sys::encoding::codepage_from_encoding;
+
+    /// Test that every `Encoding` has a codepage.
+    ///
+    /// `codepage_from_encoding` panics on an unknown encoding, so reaching the
+    /// end of the loop is the success condition.
+    #[test]
+    fn codepages() {
+        let encodings = [
+            encoding_rs::BIG5,
+            encoding_rs::EUC_JP,
+            encoding_rs::EUC_KR,
+            encoding_rs::GB18030,
+            encoding_rs::GBK,
+            encoding_rs::IBM866,
+            encoding_rs::ISO_2022_JP,
+            encoding_rs::ISO_8859_2,
+            encoding_rs::ISO_8859_3,
+            encoding_rs::ISO_8859_4,
+            encoding_rs::ISO_8859_5,
+            encoding_rs::ISO_8859_6,
+            encoding_rs::ISO_8859_7,
+            encoding_rs::ISO_8859_8,
+            encoding_rs::ISO_8859_8_I,
+            encoding_rs::ISO_8859_10,
+            encoding_rs::ISO_8859_13,
+            encoding_rs::ISO_8859_14,
+            encoding_rs::ISO_8859_15,
+            encoding_rs::ISO_8859_16,
+            encoding_rs::KOI8_R,
+            encoding_rs::KOI8_U,
+            encoding_rs::MACINTOSH,
+            encoding_rs::REPLACEMENT,
+            encoding_rs::SHIFT_JIS,
+            encoding_rs::UTF_8,
+            encoding_rs::UTF_16BE,
+            encoding_rs::UTF_16LE,
+            encoding_rs::WINDOWS_874,
+            encoding_rs::WINDOWS_1250,
+            encoding_rs::WINDOWS_1251,
+            encoding_rs::WINDOWS_1252,
+            encoding_rs::WINDOWS_1253,
+            encoding_rs::WINDOWS_1254,
+            encoding_rs::WINDOWS_1255,
+            encoding_rs::WINDOWS_1256,
+            encoding_rs::WINDOWS_1257,
+            encoding_rs::WINDOWS_1258,
+            encoding_rs::X_MAC_CYRILLIC,
+            encoding_rs::X_USER_DEFINED,
+        ];
+        for encoding in encodings {
+            codepage_from_encoding(encoding);
+        }
+    }
+}
index dca752f034eef228802bf9ae7675db9e2dd41282..23dd35d94b7b1598ba3acae210140606c0086a8c 100644 (file)
@@ -808,7 +808,7 @@ pub fn infer_encoding(
     let character_code = records
         .iter()
         .filter_map(|record| match record {
-            Record::IntegerInfo(record) => Some(record.character_code),
+            Record::IntegerInfo(record) => Some(record.inner.character_code),
             _ => None,
         })
         .next();
index 606bb4ef21cf5bba44967b1ca9289dee0634c3e2..a246f137c3ad51f4d3529c675b613ef10facdb9d 100644 (file)
@@ -254,7 +254,7 @@ impl FileHeader<RawString> {
     }
 }
 
-/// [Format](crate::format::Format) as represented in a system file.
+/// [Format] as represented in a system file.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, BinRead, BinWrite)]
 pub struct RawFormat(
     /// The most-significant 16 bits are the type, the next 8 bytes are the
@@ -910,6 +910,13 @@ pub struct IntegerInfoRecord {
     /// File offsets occupied by the record.
     pub offsets: Range<u64>,
 
+    /// Details.
+    pub inner: RawIntegerInfoRecord,
+}
+
+/// Machine integer info record in [binrw] format.
+#[derive(Clone, Debug, BinRead, BinWrite)]
+pub struct RawIntegerInfoRecord {
     /// Version number.
     ///
     /// e.g. `(1,2,3)` for version 1.2.3.
@@ -938,18 +945,12 @@ impl IntegerInfoRecord {
     pub fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
         ext.check_size(Some(4), Some(8), "integer record")?;
 
-        let mut input = &ext.data[..];
-        let data: Vec<i32> = (0..8)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
+        let inner =
+            RawIntegerInfoRecord::read_options(&mut Cursor::new(ext.data.as_slice()), endian, ())
+                .unwrap();
         Ok(Record::IntegerInfo(IntegerInfoRecord {
             offsets: ext.offsets.clone(),
-            version: (data[0], data[1], data[2]),
-            machine_code: data[3],
-            floating_point_rep: data[4],
-            compression_code: data[5],
-            endianness: data[6],
-            character_code: data[7],
+            inner,
         }))
     }
 }
@@ -959,20 +960,14 @@ impl FloatInfoRecord {
     pub fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
         ext.check_size(Some(8), Some(3), "floating point record")?;
 
-        let mut input = &ext.data[..];
-        let data: Vec<f64> = (0..3)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
-        Ok(Record::FloatInfo(FloatInfoRecord {
-            sysmis: data[0],
-            highest: data[1],
-            lowest: data[2],
-        }))
+        let data = FloatInfoRecord::read_options(&mut Cursor::new(ext.data.as_slice()), endian, ())
+            .unwrap();
+        Ok(Record::FloatInfo(data))
     }
 }
 
 /// A floating-point info record.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, BinRead, BinWrite)]
 pub struct FloatInfoRecord {
     /// Value used for system-missing values.
     pub sysmis: f64,
@@ -1212,16 +1207,23 @@ impl MultipleResponseType {
                 )
             }
             Some((b'E', input)) => {
-                let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
-                    (CategoryLabels::CountedValues, rest)
+                let (use_var_label_as_mrset_label, input) = if let Some(rest) =
+                    input.strip_prefix(b" 1 ")
+                {
+                    (false, rest)
                 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
-                    (CategoryLabels::VarLabels, rest)
+                    (true, rest)
                 } else {
                     return Err(MultipleResponseWarning::InvalidMultipleDichotomyLabelType.into());
                 };
                 let (value, input) = parse_counted_string(input)?;
                 (
-                    MultipleResponseType::MultipleDichotomy { value, labels },
+                    MultipleResponseType::MultipleDichotomy {
+                        value,
+                        labels: CategoryLabels::CountedValues {
+                            use_var_label_as_mrset_label,
+                        },
+                    },
                     input,
                 )
             }
index c6695bd1bef026e3f5dd2ccb7313a75c48a0cfc7..7a8108073d9413fe19de3bb138150bccb8e0cb56 100644 (file)
@@ -552,7 +552,7 @@ impl<'a> Lexer<'a> {
                         "i64" => Token::I64,
                         "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
                         "PCSYSMIS" => Token::PcSysmis,
-                        "LOWEST" => Token::Float((-f64::MAX).next_up().into()),
+                        "LOWEST" => Token::Float(f64::MIN.next_up().into()),
                         "HIGHEST" => Token::Float(f64::MAX.into()),
                         "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
                         "COUNT" => Token::Count,
index 83d5663737f1fc0b9c900e055da5012f4bdb251f..a0869ead14bce748d2a7d8c5b44d142068fef217 100644 (file)
@@ -1,8 +1,10 @@
 #![allow(dead_code, missing_docs)]
 use core::f64;
 use std::{
+    borrow::Cow,
     collections::HashMap,
-    io::{Seek, Write},
+    fmt::Write as _,
+    io::{Cursor, Seek, Write},
 };
 
 use binrw::{BinWrite, Endian, Error as BinError};
@@ -12,13 +14,22 @@ use smallvec::SmallVec;
 
 use crate::{
     data::Datum,
-    dictionary::{Dictionary, ValueLabels, VarWidth},
-    format::Format,
+    dictionary::{
+        Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType,
+        ValueLabels, VarWidth,
+    },
+    format::{DisplayPlainF64, Format},
     identifier::Identifier,
     output::spv::Zeros,
-    sys::raw::{
-        records::{Compression, RawFormat, RawHeader, RawVariableRecord},
-        Magic,
+    sys::{
+        encoding::codepage_from_encoding,
+        raw::{
+            records::{
+                Compression, FloatInfoRecord, RawFormat, RawHeader, RawIntegerInfoRecord,
+                RawVariableRecord,
+            },
+            Magic,
+        },
     },
 };
 
@@ -60,6 +71,20 @@ struct DictionaryWriter<'a, W> {
     dictionary: &'a Dictionary,
 }
 
+/// Returns the total number of segments across all of `case_vars`.
+fn count_segments(case_vars: &[CaseVar]) -> u32 {
+    let total: usize = case_vars
+        .iter()
+        .map(|case_var| case_var.n_segments())
+        .sum();
+    total as u32
+}
+
+/// Appends the text form of `attributes` to `s`.
+///
+/// Each attribute is written as its name, `(`, every value wrapped in single
+/// quotes and followed by a newline, and a closing `)`.
+fn put_attributes(attributes: &Attributes, s: &mut String) {
+    for (name, values) in attributes.iter(true) {
+        write!(s, "{name}").unwrap();
+        s.push('(');
+        for value in values {
+            writeln!(s, "'{value}'").unwrap();
+        }
+        s.push(')');
+    }
+}
+
 impl<'a, W> DictionaryWriter<'a, W>
 where
     W: Write + Seek,
@@ -82,7 +107,21 @@ where
     pub fn write(&mut self) -> Result<(), BinError> {
         self.write_header()?;
         self.write_variables()?;
-        self.write_value_labels()
+        self.write_value_labels()?;
+        self.write_integer_record()?;
+        self.write_float_record()?;
+        self.write_var_sets()?;
+        self.write_mrsets(true)?;
+        self.write_variable_display_parameters()?;
+        self.write_long_variable_names()?;
+        self.write_very_long_strings()?;
+        self.write_long_string_value_labels()?;
+        self.write_long_string_missing_values()?;
+        self.write_data_file_attributes()?;
+        self.write_variable_attributes()?;
+        self.write_mrsets(false)?;
+        self.write_encoding()?;
+        (999u32, 0u32).write_le(self.writer)
     }
 
     fn write_header(&mut self) -> Result<(), BinError> {
@@ -92,10 +131,6 @@ where
             bytes.try_into().unwrap()
         }
 
-        fn count_segments(case_vars: &[CaseVar]) -> u32 {
-            case_vars.iter().map(CaseVar::n_segments).sum::<usize>() as u32
-        }
-
         let now = Local::now();
         let header = RawHeader {
             magic: if self.compression == Some(Compression::ZLib) {
@@ -172,7 +207,7 @@ where
             if !variable.width.is_long_string() {
                 if let Some(range) = variable.missing_values.range() {
                     (
-                        range.low().unwrap_or(-f64::MAX),
+                        range.low().unwrap_or(f64::MIN),
                         range.high().unwrap_or(f64::MAX),
                     )
                         .write_le(self.writer)?;
@@ -264,7 +299,7 @@ where
             // Variable record.
             (4u32, variables.len() as u32, variables).write_le(self.writer)?;
         }
-        todo!()
+        Ok(())
     }
 
     pub fn write_documents(&mut self) -> Result<(), BinError> {
@@ -277,6 +312,303 @@ where
         }
         Ok(())
     }
+
+    /// Parses the leading `MAJOR.MINOR.PATCH` of `CARGO_PKG_VERSION` into a
+    /// tuple of integers.
+    ///
+    /// Anything after the third number must be absent or start with `-` or
+    /// `+`; otherwise an assertion fails.
+    const fn version() -> (i32, i32, i32) {
+        /// Reads one or more leading ASCII digits from `s` as a decimal
+        /// number, returning the number and the unread remainder.
+        const fn parse_integer(mut s: &[u8]) -> (i32, &[u8]) {
+            let mut value = 0;
+            let mut n_digits = 0;
+            while let Some((c, rest)) = s.split_first() {
+                if !c.is_ascii_digit() {
+                    break;
+                }
+                value = value * 10 + (*c - b'0') as i32;
+                n_digits += 1;
+                s = rest;
+            }
+            assert!(n_digits > 0);
+            (value, s)
+        }
+
+        /// Consumes the `.` that must begin `s` and returns what follows it.
+        const fn skip_dot(s: &[u8]) -> &[u8] {
+            match s.split_first() {
+                Some((c, rest)) => {
+                    assert!(*c == b'.');
+                    rest
+                }
+                None => unreachable!(),
+            }
+        }
+
+        let text = env!("CARGO_PKG_VERSION").as_bytes();
+        let (major, after_major) = parse_integer(text);
+        let (minor, after_minor) = parse_integer(skip_dot(after_major));
+        let (patch, trailer) = parse_integer(skip_dot(after_minor));
+        assert!(matches!(trailer.first(), None | Some(b'-' | b'+')));
+        (major, minor, patch)
+    }
+
+    /// Writes the integer info record: extension subtype 3, holding 8 values
+    /// of 4 bytes each.
+    fn write_integer_record(&mut self) -> Result<(), BinError> {
+        let character_code = codepage_from_encoding(self.dictionary.encoding) as i32;
+        let record = RawIntegerInfoRecord {
+            version: Self::version(),
+            machine_code: -1,
+            floating_point_rep: 1,
+            compression_code: 1,
+            // We always write files in little-endian.
+            endianness: 2,
+            character_code,
+        };
+        (7u32, 3u32, 4u32, 8u32, record).write_le(self.writer)
+    }
+
+    /// Writes the floating-point info record: extension subtype 4, holding 3
+    /// values of 8 bytes each.
+    fn write_float_record(&mut self) -> Result<(), BinError> {
+        let sysmis = f64::MIN;
+        let record = FloatInfoRecord {
+            sysmis,
+            highest: f64::MAX,
+            // The smallest value strictly greater than `sysmis`.
+            lowest: sysmis.next_up(),
+        };
+        (7u32, 4u32, 8u32, 3u32, record).write_le(self.writer)
+    }
+
+    /// Writes the variable sets as extension record subtype 5.
+    ///
+    /// Each set is one line: its name, `= `, and the names of its variables
+    /// separated by single spaces.
+    fn write_var_sets(&mut self) -> Result<(), BinError> {
+        let mut text = String::new();
+        for set in &self.dictionary.variable_sets {
+            let names = set
+                .variables
+                .iter()
+                .map(|index| self.dictionary.variables[*index].name.to_string())
+                .collect::<Vec<_>>()
+                .join(" ");
+            writeln!(&mut text, "{}= {names}", set.name).unwrap();
+        }
+        self.write_string_record(5, &text)
+    }
+
+    /// Writes multiple response sets as an extension record.
+    ///
+    /// If `pre_v14` is true, writes only sets supported by SPSS before release
+    /// 14 (as subtype 7), otherwise writes sets supported only by later
+    /// versions (as subtype 19).
+    ///
+    /// Each set becomes one line made of, in order:
+    ///
+    /// - the set's name and `=`;
+    /// - the type: `C`, `D`, or `E 1 `/`E 11 ` (the trailing space is part of
+    ///   what the reader strips before the counted value);
+    /// - for multiple dichotomy sets, the counted value as a counted string
+    ///   (`LENGTH`, a space, then `LENGTH` bytes);
+    /// - a space, then the label as a counted string;
+    /// - the lowercased short name of each variable, each preceded by a space;
+    /// - a line feed.
+    fn write_mrsets(&mut self, pre_v14: bool) -> Result<(), BinError> {
+        let mut output = Vec::new();
+        for set in self
+            .dictionary
+            .mrsets
+            .iter()
+            .filter(|set| set.mr_type.supported_before_v14() == pre_v14)
+        {
+            output.extend_from_slice(&self.dictionary.encoding.encode(&set.name).0[..]);
+            output.push(b'=');
+            match &set.mr_type {
+                MultipleResponseType::MultipleDichotomy { datum, labels } => {
+                    // `D` is followed directly by the counted value, whereas
+                    // the `E` forms need a space after the number.
+                    let leader = match labels {
+                        CategoryLabels::VarLabels => b"D".as_slice(),
+                        CategoryLabels::CountedValues {
+                            use_var_label_as_mrset_label: true,
+                        } => b"E 11 ".as_slice(),
+                        CategoryLabels::CountedValues {
+                            use_var_label_as_mrset_label: false,
+                        } => b"E 1 ".as_slice(),
+                    };
+                    output.extend_from_slice(leader);
+
+                    let mut value = match datum {
+                        Datum::Number(Some(number)) => {
+                            DisplayPlainF64(*number).to_string().into_bytes()
+                        }
+                        Datum::Number(None) => vec![b'.'],
+                        Datum::String(raw_string) => raw_string.0.clone(),
+                    };
+                    write!(&mut output, "{} ", value.len()).unwrap();
+                    output.append(&mut value);
+                }
+                MultipleResponseType::MultipleCategory => output.push(b'C'),
+            }
+
+            // A single space separates the type (and counted value, if any)
+            // from the label's length.
+            output.push(b' ');
+
+            let label = if set.mr_type.label_from_var_label() {
+                Cow::from(&[])
+            } else {
+                self.dictionary.encoding.encode(&set.label).0
+            };
+            write!(&mut output, "{} ", label.len()).unwrap();
+            output.extend_from_slice(&label[..]);
+
+            for variable in set.variables.iter().copied() {
+                // Only lowercase ASCII characters because other characters
+                // might expand upon lowercasing.
+                let short_name = self.short_names[variable][0].as_str().to_ascii_lowercase();
+                output.push(b' ');
+                output.extend_from_slice(&self.dictionary.encoding.encode(&short_name).0);
+            }
+            output.push(b'\n');
+        }
+        self.write_bytes_record(if pre_v14 { 7 } else { 19 }, &output)
+    }
+
+    /// Writes the variable display parameters as extension record subtype 11.
+    ///
+    /// The record holds three 4-byte values (measure, display width,
+    /// alignment) for every segment of every variable.  Only a variable's
+    /// first segment uses the variable's own display width; later segments use
+    /// the segment's default.
+    fn write_variable_display_parameters(&mut self) -> Result<(), BinError> {
+        let n_values = count_segments(&self.case_vars) * 3;
+        (7u32, 11u32, 4u32, n_values).write_le(self.writer)?;
+        for variable in &self.dictionary.variables {
+            let measure = match variable.measure {
+                Some(Measure::Nominal) => 1,
+                Some(Measure::Ordinal) => 2,
+                Some(Measure::Scale) => 3,
+                None => 0,
+            };
+            let alignment = match variable.alignment {
+                Alignment::Left => 0,
+                Alignment::Right => 1,
+                Alignment::Center => 2,
+            };
+            for (index, segment) in SegmentWidths::new(variable.width).enumerate() {
+                let display_width = if index == 0 {
+                    variable.display_width
+                } else {
+                    segment.default_display_width()
+                };
+                (measure, display_width, alignment).write_le(self.writer)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Writes the long variable names as extension record subtype 13, unless
+    /// writing [Version::V2], in which case nothing is written.
+    ///
+    /// The record is a tab-separated list of `SHORT=LONG` pairs.
+    fn write_long_variable_names(&mut self) -> Result<(), BinError> {
+        if self.version == Version::V2 {
+            return Ok(());
+        }
+
+        let pairs: Vec<String> = self
+            .dictionary
+            .variables
+            .iter()
+            .enumerate()
+            .map(|(index, variable)| {
+                format!("{}={}", &self.short_names[index][0], variable.name)
+            })
+            .collect();
+        self.write_string_record(13, &pairs.join("\t"))
+    }
+
+    /// Writes the widths of very long string variables as extension record
+    /// subtype 14.
+    ///
+    /// Each entry is the variable's first short name, `=`, the width
+    /// zero-padded to 5 digits, a NUL byte, and a tab.
+    fn write_very_long_strings(&mut self) -> Result<(), BinError> {
+        let mut text = String::new();
+        let very_long = self
+            .dictionary
+            .variables
+            .iter()
+            .enumerate()
+            .filter(|(_, variable)| variable.width.is_very_long());
+        for (index, variable) in very_long {
+            let width = variable.width.as_string_width().unwrap();
+            let short_name = &self.short_names[index][0];
+            write!(&mut text, "{short_name}={width:05}\0\t").unwrap();
+        }
+        self.write_string_record(14, &text)
+    }
+
+    /// Writes value labels for long string variables as extension record
+    /// subtype 21.
+    ///
+    /// For each long string variable that has value labels, the record holds
+    /// the variable's name (length-prefixed), its width, the number of labels,
+    /// and then each value and label as length-prefixed byte strings.  Nothing
+    /// is written if no variable qualifies.
+    fn write_long_string_value_labels(&mut self) -> Result<(), BinError> {
+        let mut body = Vec::new();
+        let mut cursor = Cursor::new(&mut body);
+        for variable in &self.dictionary.variables {
+            // Skip this variable only: a variable that does not qualify must
+            // not prevent later long string variables from being written.
+            if variable.value_labels.is_empty() || !variable.width.is_long_string() {
+                continue;
+            }
+            let name = self.dictionary.encoding.encode(&variable.name).0;
+            (
+                name.len() as u32,
+                &name[..],
+                variable.width.as_string_width().unwrap() as u32,
+                variable.value_labels.0.len() as u32,
+            )
+                .write_le(&mut cursor)?;
+
+            for (value, label) in &variable.value_labels.0 {
+                let value = value.as_string().unwrap();
+                let label = self.dictionary.encoding.encode(&label).0;
+                (
+                    value.len() as u32,
+                    value.as_bytes(),
+                    label.len() as u32,
+                    &label[..],
+                )
+                    .write_le(&mut cursor)?;
+            }
+        }
+        self.write_bytes_record(21, &body)
+    }
+
+    /// Writes missing values for long string variables as extension record
+    /// subtype 22.
+    ///
+    /// For each long string variable that has missing values, the record holds
+    /// the variable's name (length-prefixed), the number of missing values as
+    /// a single byte, the length of each value (always 8), and then the first
+    /// 8 bytes of each value.  Nothing is written if no variable qualifies.
+    fn write_long_string_missing_values(&mut self) -> Result<(), BinError> {
+        let mut body = Vec::new();
+        let mut cursor = Cursor::new(&mut body);
+        for variable in &self.dictionary.variables {
+            // Skip this variable only: a variable that does not qualify must
+            // not prevent later long string variables from being written.
+            if variable.missing_values.is_empty() || !variable.width.is_long_string() {
+                continue;
+            }
+            let name = self.dictionary.encoding.encode(&variable.name).0;
+            (
+                name.len() as u32,
+                &name[..],
+                // Unlike the other counts in this record, the number of
+                // missing values is a single byte in the file format.
+                variable.missing_values.values().len() as u8,
+                8u32,
+            )
+                .write_le(&mut cursor)?;
+
+            for value in variable.missing_values.values() {
+                let value = value.as_string().unwrap();
+                value.0[..8].write_le(&mut cursor)?;
+            }
+        }
+        self.write_bytes_record(22, &body)
+    }
+
+    /// Writes the dictionary's own attributes as extension record subtype 17.
+    ///
+    /// Nothing is written for [Version::V2], which (like long variable names)
+    /// predates this record.
+    fn write_data_file_attributes(&mut self) -> Result<(), BinError> {
+        if self.version == Version::V2 {
+            return Ok(());
+        }
+        let mut s = String::new();
+        put_attributes(&self.dictionary.attributes, &mut s);
+        self.write_string_record(17, &s)
+    }
+
+    /// Writes every variable's attributes as extension record subtype 18.
+    ///
+    /// Each variable contributes its name, a colon, and its attribute set; the
+    /// variables are separated by `/`.  The variable's role is included as an
+    /// extra `$@Role` attribute.  Nothing is written for [Version::V2], which
+    /// (like long variable names) predates this record.
+    fn write_variable_attributes(&mut self) -> Result<(), BinError> {
+        if self.version == Version::V2 {
+            return Ok(());
+        }
+        let mut s = String::new();
+        for (index, variable) in self.dictionary.variables.iter().enumerate() {
+            let mut attributes = variable.attributes.clone();
+            attributes.0.insert(
+                Identifier::new("$@Role").unwrap(),
+                vec![i32::from(variable.role).to_string()],
+            );
+
+            if index > 0 {
+                s.push('/');
+            }
+            // Without the name, a reader cannot tell which variable the
+            // attribute set belongs to.
+            write!(&mut s, "{}:", variable.name).unwrap();
+            put_attributes(&attributes, &mut s);
+        }
+        self.write_string_record(18, &s)
+    }
+
+    /// Writes the name of the dictionary's encoding as extension record
+    /// subtype 20.
+    fn write_encoding(&mut self) -> Result<(), BinError> {
+        let name = self.dictionary.encoding.name();
+        self.write_string_record(20, name)
+    }
+
+    /// Writes `bytes` as the body of an extension record (record type 7) with
+    /// the given `subtype`, as `bytes.len()` elements of 1 byte each.
+    ///
+    /// Writes nothing at all if `bytes` is empty.
+    fn write_bytes_record(&mut self, subtype: u32, bytes: &[u8]) -> Result<(), BinError> {
+        if bytes.is_empty() {
+            return Ok(());
+        }
+        let count = bytes.len() as u32;
+        (7u32, subtype, 1u32, count, bytes).write_le(self.writer)
+    }
+
+    /// Encodes `s` in the dictionary's encoding and writes the result as an
+    /// extension record with the given `subtype`.
+    fn write_string_record(&mut self, subtype: u32, s: &str) -> Result<(), BinError> {
+        let (encoded, _, _) = self.dictionary.encoding.encode(s);
+        self.write_bytes_record(subtype, &encoded)
+    }
 }
 
 #[derive(BinWrite)]
@@ -357,9 +689,7 @@ impl BinWrite for Datum {
         _: (),
     ) -> binrw::BinResult<()> {
         match self {
-            Datum::Number(number) => number
-                .unwrap_or(-f64::MAX)
-                .write_options(writer, endian, ()),
+            Datum::Number(number) => number.unwrap_or(f64::MIN).write_options(writer, endian, ()),
             Datum::String(raw_string) => raw_string.0.write_options(writer, endian, ()),
         }
     }