work on writer
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 19 Jul 2025 21:04:10 +0000 (14:04 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 19 Jul 2025 21:04:10 +0000 (14:04 -0700)
rust/pspp/src/main.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/mod.rs
rust/pspp/src/sys/test.rs
rust/pspp/src/sys/write.rs

index 458dd018397140966ce1e32a9b4ff927efaf233c..9cb88419c3f8ca532cb20401fc0bb56dbc51c6fa 100644 (file)
@@ -22,7 +22,7 @@ use pspp::{
     sys::{
         self,
         raw::{infer_encoding, records::Compression, Decoder, Magic, Reader, Record},
-        ReaderOptions, Records,
+        ReadOptions, Records,
     },
 };
 use std::{
@@ -126,7 +126,7 @@ impl Convert {
             eprintln!("warning: {warning}");
         }
 
-        let (dictionary, _, cases) = ReaderOptions::new()
+        let (dictionary, _, cases) = ReadOptions::new()
             .with_encoding(self.encoding)
             .with_password(self.password.clone())
             .open_file(&self.input, warn)?
index 802cd87d3deb4e1baae6a3c78f6a2b57d61e8513..11a6c59a9edf7d44afeb35c3429ffdc3e5ee8da8 100644 (file)
@@ -478,7 +478,7 @@ pub enum Error {
 
 /// Options for reading a system file.
 #[derive(Default, Clone, Debug)]
-pub struct ReaderOptions {
+pub struct ReadOptions {
     /// Character encoding for text in the system file.
     ///
     /// If not set, the character encoding will be determined from reading the
@@ -494,8 +494,8 @@ pub struct ReaderOptions {
     pub password: Option<String>,
 }
 
-impl ReaderOptions {
-    /// Construct a new `ReaderOptions` that initially does not specify an
+impl ReadOptions {
+    /// Construct a new `ReadOptions` that initially does not specify an
     /// encoding or password.
     pub fn new() -> Self {
         Self::default()
index c3549ed358c597dbd86f5d7ff84899eeae27ca27..911cc0f62f341f7bba1c8c368bdd80b81e1297b1 100644 (file)
@@ -22,7 +22,8 @@
 //! facilitate interchange between even the oldest and newest versions of
 //! software.
 //!
-//! To read a system file in the simplest way, use [ReaderOptions].
+//! Use [ReadOptions] to read a system file in the simplest way.
+//! Use [WriteOptions] to write a system file.
 
 // Warn about missing docs, but not for items declared with `#[cfg(test)]`.
 #![cfg_attr(not(test), warn(missing_docs))]
index 48e95a0f6b0b7c2aeefd95d793678c671ecdc921..a5fd40b92b6ab418e51b52bdf947e448540ee2ab 100644 (file)
@@ -29,7 +29,7 @@ use crate::{
         Details, Item, Text,
     },
     sys::{
-        cooked::ReaderOptions,
+        cooked::ReadOptions,
         raw::{self, ErrorDetails},
         sack::sack,
     },
@@ -553,7 +553,7 @@ fn encrypted_file() {
 
 #[test]
 fn encrypted_file_without_password() {
-    let error = ReaderOptions::new()
+    let error = ReadOptions::new()
         .open_file("src/crypto/testdata/test-encrypted.sav", |_| {
             panic!();
         })
@@ -615,7 +615,7 @@ where
     R: Read + Seek + 'static,
 {
     let mut warnings = Vec::new();
-    let output = match ReaderOptions::new().open_reader(sysfile, |warning| warnings.push(warning)) {
+    let output = match ReadOptions::new().open_reader(sysfile, |warning| warnings.push(warning)) {
         Ok(system_file) => {
             let (dictionary, metadata, cases) = system_file.into_parts();
             let (group, data) = metadata.to_pivot_rows();
index 53391f670bac877690d03575f01101b98d739485..2ba55c2926815f62b2d551da519d39ec6869c481 100644 (file)
@@ -1,11 +1,11 @@
-#![allow(dead_code, missing_docs)]
+#![allow(dead_code)]
 use core::f64;
 use std::{
     borrow::Cow,
     collections::HashMap,
     fmt::Write as _,
     fs::File,
-    io::{Cursor, Seek, Write},
+    io::{BufWriter, Cursor, Seek, Write},
     path::Path,
 };
 
@@ -48,6 +48,7 @@ pub enum Version {
     V3,
 }
 
+/// Options for writing a system file.
 #[derive(Copy, Clone, Debug)]
 pub struct WriteOptions {
     /// How to compress (if at all) data in the system file.
@@ -67,25 +68,36 @@ impl Default for WriteOptions {
 }
 
 impl WriteOptions {
+    /// Constructs a new set of default options.
     pub fn new() -> Self {
         Self::default()
     }
+
+    /// Returns `self` with the compression format set to `compression`.
     pub fn with_compression(self, compression: Option<Compression>) -> Self {
         Self {
             compression,
             ..self
         }
     }
+
+    /// Returns `self` with the version set to `version`.
     pub fn with_version(self, version: Version) -> Self {
         Self { version, ..self }
     }
+
+    /// Writes `dictionary` to `path` in system file format.  Returns a [Writer]
+    /// that can be used for writing cases to the new file.
     pub fn write_file(
         self,
         dictionary: &Dictionary,
         path: impl AsRef<Path>,
-    ) -> Result<Writer<File>, BinError> {
-        self.write_writer(dictionary, File::create(path)?)
+    ) -> Result<Writer<BufWriter<File>>, BinError> {
+        self.write_writer(dictionary, BufWriter::new(File::create(path)?))
     }
+
+    /// Writes `dictionary` to `writer` in system file format.  Returns a
+    /// [Writer] that can be used for writing cases to the new file.
     pub fn write_writer<W>(
         self,
         dictionary: &Dictionary,
@@ -123,6 +135,8 @@ fn put_attributes(attributes: &Attributes, s: &mut String) {
     }
 }
 
+const BIAS: f64 = 100.0;
+
 impl<'a, W> DictionaryWriter<'a, W>
 where
     W: Write + Seek,
@@ -146,6 +160,7 @@ where
         self.write_header()?;
         self.write_variables()?;
         self.write_value_labels()?;
+        self.write_documents()?;
         self.write_integer_record()?;
         self.write_float_record()?;
         self.write_var_sets()?;
@@ -196,7 +211,7 @@ where
                 0
             },
             n_cases: u32::MAX,
-            bias: 100.0,
+            bias: BIAS,
             creation_date: as_byte_array(now.format("%d %b %y").to_string()),
             creation_time: as_byte_array(now.format("%H:%M:%S").to_string()),
             file_label: as_byte_array(self.dictionary.file_label.clone().unwrap_or_default()),
@@ -345,7 +360,7 @@ where
         Ok(())
     }
 
-    pub fn write_documents(&mut self) -> Result<(), BinError> {
+    fn write_documents(&mut self) -> Result<(), BinError> {
         if !self.dictionary.documents.is_empty() {
             (6u32, self.dictionary.documents.len() as u32).write_le(self.writer)?;
             for line in &self.dictionary.documents {
@@ -787,6 +802,8 @@ impl CaseVar {
 pub struct Writer<W> {
     compression: Option<Compression>,
     case_vars: Vec<CaseVar>,
+    opcodes: Vec<u8>,
+    data: Vec<u8>,
     inner: W,
 }
 
@@ -795,6 +812,8 @@ impl<W> Writer<W> {
         Self {
             compression: options.compression,
             case_vars,
+            opcodes: Vec::with_capacity(8),
+            data: Vec::with_capacity(64),
             inner,
         }
     }
@@ -804,9 +823,11 @@ impl<W> Writer<W>
 where
     W: Write + Seek,
 {
+    /// Writes `case` to the system file.
     pub fn write_case(&mut self, case: &Case) -> Result<(), BinError> {
         match self.compression {
-            Some(_) => todo!(),
+            Some(Compression::Simple) => self.write_case_compressed(case),
+            Some(Compression::ZLib) => todo!(),
             None => self.write_case_uncompressed(case),
         }
     }
@@ -830,4 +851,66 @@ where
         }
         Ok(())
     }
+    fn flush_compressed(
+        opcodes: &mut Vec<u8>,
+        data: &mut Vec<u8>,
+        inner: &mut W,
+    ) -> Result<(), BinError> {
+        if !opcodes.is_empty() {
+            opcodes.resize(8, 0);
+            inner.write_all(opcodes)?;
+            inner.write(data)?;
+            opcodes.clear();
+            data.clear();
+        }
+        Ok(())
+    }
+    fn put_opcode(
+        opcodes: &mut Vec<u8>,
+        data: &mut Vec<u8>,
+        inner: &mut W,
+        opcode: u8,
+    ) -> Result<(), BinError> {
+        if opcodes.len() >= 8 {
+            Self::flush_compressed(opcodes, data, inner)?;
+        }
+        opcodes.push(opcode);
+        Ok(())
+    }
+    fn write_case_compressed(&mut self, case: &Case) -> Result<(), BinError> {
+        for (var, datum) in zip_eq(&self.case_vars, &case.0) {
+            match var {
+                CaseVar::Numeric => match datum.as_number().unwrap() {
+                    None => {
+                        Self::put_opcode(&mut self.opcodes, &mut self.data, &mut self.inner, 255)?
+                    }
+                    Some(number) => {
+                        if number >= 1.0 - BIAS
+                            && number <= 251.0 - BIAS
+                            && number == number.trunc()
+                        {
+                            Self::put_opcode(
+                                &mut self.opcodes,
+                                &mut self.data,
+                                &mut self.inner,
+                                (number + BIAS) as u8,
+                            )?
+                        } else {
+                            Self::put_opcode(
+                                &mut self.opcodes,
+                                &mut self.data,
+                                &mut self.inner,
+                                253,
+                            )?;
+
+                            number.write_le(&mut Cursor::new(&mut self.data)).unwrap();
+                        }
+                    }
+                },
+
+                CaseVar::String { width: _, encoding } => todo!(),
+            }
+        }
+        Ok(())
+    }
 }