From 00310ccf79d415db5dd63d1a701a884dac9d4dbf Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 20 Jul 2025 08:18:19 -0700 Subject: [PATCH] cleanup --- rust/pspp/src/main.rs | 2 +- rust/pspp/src/sys/write.rs | 128 ++++++++++++++----------------------- 2 files changed, 48 insertions(+), 82 deletions(-) diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index 9cb88419c3..8edd338f1e 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -116,7 +116,7 @@ struct CsvOptions { #[derive(Args, Clone, Debug)] struct SysOptions { /// How to compress data in the system file. - #[arg(long)] + #[arg(long, default_value = "simple")] compression: Option, } diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index 356988fa13..89901017de 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -803,19 +803,48 @@ impl CaseVar { pub struct Writer { compression: Option, case_vars: Vec, + inner: WriterInner, +} + +pub struct WriterInner { opcodes: Vec, data: Vec, inner: W, } +impl WriterInner +where + W: Write + Seek, +{ + fn flush_compressed(&mut self) -> Result<(), BinError> { + if !self.opcodes.is_empty() { + self.opcodes.resize(8, 0); + self.inner.write_all(&mut self.opcodes)?; + self.inner.write(&mut self.data)?; + self.opcodes.clear(); + self.data.clear(); + } + Ok(()) + } + fn put_opcode(&mut self, opcode: u8) -> Result<(), BinError> { + if self.opcodes.len() >= 8 { + self.flush_compressed()?; + } + self.opcodes.push(opcode); + Ok(()) + } +} + impl Writer { fn new(options: WriteOptions, case_vars: Vec, inner: W) -> Self { Self { compression: options.compression, case_vars, - opcodes: Vec::with_capacity(8), - data: Vec::with_capacity(64), - inner, + inner: WriterInner { + opcodes: Vec::with_capacity(8), + data: Vec::with_capacity(64), + inner, + }, } } } @@ -839,72 +868,34 @@ where .as_number() .unwrap() .unwrap_or(f64::MIN) - .write_le(&mut self.inner)?, + .write_le(&mut self.inner.inner)?, CaseVar::String { width: _, encoding } => { let mut s = datum.as_string().unwrap().as_bytes(); for segment in encoding { let data; (data, s) = s.split_at(segment.data_bytes); - (data, Pad::new(segment.padding_bytes, 0)).write_le(&mut self.inner)?; + (data, Pad::new(segment.padding_bytes, 0)).write_le(&mut self.inner.inner)?; } } } } Ok(()) } - fn flush_compressed( - opcodes: &mut Vec, - data: &mut Vec, - inner: &mut W, - ) -> Result<(), BinError> { - if !opcodes.is_empty() { - opcodes.resize(8, 0); - inner.write_all(opcodes)?; - inner.write(data)?; - opcodes.clear(); - data.clear(); - } - Ok(()) - } - fn put_opcode( - opcodes: &mut Vec, - data: &mut Vec, - inner: &mut W, - opcode: u8, - ) -> Result<(), BinError> { - if opcodes.len() >= 8 { - Self::flush_compressed(opcodes, data, inner)?; - } - opcodes.push(opcode); - Ok(()) - } fn write_case_compressed(&mut self, case: &Case) -> Result<(), BinError> { for (var, datum) in zip_eq(&self.case_vars, &case.0) { match var { CaseVar::Numeric => match datum.as_number().unwrap() { - None => { - Self::put_opcode(&mut self.opcodes, &mut self.data, &mut self.inner, 255)? - } + None => self.inner.put_opcode(255)?, Some(number) => { if number >= 1.0 - BIAS && number <= 251.0 - BIAS && number == number.trunc() { - Self::put_opcode( - &mut self.opcodes, - &mut self.data, - &mut self.inner, - (number + BIAS) as u8, - )? + self.inner.put_opcode((number + BIAS) as u8)? } else { - Self::put_opcode( - &mut self.opcodes, - &mut self.data, - &mut self.inner, - 253, - )?; - - number.write_le(&mut Cursor::new(&mut self.data)).unwrap(); + self.inner.put_opcode(253)?; + + number.write_le(&mut Cursor::new(&mut self.inner.data)).unwrap(); } } }, @@ -918,48 +909,23 @@ where let (chunks, remainder) = data.as_chunks::<8>(); for chunk in chunks { if chunk == b" " { - Self::put_opcode( - &mut self.opcodes, - &mut self.data, - &mut self.inner, - 254, - )?; + self.inner.put_opcode(254)?; } else { - Self::put_opcode( - &mut self.opcodes, - &mut self.data, - &mut self.inner, - 253, - )?; - self.data.extend_from_slice(chunk); + self.inner.put_opcode(253)?; + self.inner.data.extend_from_slice(chunk); } } if !remainder.is_empty() { if remainder.iter().all(|c| *c == b' ') { - Self::put_opcode( - &mut self.opcodes, - &mut self.data, - &mut self.inner, - 254, - )?; + self.inner.put_opcode(254)?; } else { - Self::put_opcode( - &mut self.opcodes, - &mut self.data, - &mut self.inner, - 253, - )?; - self.data.extend_from_slice(remainder); - self.data.extend(repeat_n(0, 8 - remainder.len())); + self.inner.put_opcode(253)?; + self.inner.data.extend_from_slice(remainder); + self.inner.data.extend(repeat_n(0, 8 - remainder.len())); } } for _ in 0..segment.padding_bytes / 8 { - Self::put_opcode( - &mut self.opcodes, - &mut self.data, - &mut self.inner, - 254, - )?; + self.inner.put_opcode(254)?; } } } -- 2.30.2