From f62de3206b9aa319ce3b237c60052371a9cf62b3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 24 Mar 2025 10:46:53 -0700 Subject: [PATCH] continue work on binary formats --- rust/pspp/src/dictionary.rs | 5 +++ rust/pspp/src/format/mod.rs | 76 ++++++++++++++++++++++++------------- 2 files changed, 54 insertions(+), 27 deletions(-) diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 7be3e9e744..3cc252692c 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -226,6 +226,11 @@ impl Value { Self::Number(None) } + /// Returns an object that implements [Display] for printing this `Value` as + /// `format`. `encoding` specifies this `Value`'s encoding (therefore, it + /// is used only if this is a `Value::String`). + /// + /// [Display]: std::fmt::Display pub fn display(&self, format: Format, encoding: &'static Encoding) -> DisplayValue { DisplayValue::new(format, self, encoding) } diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index b41fffb986..e4982f3823 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -9,7 +9,7 @@ use std::{ }; use chrono::{Datelike, Local, NaiveDate}; -use encoding_rs::Encoding; +use encoding_rs::{Encoding, UTF_8}; use enum_iterator::{all, Sequence}; use enum_map::{Enum, EnumMap}; use libm::frexp; @@ -1131,11 +1131,7 @@ impl<'a, 'b> Display for DisplayValue<'a, 'b> { Type::N => self.n(f, number), Type::Z => self.z(f, number), - Type::P => todo!(), - Type::PK => todo!(), - Type::IB => todo!(), - Type::PIB => todo!(), - Type::RB => todo!(), + Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => self.fmt_binary(f), Type::PIBHex => self.pibhex(f, number), Type::RBHex => self.rbhex(f, number), @@ -1171,6 +1167,13 @@ impl<'a, 'b> DisplayValue<'a, 'b> { pub fn with_settings(self, settings: &'b Settings) -> Self { Self { settings, ..self } } + fn fmt_binary(&self, f: &mut Formatter) -> FmtResult { + let output = self.to_binary().unwrap(); + for b in output { + f.write_char(b as char)?; + } + Ok(()) + } fn number(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { if number.is_finite() { let style = self.settings.number_style(self.format.type_); @@ -1215,7 +1218,7 @@ impl<'a, 'b> DisplayValue<'a, 'b> { fn missing(&self, f: &mut Formatter<'_>) -> FmtResult { match self.format.type_ { - //Type::P => return self.p(f, -f64::MAX), + Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => return self.fmt_binary(f), Type::RBHex => return self.rbhex(f, -f64::MAX), _ => (), } @@ -1597,17 +1600,42 @@ impl<'a, 'b> DisplayValue<'a, 'b> { } } - pub fn write_binary(&self, w: W) -> Result + /// Writes this object to `w`. Writes binary formats ([Type::P], + /// [Type::PIB], and so on) as binary values, and writes other output + /// formats in the given `encoding`. + /// + /// If `dv` is a [DisplayValue], the difference between `write!(f, "{}", + /// dv)` and `dv.write(f, encoding)` is: + /// + /// * `write!` always outputs UTF-8. Binary formats are encoded as the + /// Unicode characters corresponding to their bytes. + /// + /// * `dv.write` outputs the desired `encoding`. Binary formats are not + /// encoded in `encoding` (and thus they might be invalid for the + /// encoding). + pub fn write(&self, mut w: W, encoding: &'static Encoding) -> Result<(), IoError> where W: IoWrite, { - let Some(number) = self.value.as_number() else { - return Ok(false); - }; + match self.to_binary() { + Some(binary) => w.write_all(&binary), + None if encoding == UTF_8 => { + write!(&mut w, "{}", self) + } + None => { + let mut temp = SmallString::<[u8; 64]>::new(); + write!(&mut temp, "{}", self).unwrap(); + w.write_all(&encoding.encode(&temp).0) + } + } + } + + fn to_binary(&self) -> Option> { + let number = self.value.as_number()?; match self.format.type_() { - Type::P => self.p(w, number).map(|()| true), - Type::PK => self.pk(w, number).map(|()| true), - _ => Ok(false), + Type::P => Some(self.p(number)), + Type::PK => Some(self.pk(number)), + _ => None, } } @@ -1642,29 +1670,23 @@ impl<'a, 'b> DisplayValue<'a, 'b> { } } - fn p(&self, mut w: W, number: Option) -> Result<(), IoError> - where - W: IoWrite, - { + fn p(&self, number: Option) -> SmallVec<[u8; 16]> { let (valid, mut output) = self.bcd(number, self.format.w() * 2 - 1); if valid && number.is_some_and(|number| number < 0.0) { *output.last_mut().unwrap() |= 0xd; } else { *output.last_mut().unwrap() |= 0xf; } - w.write_all(&*output) + output } - fn pk(&self, mut w: W, number: Option) -> Result<(), IoError> - where - W: IoWrite, - { + fn pk(&self, number: Option) -> SmallVec<[u8; 16]> { let number = match number { Some(number) if number < 0.0 => None, other => other, }; let (_valid, output) = self.bcd(number, self.format.w() * 2); - w.write_all(&*output) + output } } @@ -2283,10 +2305,10 @@ mod test { assert_eq!(tokens.get(1), Some(&Token::Punct(Punct::Colon))); let expected = tokens[2].as_string().unwrap(); let mut actual = SmallVec::<[u8; 16]>::new(); - assert!(Value::Number(value) + Value::Number(value) .display(format, UTF_8) - .write_binary(&mut actual) - .unwrap()); + .write(&mut actual, UTF_8) + .unwrap(); let mut actual_s = SmallString::<[u8; 32]>::new(); for b in actual { write!(&mut actual_s, "{:02x}", b).unwrap(); -- 2.30.2