From: Ben Pfaff Date: Tue, 31 Dec 2024 18:41:05 +0000 (-0800) Subject: work on formats X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d6b60518a8a855eed0a661bda952d58ab8cff933;p=pspp work on formats --- diff --git a/rust/pspp/src/format.rs b/rust/pspp/src/format.rs index cea27c3769..f07ab4cbeb 100644 --- a/rust/pspp/src/format.rs +++ b/rust/pspp/src/format.rs @@ -11,6 +11,7 @@ use encoding_rs::Encoding; use enum_map::{Enum, EnumMap}; use libm::frexp; use smallstr::SmallString; +use smallvec::SmallVec; use thiserror::Error as ThisError; use unicode_width::UnicodeWidthStr; @@ -326,6 +327,57 @@ impl Type { _ => Ok(()), } } + + /// For time and date formats, returns a template used for input and output + /// in a field of the given `width`. + /// + /// `width` only affects whether a 2-digit year or a 4-digit year is used, + /// that is, whether the returned string contains `yy` or `yyyy`, and + /// whether seconds are included, that is, whether the returned string + /// contains `:SS`. A caller that doesn't care whether the returned string + /// contains `yy` or `yyyy` or `:SS` can just specify 0 to omit them. + pub fn date_template(self, width: usize) -> Option<&'static str> { + let (short, long) = match self { + Self::F + | Self::Comma + | Self::Dot + | Self::Dollar + | Self::Pct + | Self::E + | Self::CC(_) + | Self::N + | Self::Z + | Self::P + | Self::PK + | Self::IB + | Self::PIB + | Self::PIBHex + | Self::RB + | Self::RBHex + | Self::WkDay + | Self::Month + | Self::A + | Self::AHex => return None, + Self::Date => ("dd-mmm-yy", "dd-mmm-yyyy"), + Self::ADate => ("mm/dd/yy", "mm/dd/yyyy"), + Self::EDate => ("dd.mm.yy", "dd.mm.yyyy"), + Self::JDate => ("yyddd", "yyyyddd"), + Self::SDate => ("yy/mm/dd", "yyyy/mm/dd"), + Self::QYr => ("q Q yy", "q Q yyyy"), + Self::MoYr => ("mmm yy", "mmm yyyy"), + Self::WkYr => ("ww WK yy", "ww WK yyyy"), + Self::DateTime => ("dd-mmm-yyyy HH:MM", "dd-mmm-yyyy HH:MM:SS"), + Self::YMDHMS => ("yyyy-mm-dd HH:MM", "yyyy-mm-dd HH:MM:SS"), + Self::MTime => ("MM", "MM:SS"), + Self::Time => ("HH:MM", "HH:MM:SS"), + Self::DTime => ("D HH:MM", "D HH:MM:SS"), + }; + if width >= long.len() { + Some(long) + } else { + Some(short) + } + } } impl Display for Type { @@ -447,11 +499,11 @@ impl Format { pub fn format(self) -> Type { self.type_ } - pub fn w(self) -> Width { - self.w + pub fn w(self) -> usize { + self.w as usize } - pub fn d(self) -> Decimals { - self.d + pub fn d(self) -> usize { + self.d as usize } pub fn default_for_width(var_width: VarWidth) -> Self { @@ -912,27 +964,29 @@ impl<'a> Display for DisplayValue<'a> { | Type::CC(_) => self.number(f, number), Type::N => self.n(f, number), Type::Z => self.z(f, number), + Type::P => todo!(), Type::PK => todo!(), Type::IB => todo!(), Type::PIB => todo!(), - Type::PIBHex => todo!(), Type::RB => todo!(), - Type::RBHex => todo!(), - Type::Date => todo!(), - Type::ADate => todo!(), - Type::EDate => todo!(), - Type::JDate => todo!(), - Type::SDate => todo!(), - Type::QYr => todo!(), - Type::MoYr => todo!(), - Type::WkYr => todo!(), - Type::DateTime => todo!(), - Type::YMDHMS => todo!(), - Type::MTime => todo!(), - Type::Time => todo!(), - Type::DTime => todo!(), - Type::WkDay => todo!(), + + Type::PIBHex => self.pibhex(f, number), + Type::RBHex => self.rbhex(f, number), + Type::Date | + Type::ADate | + Type::EDate | + Type::JDate | + Type::SDate | + Type::QYr | + Type::MoYr | + Type::WkYr | + Type::DateTime | + Type::YMDHMS | + Type::MTime | + Type::Time | + Type::DTime | + Type::WkDay => self.date(f, number), Type::Month => todo!(), Type::A => todo!(), Type::AHex => todo!(), @@ -946,7 +1000,7 @@ impl<'a> DisplayValue<'a> { let style = PsppSettings::global() .formats .number_style(self.format.type_); - if self.format.type_ != Type::E && number.abs() < 1.5 * power10(self.format.w) { + if self.format.type_ != Type::E && number.abs() < 1.5 * power10(self.format.w()) { let rounder = Rounder::new(style, number, self.format.d); if self.decimal(f, &rounder, style, true)? || self.scientific(f, number, style, true)? @@ -978,7 +1032,7 @@ impl<'a> DisplayValue<'a> { } else { "Unknown" }; - let w = self.format.w as usize; + let w = self.format.w(); write!(f, "{s:>0$.*}", w) } else { self.overflow(f) @@ -986,8 +1040,13 @@ impl<'a> DisplayValue<'a> { } fn missing(&self, f: &mut Formatter<'_>) -> FmtResult { - let w = self.format.w as isize; - let d = self.format.d as isize; + match self.format.type_ { + Type::RBHex => return self.rbhex(f, -f64::MAX), + _ => (), + } + + let w = self.format.w() as isize; + let d = self.format.d() as isize; let dot_position = match self.format.type_ { Type::N => w - 1, Type::Pct => w - d - 2, @@ -1032,14 +1091,14 @@ impl<'a> DisplayValue<'a> { if negative { width += style.neg_prefix.width; } - if width > self.format.w as usize { + if width > self.format.w() { continue; } // If there's room for the prefix and suffix, allocate // space. If the affixes are required, but there's no // space, give up. - let add_affixes = allocate_space(style.affix_width(), self.format.w, &mut width); + let add_affixes = allocate_space(style.affix_width(), self.format.w(), &mut width); if !add_affixes && require_affixes { continue; } @@ -1051,13 +1110,13 @@ impl<'a> DisplayValue<'a> { let grouping = style.grouping.filter(|_| { integer_digits > 3 && (self.format.d == 0 || decimals > 0) - && allocate_space((integer_digits - 1) / 3, self.format.w, &mut width) + && allocate_space((integer_digits - 1) / 3, self.format.w(), &mut width) }); // Assemble number. let magnitude = rounder.format(decimals as usize); let mut output = SmallString::<[u8; 40]>::new(); - for _ in width..self.format.w as usize { + for _ in width..self.format.w() { output.push(' '); } if negative { @@ -1090,8 +1149,8 @@ impl<'a> DisplayValue<'a> { } } - debug_assert!(output.len() >= self.format.w as usize); - debug_assert!(output.len() <= self.format.w as usize + style.extra_bytes); + debug_assert!(output.len() >= self.format.w()); + debug_assert!(output.len() <= self.format.w() + style.extra_bytes); write!(f, "{output}")?; return Ok(true); } @@ -1110,12 +1169,12 @@ impl<'a> DisplayValue<'a> { if number.is_sign_negative() { width += style.neg_prefix.width; } - if width > self.format.w as usize { + if width > self.format.w() { return Ok(false); } // Check for room for prefix and suffix. - let add_affixes = allocate_space(style.affix_width(), self.format.w, &mut width); + let add_affixes = allocate_space(style.affix_width(), self.format.w(), &mut width); if require_affixes && !add_affixes { return Ok(false); } @@ -1124,14 +1183,14 @@ impl<'a> DisplayValue<'a> { // (If that turns out to be `1`, then we'll output a decimal point // without any digits following.) let mut fraction_width = - min(self.format.d as usize + 1, self.format.w as usize - width).min(16); + min(self.format.d as usize + 1, self.format.w() - width).min(16); if self.format.type_ != Type::E && fraction_width == 1 { fraction_width = 0; } width += fraction_width; let mut output = SmallString::<[u8; 40]>::new(); - for _ in width..self.format.w as usize { + for _ in width..self.format.w() { output.push(' '); } if number.is_sign_negative() { @@ -1179,8 +1238,8 @@ impl<'a> DisplayValue<'a> { } } - debug_assert!(output.len() >= self.format.w as usize); - debug_assert!(output.len() <= self.format.w as usize + style.extra_bytes); + debug_assert!(output.len() >= self.format.w()); + debug_assert!(output.len() <= self.format.w() + style.extra_bytes); write!(f, "{output}")?; Ok(true) } @@ -1190,14 +1249,14 @@ impl<'a> DisplayValue<'a> { return self.missing(f); } - let number = number * power10(self.format.d as u16); + let number = number * power10(self.format.d()); let number = number.round().abs(); - if number >= power10(self.format.w) { + if number >= power10(self.format.w()) { return self.overflow(f); } let mut s = SmallString::<[u8; 40]>::new(); - write!(&mut s, "{number:0$.0}", self.format.w as usize).unwrap(); - if s.len() != self.format.w as usize { + write!(&mut s, "{number:0$.0}", self.format.w()).unwrap(); + if s.len() != self.format.w() { return self.overflow(f); } write!(f, "{s}") @@ -1205,16 +1264,16 @@ impl<'a> DisplayValue<'a> { fn z(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { let negative = number < 0.0; - let number = number * power10(self.format.d as u16); + let number = number * power10(self.format.d()); let number = number.round().abs(); - if number >= power10(self.format.w) { + if number >= power10(self.format.w()) { return self.overflow(f); } let mut s = SmallString::<[u8; 40]>::new(); - write!(&mut s, "{number:0$.0}", self.format.w as usize).unwrap(); - if s.len() != self.format.w as usize { + write!(&mut s, "{number:0$.0}", self.format.w()).unwrap(); + if s.len() != self.format.w() { return self.overflow(f); } @@ -1224,9 +1283,50 @@ impl<'a> DisplayValue<'a> { } write!(f, "{s}") } + + fn pibhex(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { + if number < 0.0 { + self.overflow(f) + } else { + let number = number.round(); + if number >= power256(self.format.w / 2) { + self.overflow(f) + } else { + let binary = integer_to_binary(number as u64, self.format.w / 2); + output_hex(f, &binary) + } + } + } + + fn rbhex(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { + output_hex(f, &number.to_ne_bytes()) + } + + fn date(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { + let template = self.format.type_.date_template(self.format.w()).unwrap(); + if self.format.type_.category() == Category::Date { + if number < 0.0 { + return self.missing(f); + } + + } + todo!() + } } -fn allocate_space(want: usize, capacity: u16, used: &mut usize) -> bool { +fn integer_to_binary(number: u64, width: u16) -> SmallVec<[u8; 8]> { + let bytes = (number << ((8 - width) * 8)).to_be_bytes(); + SmallVec::from_slice(&bytes[..width as usize]) +} + +fn output_hex(f: &mut Formatter<'_>, bytes: &[u8]) -> FmtResult { + for byte in bytes { + write!(f, "{byte:02X}")?; + } + Ok(()) +} + +fn allocate_space(want: usize, capacity: usize, used: &mut usize) -> bool { if *used + want <= capacity as usize { *used += want; true @@ -1444,14 +1544,33 @@ struct RounderWidth { } /// Returns `10^x`. -fn power10(x: u16) -> f64 { +fn power10(x: usize) -> f64 { const POWERS: [f64; 41] = [ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, ]; POWERS - .get(x as usize) + .get(x) .copied() .unwrap_or_else(|| 10.0_f64.powi(x as i32)) } + +/// Returns `256^x`. +fn power256(x: u16) -> f64 { + const POWERS: [f64; 9] = [ + 1.0, + 256.0, + 65536.0, + 16777216.0, + 4294967296.0, + 1099511627776.0, + 281474976710656.0, + 72057594037927936.0, + 18446744073709551616.0, + ]; + POWERS + .get(x as usize) + .copied() + .unwrap_or_else(|| 256.0_f64.powi(x as i32)) +}