From: Ben Pfaff Date: Tue, 31 Dec 2024 01:43:34 +0000 (-0800) Subject: data_out progress X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c2f02ace4acc27f380e884c3eb0e0cd1b1a71169;p=pspp data_out progress --- diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 2fa8a237e2..5e691e5ca2 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -634,6 +634,12 @@ version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -896,11 +902,13 @@ dependencies = [ "indexmap", "lazy_static", "libc", + "libm", "num", "num-derive", "num-traits", "ordered-float", "pspp-derive", + "smallstr", "smallvec", "thiserror", "unicase", @@ -1074,6 +1082,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "smallstr" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b1aefdf380735ff8ded0b15f31aab05daf1f70216c01c02a12926badd1df9d" +dependencies = [ + "smallvec", +] + [[package]] name = "smallvec" version = "1.13.2" diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index a9233a9df0..027a8440ff 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -31,7 +31,9 @@ flagset = "0.4.6" pspp-derive = { version = "0.1.0", path = "../pspp-derive" } either = "1.13.0" enum-iterator = "2.1.0" -smallvec = "1.13.2" +smallvec = { version = "1.13.2", features = ["const_generics", "write"] } +libm = "0.2.11" +smallstr = "0.3.0" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/format.rs b/rust/pspp/src/format.rs index 378f771e7b..cea27c3769 100644 --- a/rust/pspp/src/format.rs +++ 
b/rust/pspp/src/format.rs @@ -1,15 +1,23 @@ +use core::f64; use std::{ - fmt::{Display, Formatter, Result as FmtResult}, - ops::RangeInclusive, + cmp::min, + fmt::{Display, Error as FmtError, Formatter, Result as FmtResult, Write}, + ops::{Not, RangeInclusive}, str::FromStr, + sync::LazyLock, }; +use encoding_rs::Encoding; use enum_map::{Enum, EnumMap}; +use libm::frexp; +use smallstr::SmallString; use thiserror::Error as ThisError; +use unicode_width::UnicodeWidthStr; use crate::{ - dictionary::VarWidth, + dictionary::{Value, VarWidth}, raw::{self, VarType}, + settings::Settings as PsppSettings, }; #[derive(ThisError, Debug)] @@ -649,28 +657,146 @@ impl Display for UncheckedFormat { } } -#[derive(Clone, Debug)] +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Enum)] +pub enum Decimal { + #[default] + Dot, + Comma, +} + +impl From for char { + fn from(value: Decimal) -> Self { + u8::from(value).into() + } +} + +impl From for u8 { + fn from(value: Decimal) -> Self { + match value { + Decimal::Dot => b'.', + Decimal::Comma => b',', + } + } +} + +impl Not for Decimal { + type Output = Self; + + fn not(self) -> Self::Output { + match self { + Self::Dot => Self::Comma, + Self::Comma => Self::Dot, + } + } +} + +#[derive(Clone, Debug, Default)] pub struct Settings { pub epoch: Option, /// Either `'.'` or `','`. - pub decimal: char, + pub decimal: Decimal, /// Format `F`, `E`, `COMMA`, and `DOT` with leading zero (e.g. `0.5` /// instead of `.5`)? - pub include_leading_zero: bool, + pub leading_zero: bool, /// Custom currency styles. 
pub ccs: EnumMap>, } -impl Default for Settings { - fn default() -> Self { +#[derive(Copy, Clone, Enum)] +struct StyleParams { + decimal: Decimal, + leading_zero: bool, +} +impl From<&Settings> for StyleParams { + fn from(value: &Settings) -> Self { Self { - epoch: None, - decimal: '.', - include_leading_zero: false, - ccs: Default::default(), + decimal: value.decimal, + leading_zero: value.leading_zero, + } + } +} + +struct StyleSet(EnumMap); + +impl StyleSet { + fn new(f: impl Fn(StyleParams) -> NumberStyle) -> Self { + Self(EnumMap::from_fn(f)) + } + fn get(&self, settings: &Settings) -> &NumberStyle { + &self.0[settings.into()] + } +} + +impl Settings { + fn number_style(&self, type_: Type) -> &NumberStyle { + static DEFAULT: LazyLock = + LazyLock::new(|| NumberStyle::new("", "", Decimal::Dot, None, false)); + + match type_ { + Type::F | Type::E => { + static F: LazyLock = LazyLock::new(|| { + StyleSet::new(|p| NumberStyle::new("", "", p.decimal, None, p.leading_zero)) + }); + &F.get(self) + } + Type::Comma => { + static COMMA: LazyLock = LazyLock::new(|| { + StyleSet::new(|p| { + NumberStyle::new("", "", p.decimal, Some(!p.decimal), p.leading_zero) + }) + }); + &COMMA.get(self) + } + Type::Dot => { + static DOT: LazyLock = LazyLock::new(|| { + StyleSet::new(|p| { + NumberStyle::new("", "", !p.decimal, Some(p.decimal), p.leading_zero) + }) + }); + &DOT.get(self) + } + Type::Dollar => { + static DOLLAR: LazyLock = LazyLock::new(|| { + StyleSet::new(|p| NumberStyle::new("$", "", p.decimal, Some(!p.decimal), false)) + }); + &DOLLAR.get(self) + } + Type::Pct => { + static PCT: LazyLock = LazyLock::new(|| { + StyleSet::new(|p| NumberStyle::new("", "%", p.decimal, None, false)) + }); + &PCT.get(self) + } + Type::CC(cc) => self.ccs[cc].as_ref().unwrap_or(&DEFAULT), + Type::N + | Type::Z + | Type::P + | Type::PK + | Type::IB + | Type::PIB + | Type::PIBHex + | Type::RB + | Type::RBHex + | Type::Date + | Type::ADate + | Type::EDate + | Type::JDate + | Type::SDate + | 
Type::QYr + | Type::MoYr + | Type::WkYr + | Type::DateTime + | Type::YMDHMS + | Type::MTime + | Type::Time + | Type::DTime + | Type::WkDay + | Type::Month + | Type::A + | Type::AHex => &DEFAULT, } } } @@ -684,14 +810,14 @@ pub struct NumberStyle { pub suffix: Affix, pub neg_suffix: Affix, - /// Decimal point: `'.'` or `','`. - pub decimal: char, + /// Decimal point. + pub decimal: Decimal, - /// Grouping character: `'.'` or `','` or `None`. - pub grouping: Option, + /// Grouping character. + pub grouping: Option, /// Format as `.5` or `0.5`? - pub include_leading_zero: bool, + pub leading_zero: bool, /// An `Affix` may require more bytes than its display width; for example, /// U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column. @@ -703,6 +829,35 @@ pub struct NumberStyle { pub extra_bytes: usize, } +impl NumberStyle { + fn new( + prefix: &str, + suffix: &str, + decimal: Decimal, + grouping: Option, + leading_zero: bool, + ) -> Self { + // These assertions ensure that zero is correct for `extra_bytes`. + debug_assert!(prefix.is_ascii()); + debug_assert!(suffix.is_ascii()); + + Self { + neg_prefix: Affix::new("-"), + prefix: Affix::new(prefix), + suffix: Affix::new(suffix), + neg_suffix: Affix::new(""), + decimal, + grouping, + leading_zero, + extra_bytes: 0, + } + } + + fn affix_width(&self) -> usize { + self.prefix.width + self.suffix.width + } +} + #[derive(Clone, Debug)] pub struct Affix { /// String contents of affix. 
@@ -711,3 +866,592 @@ pub struct Affix { /// Display width in columns (see [unicode_width]) pub width: usize, } + +impl Affix { + fn new(s: &str) -> Self { + Self { + s: s.to_string(), + width: s.width(), + } + } +} + +pub struct DisplayValue<'a> { + format: Format, + value: &'a Value, + encoding: &'static Encoding, +} + +impl<'a> Display for DisplayValue<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + let number = match self.value { + Value::Number(number) => *number, + Value::String(string) => { + if self.format.format() == Type::AHex { + for byte in string { + write!(f, "{byte:02x}")?; + } + } else { + write!(f, "{}", self.encoding.decode_without_bom_handling(string).0)?; + } + return Ok(()); + } + }; + + let Some(number) = number else { + return self.missing(f); + }; + + match self.format.format() { + Type::F + | Type::Comma + | Type::Dot + | Type::Dollar + | Type::Pct + | Type::E + | Type::CC(_) => self.number(f, number), + Type::N => self.n(f, number), + Type::Z => self.z(f, number), + Type::P => todo!(), + Type::PK => todo!(), + Type::IB => todo!(), + Type::PIB => todo!(), + Type::PIBHex => todo!(), + Type::RB => todo!(), + Type::RBHex => todo!(), + Type::Date => todo!(), + Type::ADate => todo!(), + Type::EDate => todo!(), + Type::JDate => todo!(), + Type::SDate => todo!(), + Type::QYr => todo!(), + Type::MoYr => todo!(), + Type::WkYr => todo!(), + Type::DateTime => todo!(), + Type::YMDHMS => todo!(), + Type::MTime => todo!(), + Type::Time => todo!(), + Type::DTime => todo!(), + Type::WkDay => todo!(), + Type::Month => todo!(), + Type::A => todo!(), + Type::AHex => todo!(), + } + } +} + +impl<'a> DisplayValue<'a> { + fn number(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { + if number.is_finite() { + let style = PsppSettings::global() + .formats + .number_style(self.format.type_); + if self.format.type_ != Type::E && number.abs() < 1.5 * power10(self.format.w) { + let rounder = Rounder::new(style, number, self.format.d); + if 
self.decimal(f, &rounder, style, true)? + || self.scientific(f, number, style, true)? + || self.decimal(f, &rounder, style, false)? + { + return Ok(()); + } + } + + if !self.scientific(f, number, style, false)? { + self.overflow(f)?; + } + Ok(()) + } else { + self.infinite(f, number) + } + } + + fn infinite(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { + if self.format.w >= 3 { + let s = if number.is_nan() { + "NaN" + } else if number.is_infinite() { + if number.is_sign_positive() { + "+Infinity" + } else { + "-Infinity" + } + } else { + "Unknown" + }; + let w = self.format.w as usize; + write!(f, "{s:>0$.*}", w) + } else { + self.overflow(f) + } + } + + fn missing(&self, f: &mut Formatter<'_>) -> FmtResult { + let w = self.format.w as isize; + let d = self.format.d as isize; + let dot_position = match self.format.type_ { + Type::N => w - 1, + Type::Pct => w - d - 2, + Type::E => w - d - 5, + _ => w - d - 1, + }; + let dot_position = dot_position.max(0) as u16; + + for i in 0..self.format.w { + if i == dot_position { + write!(f, ".")?; + } else { + write!(f, " ")?; + } + } + Ok(()) + } + + fn overflow(&self, f: &mut Formatter<'_>) -> FmtResult { + for _ in 0..self.format.w { + write!(f, "*")?; + } + Ok(()) + } + + fn decimal( + &self, + f: &mut Formatter<'_>, + rounder: &Rounder, + style: &NumberStyle, + require_affixes: bool, + ) -> Result { + for decimals in (0..=self.format.d).rev() { + // Make sure there's room for the number's magnitude, plus the + // negative suffix, plus (if negative) the negative prefix. + let RounderWidth { + mut width, + integer_digits, + negative, + } = rounder.width(decimals as usize); + width += style.neg_suffix.width; + if negative { + width += style.neg_prefix.width; + } + if width > self.format.w as usize { + continue; + } + + // If there's room for the prefix and suffix, allocate + // space. If the affixes are required, but there's no + // space, give up. 
+ let add_affixes = allocate_space(style.affix_width(), self.format.w, &mut width); + if !add_affixes && require_affixes { + continue; + } + + // Check whether we should include grouping characters. We need + // room for a complete set or we don't insert any at all. We don't + // include grouping characters if decimal places were requested but + // they were all dropped. + let grouping = style.grouping.filter(|_| { + integer_digits > 3 + && (self.format.d == 0 || decimals > 0) + && allocate_space((integer_digits - 1) / 3, self.format.w, &mut width) + }); + + // Assemble number. + let magnitude = rounder.format(decimals as usize); + let mut output = SmallString::<[u8; 40]>::new(); + for _ in width..self.format.w as usize { + output.push(' '); + } + if negative { + output.push_str(&style.neg_prefix.s); + } + if add_affixes { + output.push_str(&style.prefix.s); + } + if let Some(grouping) = grouping { + for i in 0..integer_digits { + if i > 0 && (integer_digits - i) % 3 == 0 { + output.push(grouping.into()); + } + } + } else { + output.push_str(&magnitude[..integer_digits]); + } + if decimals > 0 { + output.push(style.decimal.into()); + output.push_str(&magnitude[integer_digits + 1..]); + } + if add_affixes { + output.push_str(&style.suffix.s); + } + if negative { + output.push_str(&style.neg_suffix.s); + } else { + for _ in 0..style.neg_suffix.width { + output.push(' '); + } + } + + debug_assert!(output.len() >= self.format.w as usize); + debug_assert!(output.len() <= self.format.w as usize + style.extra_bytes); + write!(f, "{output}")?; + return Ok(true); + } + Ok(false) + } + + fn scientific( + &self, + f: &mut Formatter<'_>, + number: f64, + style: &NumberStyle, + require_affixes: bool, + ) -> Result { + // Allocate minimum required space. + let mut width = 6 + style.neg_suffix.width; + if number.is_sign_negative() { + width += style.neg_prefix.width; + } + if width > self.format.w as usize { + return Ok(false); + } + + // Check for room for prefix and suffix. 
+ let add_affixes = allocate_space(style.affix_width(), self.format.w, &mut width); + if require_affixes && !add_affixes { + return Ok(false); + } + + // Figure out number of characters we can use for the fraction, if any. + // (If that turns out to be `1`, then we'll output a decimal point + // without any digits following.) + let mut fraction_width = + min(self.format.d as usize + 1, self.format.w as usize - width).min(16); + if self.format.type_ != Type::E && fraction_width == 1 { + fraction_width = 0; + } + width += fraction_width; + + let mut output = SmallString::<[u8; 40]>::new(); + for _ in width..self.format.w as usize { + output.push(' '); + } + if number.is_sign_negative() { + output.push_str(&style.neg_prefix.s); + } + if add_affixes { + output.push_str(&style.prefix.s); + } + match fraction_width { + 0 => write!(&mut output, "{:.0E}", number.abs()).unwrap(), + 1 => write!(&mut output, "{:.0E}.", number.abs()).unwrap(), + _ => write!(&mut output, "{:.*E}.", fraction_width - 1, number.abs()).unwrap(), + }; + + // Rust always uses `.` as the decimal point. Translate to `,` if + // necessary. + if style.decimal == Decimal::Comma { + // SAFETY: This changes only one ASCII character (`.`) to + // another ASCII character (`,`). + unsafe { + if let Some(dot) = output.as_bytes_mut().iter_mut().find(|c| **c == b'.') { + *dot = b','; + } + } + } + + // Make exponent have exactly three digits, plus sign. + let e = output.as_bytes().iter().position(|c| *c == b'E').unwrap(); + let exponent: isize = (&output[e + 1..]).parse().unwrap(); + if exponent.abs() > 999 { + return Ok(false); + } + output.truncate(e + 1); + write!(&mut output, "{exponent:+04}").unwrap(); + + // Add suffixes. 
+ if add_affixes { + output.push_str(&style.suffix.s); + } + if number.is_sign_negative() { + output.push_str(&style.neg_suffix.s); + } else { + for _ in 0..style.neg_suffix.width { + output.push(' '); + } + } + + debug_assert!(output.len() >= self.format.w as usize); + debug_assert!(output.len() <= self.format.w as usize + style.extra_bytes); + write!(f, "{output}")?; + Ok(true) + } + + fn n(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { + if !number.is_sign_positive() { + return self.missing(f); + } + + let number = number * power10(self.format.d as u16); + let number = number.round().abs(); + if number >= power10(self.format.w) { + return self.overflow(f); + } + let mut s = SmallString::<[u8; 40]>::new(); + write!(&mut s, "{number:0$.0}", self.format.w as usize).unwrap(); + if s.len() != self.format.w as usize { + return self.overflow(f); + } + write!(f, "{s}") + } + + fn z(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult { + let negative = number < 0.0; + let number = number * power10(self.format.d as u16); + let number = number.round().abs(); + + if number >= power10(self.format.w) { + return self.overflow(f); + } + + let mut s = SmallString::<[u8; 40]>::new(); + write!(&mut s, "{number:0$.0}", self.format.w as usize).unwrap(); + if s.len() != self.format.w as usize { + return self.overflow(f); + } + + if negative && !s.bytes().all(|c| c == b'0') { + let last = s.pop().unwrap().to_digit(10).unwrap() as usize; + s.push(b"}JKLMNOPQR"[last] as char); + } + write!(f, "{s}") + } +} + +fn allocate_space(want: usize, capacity: u16, used: &mut usize) -> bool { + if *used + want <= capacity as usize { + *used += want; + true + } else { + false + } +} + +/// A representation of a number that can be quickly rounded to any desired +/// number of decimal places (up to a specified maximum). +struct Rounder { + /// Magnitude of number with excess precision. + string: SmallString<[u8; 40]>, + + /// Number of digits before decimal point. 
+ integer_digits: usize, + + /// Number of `9`s or `.`s at start of string. + leading_nines: usize, + + /// Number of `0`s or `.`s at start of string. + leading_zeros: usize, + + /// Is the number negative? + negative: bool, +} + +impl Rounder { + fn new(style: &NumberStyle, number: f64, max_decimals: u8) -> Self { + debug_assert!(number.abs() < 1e41); + debug_assert!((0..=16).contains(&max_decimals)); + + let mut string = SmallString::new(); + if max_decimals == 0 { + // Fast path. No rounding needed. + // + // We append `.00` to the integer representation because + // [Self::round_up] assumes that fractional digits are present. + write!(&mut string, "{:.0}.00", number.round().abs()).unwrap() + } else { + // Slow path. + // + // This is more difficult than it really should be because we have + // to make sure that numbers that are exactly halfway between two + // representations are always rounded away from zero. This is not + // what format! normally does (usually it rounds to even), so we + // have to fake it as best we can, by formatting with extra + // precision and then doing the rounding ourselves. + // + // We take up to two rounds to format numbers. In the first round, + // we obtain 2 digits of precision beyond those requested by the + // user. If those digits are exactly "50", then in a second round + // we format with as many digits as are significant in a "double". + // + // It might be better to directly implement our own floating-point + // formatting routine instead of relying on the system's sprintf + // implementation. But the classic Steele and White paper on + // printing floating-point numbers does not hint how to do what we + // want, and it's not obvious how to change their algorithms to do + // so. It would also be a lot of work. 
+ write!( + &mut string, + "{:.*}", + max_decimals as usize + 2, + number.abs() + ) + .unwrap(); + if string.ends_with("50") { + let (_sig, binary_exponent) = frexp(number); + let decimal_exponent = binary_exponent * 3 / 10; + let format_decimals = (f64::DIGITS as i32 + 1) - decimal_exponent; + if format_decimals > max_decimals as i32 + 2 { + string.clear(); + write!(&mut string, "{:.*}", format_decimals as usize, number.abs()).unwrap(); + } + } + }; + + if !style.leading_zero && string.starts_with("0") { + string.remove(0); + } + let leading_zeros = string.bytes().filter(|c| *c == b'0' || *c == b'.').count(); + let leading_nines = string.bytes().filter(|c| *c == b'9' || *c == b'.').count(); + let integer_digits = string.bytes().filter(u8::is_ascii_digit).count(); + let negative = number.is_sign_negative(); + Self { + string, + integer_digits, + leading_nines, + leading_zeros, + negative, + } + } + + /// Returns a [RounderWidth] for formatting the magnitude to `decimals` + /// decimal places. `decimals` must be in `0..=16`. + fn width(&self, decimals: usize) -> RounderWidth { + // Calculate base measures. + let mut width = self.integer_digits; + if decimals > 0 { + width += decimals + 1; + } + let mut integer_digits = self.integer_digits; + let mut negative = self.negative; + + // Rounding can cause adjustments. + if self.should_round_up(decimals) { + // Rounding up leading `9s` adds a new digit (a `1`). + if self.leading_nines >= width { + width += 1; + integer_digits += 1; + } + } else { + // Rounding down. + if self.leading_zeros >= width { + // All digits that remain after rounding are zeros. Therefore + // we drop the negative sign. + negative = false; + if self.integer_digits == 0 && decimals == 0 { + // No digits at all are left. We need to display + // at least a single digit (a zero). 
+ debug_assert_eq!(width, 0); + width += 1; + integer_digits = 1; + } + } + } + RounderWidth { + width, + integer_digits, + negative, + } + } + + /// Returns true if the number should be rounded up when chopped off at + /// `decimals` decimal places, false if it should be rounded down. + fn should_round_up(&self, decimals: usize) -> bool { + let digit = self.string.as_bytes()[self.integer_digits + decimals + 1]; + debug_assert!(digit.is_ascii_digit()); + digit >= b'5' + } + + /// Formats the number, rounding to `decimals` decimal places. Exactly as + /// many characters as indicated by [Self::width(decimals)] are written. + fn format(&self, decimals: usize) -> SmallString<[u8; 40]> { + let mut output = SmallString::new(); + let mut base_width = self.integer_digits; + if decimals > 0 { + base_width += decimals + 1; + } + + if self.should_round_up(decimals) { + if self.leading_nines < base_width { + // Rounding up. This is the common case where rounding up + // doesn't add an extra digit. + output.push_str(&self.string[..base_width]); + + // SAFETY: This loop only changes ASCII characters to other + // ASCII characters. + unsafe { + for c in output.as_bytes_mut().iter_mut().rev() { + match *c { + b'9' => *c = b'0', + b'0'..=b'8' => { + *c += 1; + break; + } + b'.' => (), + _ => unreachable!(), + } + } + } + } else { + // Rounding up leading 9s causes the result to be a 1 followed + // by a number of 0s, plus a decimal point. + output.push('1'); + for _ in 0..self.integer_digits { + output.push('0'); + } + if decimals > 0 { + output.push('.'); + for _ in 0..decimals { + output.push('0'); + } + } + debug_assert_eq!(output.len(), base_width + 1); + } + } else { + // Rounding down. + if self.integer_digits != 0 || decimals != 0 { + // Common case: just copy the digits. + output.push_str(&self.string); + } else { + // No digits remain. The output is just a zero. 
+ output.push('0'); + } + } + output + } +} + +struct RounderWidth { + /// Number of characters required to format the number to a specified number + /// of decimal places. This includes integer digits and a decimal point and + /// fractional digits, if any, but it does not include any negative prefix + /// or suffix or other affixes. + width: usize, + + /// Number of digits before the decimal point, between 0 and 40. + integer_digits: usize, + + /// True if the number is negative and its rounded representation would + /// include at least one nonzero digit. + negative: bool, +} + +/// Returns `10^x`. +fn power10(x: u16) -> f64 { + const POWERS: [f64; 41] = [ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, + 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, + 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, + ]; + POWERS + .get(x as usize) + .copied() + .unwrap_or_else(|| 10.0_f64.powi(x as i32)) +} diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 1ec18be1a1..dbbef36f20 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -57,7 +57,8 @@ use std::{ collections::HashMap, - ops::{Index, Range}, + fmt::Display, + ops::{Index, Not, Range}, sync::{Arc, OnceLock, Weak}, }; @@ -66,7 +67,10 @@ use enum_iterator::Sequence; use enum_map::{enum_map, Enum, EnumMap}; use smallvec::{smallvec, SmallVec}; -use crate::format::{Format, Settings as FormatSettings}; +use crate::{ + format::{Format, Settings as FormatSettings}, + settings::{Settings, Show}, +}; pub mod output; @@ -682,14 +686,16 @@ impl Stroke { } /// An axis of a 2-dimensional table. 
-#[derive(Copy, Clone, Debug, Enum)] +#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)] pub enum Axis2 { X, Y, } -impl Axis2 { - pub fn opposite(self) -> Self { +impl Not for Axis2 { + type Output = Self; + + fn not(self) -> Self::Output { match self { Self::X => Self::Y, Self::Y => Self::X, @@ -698,6 +704,7 @@ impl Axis2 { } /// A 2-dimensional `(x,y)` pair. +#[derive(Copy, Clone, Debug, Default)] pub struct Coord2(pub EnumMap); impl Coord2 { @@ -736,6 +743,39 @@ impl Index for Coord2 { } } +#[derive(Clone, Debug, Default)] +pub struct Rect2(pub EnumMap>); + +impl Rect2 { + fn new(x_range: Range, y_range: Range) -> Self { + Self(enum_map! { + Axis2::X => x_range.clone(), + Axis2::Y => y_range.clone(), + }) + } + fn for_ranges((a, a_range): (Axis2, Range), b_range: Range) -> Self { + let b = !a; + let mut ranges = EnumMap::default(); + ranges[a] = a_range; + ranges[b] = b_range; + Self(ranges) + } +} + +impl From>> for Rect2 { + fn from(value: EnumMap>) -> Self { + Self(value) + } +} + +impl Index for Rect2 { + type Output = Range; + + fn index(&self, index: Axis2) -> &Self::Output { + &self.0[index] + } +} + #[derive(Copy, Clone, Debug, Default)] pub enum FootnoteMarkerType { /// a, b, c, ... @@ -770,9 +810,9 @@ pub struct PivotTable { show_caption: bool, - show_value: Option, + show_values: Option, - show_variables: Option, + show_variables: Option, weight_format: Format, @@ -821,7 +861,7 @@ impl PivotTable { show_grid_lines: false, show_title: true, show_caption: true, - show_value: None, + show_values: None, show_variables: None, weight_format: Format::F40, current_layer: Vec::new(), @@ -885,19 +925,6 @@ impl PivotTable { } } -/// Whether to show variable or value labels or the underlying value or variable name. -#[derive(Copy, Clone, Debug)] -pub enum ValueShow { - /// Value or variable name only. - Value, - - /// Label only. - Label, - - /// Value and label. 
- Both, -} - #[derive(Clone, Debug)] pub struct Footnote { content: Value, @@ -925,17 +952,17 @@ pub struct Footnote { /// [F]: crate::format::Format::F /// [Pct]: crate::format::Format::Pct /// -/// 2. A numeric or string value obtained from data (PIVOT_VALUE_NUMERIC or -/// PIVOT_VALUE_STRING). If such a value corresponds to a variable, then the +/// 2. A numeric or string value obtained from data ([ValueInner::Number] or +/// [ValueInner::String]). If such a value corresponds to a variable, then the /// variable's name can be attached to the pivot_value. If the value has a /// value label, then that can also be attached. When a label is present, /// the user can control whether to show the value or the label or both. /// -/// 3. A variable name (PIVOT_VALUE_VARIABLE). The variable label, if any, can +/// 3. A variable name ([ValueInner::Variable]). The variable label, if any, can /// be attached too, and again the user can control whether to show the value /// or the label or both. /// -/// 4. A text string (PIVOT_VALUE_TEXT). The value stores the string in English +/// 4. A text string ([ValueInner::Text]). The value stores the string in English /// and translated into the output language (localized). Use /// pivot_value_new_text() or pivot_value_new_text_format() for those cases. 
/// In some cases, only an English or a localized version is available for @@ -950,10 +977,79 @@ pub struct Value { inner: ValueInner, } +pub struct DisplayValue<'a, 'b> { + value: &'a Value, + table: &'b PivotTable, +} + +fn interpret_show( + global_show: impl Fn() -> Show, + table_show: Option, + value_show: Option, + has_label: bool, +) -> Show { + if !has_label { + Show::Value + } else { + value_show.or(table_show).unwrap_or_else(global_show) + } +} + +impl<'a, 'b> Display for DisplayValue<'a, 'b> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.value.inner { + ValueInner::Number { + show, + format, + honor_small, + value, + var_name, + value_label, + } => { + let show = interpret_show( + || Settings::global().show_values, + self.table.show_values, + *show, + value_label.is_some(), + ); + todo!() + } + ValueInner::String { + show, + hex, + s, + var_name, + value_label, + } => todo!(), + ValueInner::Variable { + show, + var_name, + value_label, + } => todo!(), + ValueInner::Text { + user_provided, + local, + c, + id, + } => todo!(), + ValueInner::Template { args, local, id } => todo!(), + } + } +} + +impl Value { + // Returns an object that will format this value, including subscripts and + // superscripts and footnotes. Settings on `table` control whether variable + // and value labels are included. 
+ fn display<'a, 'b>(&'a self, table: &'b PivotTable) -> DisplayValue<'a, 'b> { + DisplayValue { value: self, table } + } +} + #[derive(Clone, Debug)] pub enum ValueInner { Number { - show: ValueShow, + show: Option, format: Format, honor_small: bool, value: f64, @@ -961,14 +1057,14 @@ pub enum ValueInner { value_label: Option, }, String { - show: ValueShow, + show: Option, hex: bool, s: Option, var_name: Option, value_label: Option, }, Variable { - show: ValueShow, + show: Option, var_name: Option, value_label: Option, }, diff --git a/rust/pspp/src/output/pivot/output.rs b/rust/pspp/src/output/pivot/output.rs index f9d4dc2f23..62183b091e 100644 --- a/rust/pspp/src/output/pivot/output.rs +++ b/rust/pspp/src/output/pivot/output.rs @@ -1,20 +1,19 @@ -use std::sync::Arc; +use std::{ops::Range, sync::Arc}; use enum_map::{enum_map, EnumMap}; use smallvec::{SmallVec, ToSmallVec}; -use crate::output::table::Table; +use crate::output::table::{CellInner, Table}; use super::{ - Axis, Axis2, Axis3, Border, BorderStyle, Category, CategoryTrait, Color, Coord2, Dimension, - PivotTable, Stroke, + Area, Axis, Axis2, Axis3, Border, BorderStyle, BoxBorder, Category, CategoryTrait, Color, + Coord2, Dimension, PivotTable, Rect2, RowColBorder, Stroke, Value, }; /// All of the combinations of dimensions along an axis. 
struct AxisEnumeration { indexes: Vec, stride: usize, - position: usize, } impl AxisEnumeration { @@ -30,15 +29,29 @@ impl AxisEnumeration { let start = self.stride * index; &self.indexes[start..start + self.stride] } + + fn iter(&self) -> AxisEnumerationIter { + AxisEnumerationIter { + enumeration: &self, + position: 0, + } + } } -impl Iterator for AxisEnumeration { +struct AxisEnumerationIter<'a> { + enumeration: &'a AxisEnumeration, + position: usize, +} + +impl<'a> Iterator for AxisEnumerationIter<'a> { type Item = SmallVec<[usize; 4]>; fn next(&mut self) -> Option { - if self.position < self.indexes.len() { - let item = (&self.indexes[self.position..self.position + self.stride]).to_smallvec(); - self.position += self.stride; + if self.position < self.enumeration.indexes.len() { + let item = (&self.enumeration.indexes + [self.position..self.position + self.enumeration.stride]) + .to_smallvec(); + self.position += self.enumeration.stride; Some(item) } else { None @@ -100,10 +113,24 @@ impl PivotTable { AxisEnumeration { indexes, stride: axis.dimensions.len().max(1), - position: 0, } } + fn create_aux_table(&self, n: Coord2) -> Table { + Table::new( + n, + Coord2::new(0, 0), + self.look.areas.clone(), + self.borders(false), + ) + } + + fn borders(&self, printing: bool) -> EnumMap { + EnumMap::from_fn(|border| { + resolve_border_style(border, &self.look.borders, printing && self.show_grid_lines) + }) + } + pub fn output(&self, layer_indexes: &[usize], printing: bool) { let column_enumeration = self.enumerate_axis(Axis3::X, layer_indexes, self.look.omit_empty); let row_enumeration = self.enumerate_axis(Axis3::Y, layer_indexes, self.look.omit_empty); @@ -112,11 +139,119 @@ impl PivotTable { self.axes[Axis3::Y].label_depth, self.axes[Axis3::X].label_depth, ); - let borders = EnumMap::from_fn(|border| { - resolve_border_style(border, &self.look.borders, printing && self.show_grid_lines) - }); let n = EnumMap::from_fn(|axis| data[axis] + stub[axis]).into(); - let table 
= Table::new(n, stub, self.look.areas.clone(), borders); + let mut body = Table::new(n, stub, self.look.areas.clone(), self.borders(printing)); + compose_headings( + &mut body, + &self.axes[Axis3::X], + Axis2::X, + &self.axes[Axis3::Y], + &column_enumeration, + RowColBorder::ColHorz, + RowColBorder::ColVert, + self.rotate_outer_row_labels, + false, + Area::ColumnLabels, + ); + compose_headings( + &mut body, + &self.axes[Axis3::Y], + Axis2::Y, + &self.axes[Axis3::X], + &row_enumeration, + RowColBorder::RowVert, + RowColBorder::RowHorz, + false, + self.rotate_inner_column_labels, + Area::RowLabels, + ); + + for (y, row_indexes) in row_enumeration.iter().enumerate() { + let y = y + stub[Axis2::Y]; + for (x, column_indexes) in column_enumeration.iter().enumerate() { + let x = x + stub[Axis2::X]; + let presentation_indexes = enum_map! { + Axis3::X => &column_indexes, + Axis3::Y => &row_indexes, + Axis3::Z => layer_indexes, + }; + let data_indexes = self.convert_indexes_ptod(presentation_indexes); + let value = self.get(&data_indexes); + body.put( + Rect2::new(x..x + 1, y..y + 1), + CellInner { + rotate: false, + area: Area::Data, + value: value.cloned(), + }, + ); + } + } + + if (self.corner_text.is_some() || self.look.row_labels_in_corner) + && stub.x() > 0 + && stub.y() > 0 + { + body.put( + Rect2::new(0..stub.x(), 0..stub.y()), + CellInner { + rotate: false, + area: Area::Corner, + value: self.corner_text.clone(), + }, + ); + } + + if body.n.x() > 0 && body.n.y() > 0 { + body.h_line(Border::InnerFrame(BoxBorder::Top), 0..body.n.x(), 0); + body.h_line( + Border::InnerFrame(BoxBorder::Bottom), + 0..body.n.x(), + body.n.y(), + ); + body.v_line(Border::InnerFrame(BoxBorder::Left), 0, 0..body.n.y()); + body.v_line( + Border::InnerFrame(BoxBorder::Right), + body.n.x(), + 0..body.n.y(), + ); + + if stub.x() > 0 { + body.h_line(Border::DataTop, 0..body.n.x(), stub.y()); + body.v_line(Border::DataLeft, stub.x(), 0..body.n.y()); + } + } + + // Title. 
+ let title = if self.title.is_some() && self.show_title { + let mut title = self.create_aux_table(Coord2::new(0, 0)); + title.put( + Rect2::new(0..1, 0..1), + CellInner { + rotate: false, + area: Area::Title, + value: self.title.clone(), + }, + ); + Some(title) + } else { + None + }; + + // Layers. + let n_layers: usize = self.nonempty_layer_dimensions().count(); +/* + let layers = if n_layers > 0 { + let mut layers = self.create_aux_table(Coord2::new(1, n_layers)); + for (y, dimension) in self.nonempty_layer_dimensions().enumerate() { + + } + todo!() + } else { None };*/ + } + + fn nonempty_layer_dimensions(&self) -> impl Iterator { + self.axes[Axis3::Z].dimensions.iter().rev().filter(|d| !d.data_leaves.is_empty()) } } @@ -135,6 +270,28 @@ fn find_category<'a>( Some(c) } +fn fill_cell( + table: &mut Table, + h: Axis2, + h_range: Range, + v_range: Range, + area: Area, + value: &Value, + rotate: bool, +) { + let mut region = EnumMap::default(); + region[h] = h_range; + region[!h] = v_range; + table.put( + region.into(), + CellInner { + rotate, + area, + value: Some(value.clone()), + }, + ); +} + /// Fills row or column headings into T. 
/// /// This function uses terminology and variable names for column headings, but @@ -148,14 +305,13 @@ fn compose_headings( h: Axis2, v_axis: &Axis, column_enumeration: &AxisEnumeration, - dim_col_horz: Border, - dim_col_vert: Border, - cat_col_horz: Border, - cat_col_vert: Border, + col_horz: RowColBorder, + col_vert: RowColBorder, rotate_inner_labels: bool, rotate_outer_labels: bool, + area: Area, ) { - let v = h.opposite(); + let v = !h; let v_size = h_axis.label_depth; let h_ofs = v_axis.label_depth; let n_columns = column_enumeration.len(); @@ -221,6 +377,7 @@ fn compose_headings( let mut vrules = vec![false; n_columns + 1]; vrules[0] = true; vrules[n_columns] = true; + for dim_index in (0..h_axis.dimensions.len()).rev() { let d = &h_axis.dimensions[dim_index]; if d.hide_all_labels { @@ -259,13 +416,15 @@ fn compose_headings( let is_outer_row = y1 == 0; let is_inner_row = y2 == v_size; if c.show_label() { - let bb = enum_map! { - Axis2::X => x1 + h_ofs..x2 + h_ofs + 1, - Axis2::Y => y1..y2 - 1, - }; - let rotate = (rotate_inner_labels && is_inner_row) - || (rotate_outer_labels && is_outer_row); - // fill_cell + table.put( + Rect2::for_ranges((h, x1 + h_ofs..x2 + h_ofs), y1..y2), + CellInner { + rotate: (rotate_inner_labels && is_inner_row) + || (rotate_outer_labels && is_outer_row), + area, + value: Some(c.name().clone()), + }, + ); // Draw all the vertical lines in our running example, other // than the far left and far right ones. 
Only the ones that @@ -284,17 +443,17 @@ fn compose_headings( // |aaaa1#aaaa2#aaaa3|aaaa1#aaaa2#aaaa3|aaaa1#aaaa2#aaaa3| // +-----+-----+-----+-----+-----+-----+-----+-----+-----+ // ``` - let style = if y1 == v_size - 1 { - cat_col_vert + let border = if y1 == v_size - 1 { + Border::Categories(col_vert) } else { - dim_col_vert + Border::Dimensions(col_vert) }; if !vrules[x2] { - // draw line + table.draw_line(border, (v, x2 + h_ofs), y1..table.n[v]); vrules[x2] = true; } if !vrules[x1] { - // draw line + table.draw_line(border, (v, x1 + h_ofs), y1..table.n[v]); vrules[x1] = true; } @@ -316,14 +475,25 @@ fn compose_headings( // +-----+-----+-----+-----+-----+-----+-----+-----+-----+ // ``` if c.parent().is_some_and(|parent| parent.show_label) { - // draw line + table.draw_line( + Border::Categories(col_horz), + (h, top_row), + h_ofs..table.n[h], + ); } x1 = x2; } } if d.root.show_label_in_corner && h_ofs > 0 { - // fill cell + table.put( + Rect2::for_ranges((h, 0..h_ofs), top_row..top_row + d.label_depth), + CellInner { + rotate: false, + area: Area::Corner, + value: Some(d.root.name.clone()), + }, + ); } // Draw the horizontal line between dimensions, e.g. 
the `=====` @@ -341,7 +511,11 @@ fn compose_headings( // +-----+-----+-----+-----+-----+-----+-----+-----+-----+ // ``` if dim_index != h_axis.dimensions.len() - 1 { - // draw lines + table.draw_line( + Border::Dimensions(col_horz), + (h, top_row), + h_ofs..table.n[h], + ); } top_row += d.label_depth; } diff --git a/rust/pspp/src/output/table.rs b/rust/pspp/src/output/table.rs index 5f9366bd2a..53f0e29a90 100644 --- a/rust/pspp/src/output/table.rs +++ b/rust/pspp/src/output/table.rs @@ -16,7 +16,9 @@ use enum_map::{enum_map, EnumMap}; use crate::output::pivot::Coord2; -use super::pivot::{Area, AreaStyle, Axis2, Border, BorderStyle, CellStyle, FontStyle, Value}; +use super::pivot::{ + Area, AreaStyle, Axis2, Border, BorderStyle, CellStyle, FontStyle, Rect2, Value, +}; #[derive(Clone)] pub enum Content { @@ -30,13 +32,13 @@ pub struct Cell { inner: CellInner, /// Occupied table region. - region: EnumMap>, + region: Rect2, font_style: Option>, cell_style: Option>, } impl Cell { - fn new(inner: CellInner, region: EnumMap>) -> Self { + fn new(inner: CellInner, region: Rect2) -> Self { Self { inner, region, @@ -49,32 +51,32 @@ impl Cell { #[derive(Clone)] pub struct CellInner { /// Rotate cell contents 90 degrees? - rotate: bool, + pub rotate: bool, - /// An index into `styles` in the `Table`. - style_idx: u8, + /// The area that the cell belongs to. + pub area: Area, - value: Value, + pub value: Option, } /// A table. pub struct Table { /// Number of rows and columns. - n: Coord2, + pub n: Coord2, /// Table header rows and columns. - headers: Coord2, + pub headers: Coord2, - contents: Vec, + pub contents: Vec, /// Styles for areas of the table. - areas: EnumMap, + pub areas: EnumMap, /// Styles for borders in the table. - borders: EnumMap, + pub borders: EnumMap, /// Horizontal and vertical rules. 
- rules: EnumMap>, + pub rules: EnumMap>, } impl Table { @@ -84,17 +86,15 @@ impl Table { areas: EnumMap, borders: EnumMap, ) -> Self { - let nr = n[Axis2::Y]; - let nc = n[Axis2::X]; Self { n, headers, - contents: vec![Content::Empty; nr * nc], + contents: vec![Content::Empty; n.y() * n.x()], areas, borders, rules: enum_map! { - Axis2::X => vec![Border::Title; (nr + 1) * nc], - Axis2::Y => vec![Border::Title; nr * (nc + 1)], + Axis2::X => vec![Border::Title; (n.y() + 1) * n.x()], + Axis2::Y => vec![Border::Title; n.y() * (n.x() + 1)], }, } } @@ -103,7 +103,7 @@ impl Table { pos.x() + self.n.x() * pos.y() } - pub fn put(&mut self, region: EnumMap>, inner: CellInner) { + pub fn put(&mut self, region: Rect2, inner: CellInner) { use Axis2::*; if region[X].len() == 1 && region[Y].len() == 1 { let offset = self.offset(Coord2::new(region[X].start, region[Y].start)); @@ -118,4 +118,34 @@ impl Table { } } } + + pub fn h_line(&mut self, border: Border, x: Range, y: usize) { + debug_assert!(y <= self.n.y()); + debug_assert!(x.start <= x.end); + debug_assert!(x.end <= self.n.x()); + for x in x { + self.rules[Axis2::X][x + self.n.x() * y] = border; + } + } + + pub fn v_line(&mut self, border: Border, x: usize, y: Range) { + debug_assert!(x <= self.n.x()); + debug_assert!(y.start <= y.end); + debug_assert!(y.end <= self.n.y()); + for y in y { + self.rules[Axis2::Y][x + (self.n.x() + 1) * y] = border; + } + } + + pub fn draw_line( + &mut self, + border: Border, + (a, a_value): (Axis2, usize), + b_range: Range, + ) { + match a { + Axis2::X => self.h_line(border, b_range, a_value), + Axis2::Y => self.v_line(border, a_value, b_range), + } + } } diff --git a/rust/pspp/src/settings.rs b/rust/pspp/src/settings.rs index b48420aefa..93b8df2e41 100644 --- a/rust/pspp/src/settings.rs +++ b/rust/pspp/src/settings.rs @@ -8,6 +8,31 @@ use crate::{ message::Severity, }; +/// Whether to show variable or value labels or the underlying value or variable +/// name. 
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] +pub enum Show { + /// Value or variable name only. + Value, + + /// Label only. + #[default] + Label, + + /// Value (or variable name) and label. + Both, +} + +impl Show { + pub fn show_value(&self) -> bool { + *self != Self::Label + } + + pub fn show_label(&self) -> bool { + *self != Self::Value + } +} + pub struct Settings { pub input_integer_format: Endian, pub input_float_format: Endian, @@ -40,6 +65,8 @@ pub struct Settings { pub syntax: Compatibility, pub formats: FormatSettings, pub small: f64, + pub show_values: Show, + pub show_variables: Show, } impl Default for Settings { @@ -73,6 +100,8 @@ impl Default for Settings { syntax: Compatibility::default(), formats: FormatSettings::default(), small: 0.0001, + show_values: Show::default(), + show_variables: Show::default(), } } }