From 5b45e25d0313da90159c04d149cdbc339ee87613 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Tue, 31 Dec 2024 12:31:36 -0800
Subject: [PATCH] data_out first draft done

---
 rust/pspp/src/calendar.rs   |  61 +++++++++++
 rust/pspp/src/dictionary.rs |   6 +-
 rust/pspp/src/format.rs     | 208 ++++++++++++++++++++++++++++++------
 rust/pspp/src/lib.rs        |  15 +--
 4 files changed, 248 insertions(+), 42 deletions(-)
 create mode 100644 rust/pspp/src/calendar.rs

diff --git a/rust/pspp/src/calendar.rs b/rust/pspp/src/calendar.rs
new file mode 100644
index 0000000000..1bbcfb616a
--- /dev/null
+++ b/rust/pspp/src/calendar.rs
@@ -0,0 +1,61 @@
+use chrono::{Datelike, Days, NaiveDate};
+
+const EPOCH: NaiveDate = NaiveDate::from_ymd_opt(1582, 10, 14).unwrap();
+
+/// Translates a count of days from 14 Oct 1582 into a Gregorian calendar date.
+/// Negative offsets count backward; `None` means the date is not representable.
+pub fn calendar_offset_to_gregorian(offset: f64) -> Option<NaiveDate> {
+    let offset = offset as i64; // Drops any fractional day.
+    if offset >= 0 {
+        EPOCH.checked_add_days(Days::new(offset as u64))
+    } else {
+        EPOCH.checked_sub_days(Days::new(offset.unsigned_abs()))
+    }
+}
+
+/// Returns the day of the year, where January 1 is day 1.
+pub fn day_of_year(date: NaiveDate) -> Option<u32> {
+    let january1 = NaiveDate::from_ymd_opt(date.year(), 1, 1)?;
+    let delta = date - january1;
+    Some(delta.num_days() as u32 + 1)
+}
+
+/// Returns the name for a month as a 3-character all-caps string.
+pub fn short_month_name(month: u32) -> Option<&'static str> {
+    let name = match month {
+        1 => "JAN",
+        2 => "FEB",
+        3 => "MAR",
+        4 => "APR",
+        5 => "MAY",
+        6 => "JUN",
+        7 => "JUL",
+        8 => "AUG",
+        9 => "SEP",
+        10 => "OCT",
+        11 => "NOV",
+        12 => "DEC",
+        _ => return None,
+    };
+    Some(name)
+}
+
+/// Returns the name for a month as an all-caps string.
+pub fn month_name(month: u32) -> Option<&'static str> {
+    let name = match month {
+        1 => "JANUARY",
+        2 => "FEBRUARY",
+        3 => "MARCH",
+        4 => "APRIL",
+        5 => "MAY",
+        6 => "JUNE",
+        7 => "JULY",
+        8 => "AUGUST",
+        9 => "SEPTEMBER",
+        10 => "OCTOBER",
+        11 => "NOVEMBER",
+        12 => "DECEMBER",
+        _ => return None,
+    };
+    Some(name)
+}
diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs
index 9edf063ec7..353806a324 100644
--- a/rust/pspp/src/dictionary.rs
+++ b/rust/pspp/src/dictionary.rs
@@ -14,7 +14,7 @@ use ordered_float::OrderedFloat;
 use unicase::UniCase;
 
 use crate::{
-    format::Format,
+    format::{DisplayValue, Format},
     identifier::{ByIdentifier, HasIdentifier, Identifier},
     raw::{Alignment, CategoryLabels, Measure, MissingValues, VarType},
 };
@@ -225,6 +225,10 @@ impl Value {
     pub fn sysmis() -> Self {
         Self::Number(None)
     }
+
+    pub fn display(&self, format: Format, encoding: &'static Encoding) -> DisplayValue {
+        DisplayValue::new(format, self, encoding)
+    }
 }
 
 impl From for Value {
diff --git a/rust/pspp/src/format.rs b/rust/pspp/src/format.rs
index f07ab4cbeb..4aeba72e4d 100644
--- a/rust/pspp/src/format.rs
+++ b/rust/pspp/src/format.rs
@@ -7,15 +7,17 @@ use std::{
     sync::LazyLock,
 };
 
+use chrono::{Datelike, Local, NaiveDate};
 use encoding_rs::Encoding;
 use enum_map::{Enum, EnumMap};
 use libm::frexp;
 use smallstr::SmallString;
-use smallvec::SmallVec;
+use smallvec::{Array, SmallVec};
 use thiserror::Error as ThisError;
 use unicode_width::UnicodeWidthStr;
 
 use crate::{
+    calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name},
     dictionary::{Value, VarWidth},
     raw::{self, VarType},
     settings::Settings as PsppSettings,
@@ -742,9 +744,25 @@ impl Not for Decimal {
     }
 }
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Epoch(pub i32);
+
+impl Default for Epoch {
+    fn default() -> Self {
+        static DEFAULT: LazyLock<Epoch> = LazyLock::new(|| Epoch(Local::now().year() - 69));
+        *DEFAULT
+    }
+}
+
+impl Display for Epoch {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        write!(f, "{}", self.0)
+    }
+}
+
 #[derive(Clone, Debug, Default)]
 pub struct Settings {
-    pub epoch: Option,
+    pub epoch: Epoch,
 
     /// Either `'.'` or `','`.
     pub decimal: Decimal,
@@ -973,28 +991,34 @@ impl<'a> Display for DisplayValue<'a> {
             Type::PIBHex => self.pibhex(f, number),
             Type::RBHex => self.rbhex(f, number),
 
-            Type::Date |
-            Type::ADate |
-            Type::EDate |
-            Type::JDate |
-            Type::SDate |
-            Type::QYr |
-            Type::MoYr |
-            Type::WkYr |
-            Type::DateTime |
-            Type::YMDHMS |
-            Type::MTime |
-            Type::Time |
-            Type::DTime |
-            Type::WkDay => self.date(f, number),
-            Type::Month => todo!(),
-            Type::A => todo!(),
-            Type::AHex => todo!(),
+            Type::Date
+            | Type::ADate
+            | Type::EDate
+            | Type::JDate
+            | Type::SDate
+            | Type::QYr
+            | Type::MoYr
+            | Type::WkYr
+            | Type::DateTime
+            | Type::YMDHMS
+            | Type::MTime
+            | Type::Time
+            | Type::DTime
+            | Type::WkDay => self.date(f, number),
+            Type::Month => self.month(f, number),
+            Type::A | Type::AHex => unreachable!(),
         }
     }
 }
 
 impl<'a> DisplayValue<'a> {
+    pub fn new(format: Format, value: &'a Value, encoding: &'static Encoding) -> Self {
+        Self {
+            format,
+            value,
+            encoding,
+        }
+    }
     fn number(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
         if number.is_finite() {
             let style = PsppSettings::global()
@@ -1182,8 +1206,7 @@ impl<'a> DisplayValue<'a> {
         // Figure out number of characters we can use for the fraction, if any.
         // (If that turns out to be `1`, then we'll output a decimal point
         // without any digits following.)
-        let mut fraction_width =
-            min(self.format.d as usize + 1, self.format.w() - width).min(16);
+        let mut fraction_width = min(self.format.d as usize + 1, self.format.w() - width).min(16);
         if self.format.type_ != Type::E && fraction_width == 1 {
             fraction_width = 0;
         }
@@ -1208,13 +1231,7 @@ impl<'a> DisplayValue<'a> {
         // Rust always uses `.` as the decimal point. Translate to `,` if
         // necessary.
         if style.decimal == Decimal::Comma {
-            // SAFETY: This only changes only one ASCII character (`.`) to
-            // another ASCII character (`,`).
-            unsafe {
-                if let Some(dot) = output.as_bytes_mut().iter_mut().find(|c| **c == b'.') {
-                    *dot = b',';
-                }
-            }
+            fix_decimal_point(&mut output);
         }
 
         // Make exponent have exactly three digits, plus sign.
@@ -1303,14 +1320,124 @@ impl<'a> DisplayValue<'a> {
     }
 
     fn date(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        let template = self.format.type_.date_template(self.format.w()).unwrap();
-        if self.format.type_.category() == Category::Date {
-            if number < 0.0 {
-                return self.missing(f);
+        const MINUTE: f64 = 60.0; // Seconds per minute.
+        const HOUR: f64 = 60.0 * 60.0; // Seconds per hour.
+        const DAY: f64 = 60.0 * 60.0 * 24.0; // Seconds per day.
+
+        let (date, mut time) = match self.format.type_.category() {
+            Category::Date => {
+                if number < 0.0 {
+                    return self.missing(f);
+                }
+                let Some(date) = calendar_offset_to_gregorian(number / DAY) else {
+                    return self.missing(f);
+                };
+                (date, number % DAY)
             }
-
+            Category::Time => (NaiveDate::MIN, number),
+            _ => unreachable!(),
+        };
+
+        let mut output = SmallString::<[u8; 40]>::new();
+        let mut template = self
+            .format
+            .type_
+            .date_template(self.format.w())
+            .unwrap()
+            .bytes()
+            .peekable();
+        while let Some(c) = template.next() {
+            let mut count = 1;
+            while template.next_if_eq(&c).is_some() {
+                count += 1;
+            }
+            match c {
+                b'd' if count < 3 => write!(&mut output, "{:02}", date.day()).unwrap(),
+                b'd' => write!(&mut output, "{:03}", day_of_year(date).unwrap_or(1)).unwrap(),
+                b'm' if count < 3 => write!(&mut output, "{:02}", date.month()).unwrap(),
+                b'm' => write!(&mut output, "{}", short_month_name(date.month()).unwrap()).unwrap(),
+                b'y' if count >= 4 => {
+                    let year = date.year();
+                    if year <= 9999 {
+                        write!(&mut output, "{year:04}").unwrap();
+                    } else if self.format.type_ == Type::DateTime
+                        || self.format.type_ == Type::YMDHMS
+                    {
+                        write!(&mut output, "****").unwrap();
+                    } else {
+                        return self.overflow(f);
+                    }
+                }
+                b'y' => { // Two-digit year: only valid within 100 years of the epoch.
+                    let epoch = PsppSettings::global().formats.epoch.0;
+                    let offset = date.year() - epoch;
+                    if offset < 0 || offset > 99 {
+                        return self.overflow(f);
+                    }
+                    write!(&mut output, "{:02}", date.year().abs() % 100).unwrap();
+                }
+                b'q' => write!(&mut output, "{}", date.month0() / 3 + 1).unwrap(),
+                b'w' => write!(
+                    &mut output,
+                    "{:2}",
+                    (day_of_year(date).unwrap_or(1) - 1) / 7 + 1
+                )
+                .unwrap(),
+                b'D' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    write!(&mut output, "{:1$.0}", (time / DAY).floor(), count).unwrap();
+                    time %= DAY;
+                }
+                b'H' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    write!(&mut output, "{:1$.0}", (time / HOUR).floor(), count).unwrap();
+                    time %= HOUR;
+                }
+                b'M' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    write!(&mut output, "{:02.0}", (time / MINUTE).floor()).unwrap();
+                    time %= MINUTE;
+
+                    let excess_width = self.format.w() as isize - output.len() as isize;
+                    if excess_width < 0 || (self.format.type_ == Type::MTime && excess_width < 3) {
+                        return self.overflow(f);
+                    }
+                    if excess_width == 3
+                        || excess_width == 4
+                        || (excess_width >= 5 && self.format.d == 0)
+                    {
+                        write!(&mut output, ":{:02.0}", time.floor()).unwrap();
+                    } else if excess_width >= 5 { // Room for `:SS.` plus fraction digits.
+                        let d = min(self.format.d(), excess_width as usize - 4);
+                        let w = d + 3;
+                        write!(&mut output, ":{:02$.*}", d, time, w).unwrap();
+                        if PsppSettings::global().formats.decimal == Decimal::Comma {
+                            fix_decimal_point(&mut output);
+                        }
+                    }
+                }
+                c if count == 1 => output.push(c as char),
+                _ => unreachable!(),
+            }
+        }
+        write!(f, "{:>.*}", self.format.w(), &output)
+    }
+
+    fn month(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        if let Some(month) = month_name(number as u32) {
+            write!(f, "{month:.*}", self.format.w())
+        } else {
+            self.missing(f)
         }
-        todo!()
     }
 }
 
@@ -1574,3 +1701,16 @@ fn power256(x: u16) -> f64 {
         .copied()
         .unwrap_or_else(|| 256.0_f64.powi(x as i32))
 }
+
+fn fix_decimal_point<A>(s: &mut SmallString<A>)
+where
+    A: Array<Item = u8>,
+{
+    // SAFETY: This changes only one ASCII character (`.`) to another
+    // ASCII character (`,`), so the contents remain valid UTF-8.
+    unsafe {
+        if let Some(dot) = s.as_bytes_mut().iter_mut().find(|c| **c == b'.') {
+            *dot = b',';
+        }
+    }
+}
diff --git a/rust/pspp/src/lib.rs b/rust/pspp/src/lib.rs
index ff105ecb82..3540125c81 100644
--- a/rust/pspp/src/lib.rs
+++ b/rust/pspp/src/lib.rs
@@ -1,18 +1,19 @@
+pub mod calendar;
+pub mod command;
 pub mod cooked;
 pub mod dictionary;
 pub mod encoding;
 pub mod endian;
+pub mod engine;
 pub mod format;
 pub mod identifier;
+pub mod integer;
+pub mod lex;
 pub mod locale_charset;
+pub mod macros;
+pub mod message;
 pub mod output;
+pub mod prompt;
 pub mod raw;
 pub mod sack;
-pub mod lex;
-pub mod prompt;
-pub mod message;
-pub mod macros;
 pub mod settings;
-pub mod command;
-pub mod integer;
-pub mod engine;
-- 
2.30.2