data_out first draft done
author Ben Pfaff <blp@cs.stanford.edu>
Tue, 31 Dec 2024 20:31:36 +0000 (12:31 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Tue, 31 Dec 2024 20:31:36 +0000 (12:31 -0800)
rust/pspp/src/calendar.rs [new file with mode: 0644]
rust/pspp/src/dictionary.rs
rust/pspp/src/format.rs
rust/pspp/src/lib.rs

diff --git a/rust/pspp/src/calendar.rs b/rust/pspp/src/calendar.rs
new file mode 100644 (file)
index 0000000..1bbcfb6
--- /dev/null
@@ -0,0 +1,61 @@
+use chrono::{Datelike, Days, NaiveDate};
+
+const EPOCH: NaiveDate = NaiveDate::from_ymd_opt(1582, 10, 14).unwrap();
+
+/// Translates `offset`, a count of days from 14 Oct 1582 (any fraction is dropped),
+/// into a Gregorian calendar date, or `None` if out of range.  May be negative.
+pub fn calendar_offset_to_gregorian(offset: f64) -> Option<NaiveDate> {
+    let offset = offset as i64;
+    // Use the magnitude: casting a negative `i64` with `as u64` would wrap around.
+    match Days::new(offset.unsigned_abs()) {
+        days if offset >= 0 => EPOCH.checked_add_days(days),
+        days => EPOCH.checked_sub_days(days),
+    }
+}
+
+/// Returns the day of the year, where January 1 is day 1.
+///
+/// Always returns `Some`; the `Option` return type is kept so callers are unaffected.
+pub fn day_of_year(date: NaiveDate) -> Option<u32> {
+    Some(date.ordinal())
+}
+
+/// Maps a month number (1 for January through 12 for December) to its
+/// 3-letter all-caps abbreviation, or `None` for any other number.
+pub fn short_month_name(month: u32) -> Option<&'static str> {
+    match month {
+        1 => Some("JAN"),
+        2 => Some("FEB"),
+        3 => Some("MAR"),
+        4 => Some("APR"),
+        5 => Some("MAY"),
+        6 => Some("JUN"),
+        7 => Some("JUL"),
+        8 => Some("AUG"),
+        9 => Some("SEP"),
+        10 => Some("OCT"),
+        11 => Some("NOV"),
+        12 => Some("DEC"),
+        _ => None,
+    }
+}
+
+/// Maps a month number (1 for January through 12 for December) to its full
+/// all-caps English name, or `None` for any other number.
+pub fn month_name(month: u32) -> Option<&'static str> {
+    const NAMES: [&str; 12] = [
+        "JANUARY",
+        "FEBRUARY",
+        "MARCH",
+        "APRIL",
+        "MAY",
+        "JUNE",
+        "JULY",
+        "AUGUST",
+        "SEPTEMBER",
+        "OCTOBER",
+        "NOVEMBER",
+        "DECEMBER",
+    ];
+    NAMES.get(month.checked_sub(1)? as usize).copied()
+}
index 9edf063ec7a9a49100ed042dffaaf18d9100ee83..353806a32448b77f3d89a5cf0999caf773f60951 100644 (file)
@@ -14,7 +14,7 @@ use ordered_float::OrderedFloat;
 use unicase::UniCase;
 
 use crate::{
-    format::Format,
+    format::{DisplayValue, Format},
     identifier::{ByIdentifier, HasIdentifier, Identifier},
     raw::{Alignment, CategoryLabels, Measure, MissingValues, VarType},
 };
@@ -225,6 +225,10 @@ impl Value {
     pub fn sysmis() -> Self {
         Self::Number(None)
     }
+
+    pub fn display(&self, format: Format, encoding: &'static Encoding) -> DisplayValue {
+        DisplayValue::new(format, self, encoding) // Formatted lazily, via its `Display` impl.
+    }
 }
 
 impl From<f64> for Value {
index f07ab4cbeb9aa95e6cc8cc52cc087a5e5ebe2aaf..4aeba72e4d0c1b0aa34f37d69c942910e0e75bee 100644 (file)
@@ -7,15 +7,17 @@ use std::{
     sync::LazyLock,
 };
 
+use chrono::{Datelike, Local, NaiveDate};
 use encoding_rs::Encoding;
 use enum_map::{Enum, EnumMap};
 use libm::frexp;
 use smallstr::SmallString;
-use smallvec::SmallVec;
+use smallvec::{Array, SmallVec};
 use thiserror::Error as ThisError;
 use unicode_width::UnicodeWidthStr;
 
 use crate::{
+    calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name},
     dictionary::{Value, VarWidth},
     raw::{self, VarType},
     settings::Settings as PsppSettings,
@@ -742,9 +744,25 @@ impl Not for Decimal {
     }
 }
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Epoch(pub i32); // First year of the 100-year span that 2-digit years can show.
+
+impl Default for Epoch {
+    fn default() -> Self {
+        static DEFAULT: LazyLock<Epoch> = LazyLock::new(|| Epoch(Local::now().year() - 69));
+        *DEFAULT // Computed on first use and then cached: the local year minus 69.
+    }
+}
+
+impl Display for Epoch {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        write!(f, "{}", self.0) // Prints the bare year.
+    }
+}
+
 #[derive(Clone, Debug, Default)]
 pub struct Settings {
-    pub epoch: Option<i32>,
+    pub epoch: Epoch,
 
     /// Either `'.'` or `','`.
     pub decimal: Decimal,
@@ -973,28 +991,34 @@ impl<'a> Display for DisplayValue<'a> {
 
             Type::PIBHex => self.pibhex(f, number),
             Type::RBHex => self.rbhex(f, number),
-            Type::Date |
-            Type::ADate |
-            Type::EDate |
-            Type::JDate |
-            Type::SDate |
-            Type::QYr |
-            Type::MoYr |
-            Type::WkYr |
-            Type::DateTime |
-            Type::YMDHMS |
-            Type::MTime |
-            Type::Time |
-            Type::DTime |
-            Type::WkDay => self.date(f, number),
-            Type::Month => todo!(),
-            Type::A => todo!(),
-            Type::AHex => todo!(),
+            Type::Date
+            | Type::ADate
+            | Type::EDate
+            | Type::JDate
+            | Type::SDate
+            | Type::QYr
+            | Type::MoYr
+            | Type::WkYr
+            | Type::DateTime
+            | Type::YMDHMS
+            | Type::MTime
+            | Type::Time
+            | Type::DTime
+            | Type::WkDay => self.date(f, number),
+            Type::Month => self.month(f, number),
+            Type::A | Type::AHex => unreachable!(),
         }
     }
 }
 
 impl<'a> DisplayValue<'a> {
+    pub fn new(format: Format, value: &'a Value, encoding: &'static Encoding) -> Self {
+        Self {
+            format,
+            value, // Borrowed for `'a`; nothing is formatted until `Display::fmt` runs.
+            encoding,
+        }
+    }
     fn number(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
         if number.is_finite() {
             let style = PsppSettings::global()
@@ -1182,8 +1206,7 @@ impl<'a> DisplayValue<'a> {
         // Figure out number of characters we can use for the fraction, if any.
         // (If that turns out to be `1`, then we'll output a decimal point
         // without any digits following.)
-        let mut fraction_width =
-            min(self.format.d as usize + 1, self.format.w() - width).min(16);
+        let mut fraction_width = min(self.format.d as usize + 1, self.format.w() - width).min(16);
         if self.format.type_ != Type::E && fraction_width == 1 {
             fraction_width = 0;
         }
@@ -1208,13 +1231,7 @@ impl<'a> DisplayValue<'a> {
         // Rust always uses `.` as the decimal point. Translate to `,` if
         // necessary.
         if style.decimal == Decimal::Comma {
-            // SAFETY: This only changes only one ASCII character (`.`) to
-            // another ASCII character (`,`).
-            unsafe {
-                if let Some(dot) = output.as_bytes_mut().iter_mut().find(|c| **c == b'.') {
-                    *dot = b',';
-                }
-            }
+            fix_decimal_point(&mut output);
         }
 
         // Make exponent have exactly three digits, plus sign.
@@ -1303,14 +1320,124 @@ impl<'a> DisplayValue<'a> {
     }
 
+    /// Writes `number`, a count of seconds, to `f` using this format's date or time template.
+    /// Date formats count from 14 Oct 1582 and hand negative or unrepresentable values to
+    /// `missing`; time formats treat `number` as a duration, which may be negative.
     fn date(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        let template = self.format.type_.date_template(self.format.w()).unwrap();
-        if self.format.type_.category() == Category::Date {
-            if number < 0.0 {
-                return self.missing(f);
+        const MINUTE: f64 = 60.0;
+        const HOUR: f64 = 60.0 * 60.0;
+        const DAY: f64 = 60.0 * 60.0 * 24.0;
+
+        let (date, mut time) = match self.format.type_.category() {
+            Category::Date => {
+                if number < 0.0 {
+                    return self.missing(f);
+                }
+                let Some(date) = calendar_offset_to_gregorian(number / DAY) else {
+                    return self.missing(f);
+                };
+                (date, number % DAY)
             }
-            
+            Category::Time => (NaiveDate::MIN, number),
+            _ => unreachable!(),
+        };
+
+        let mut output = SmallString::<[u8; 40]>::new();
+        let template = self.format.type_.date_template(self.format.w()).unwrap();
+        let mut template = template.bytes().peekable();
+        // Each run of identical template characters is one field; `count` is the run's length.
+        while let Some(c) = template.next() {
+            let mut count = 1;
+            while template.next_if_eq(&c).is_some() {
+                count += 1;
+            }
+            match c {
+                b'd' if count < 3 => write!(&mut output, "{:02}", date.day()).unwrap(),
+                b'd' => write!(&mut output, "{:03}", day_of_year(date).unwrap_or(1)).unwrap(),
+                b'm' if count < 3 => write!(&mut output, "{:02}", date.month()).unwrap(),
+                b'm' => write!(&mut output, "{}", short_month_name(date.month()).unwrap()).unwrap(),
+                b'y' if count >= 4 => {
+                    let year = date.year();
+                    if year <= 9999 {
+                        write!(&mut output, "{year:04}").unwrap();
+                    } else if matches!(self.format.type_, Type::DateTime | Type::YMDHMS) {
+                        write!(&mut output, "****").unwrap();
+                    } else {
+                        return self.overflow(f);
+                    }
+                }
+                b'y' => {
+                    // The epoch only bounds the 100-year window of representable years; the
+                    // field itself shows the last two digits of the year.
+                    let epoch = PsppSettings::global().formats.epoch.0;
+                    let offset = date.year() - epoch;
+                    if offset < 0 || offset > 99 {
+                        return self.overflow(f);
+                    }
+                    write!(&mut output, "{:02}", date.year().abs() % 100).unwrap();
+                }
+                b'q' => write!(&mut output, "{}", date.month0() / 3 + 1).unwrap(),
+                b'w' => {
+                    let week = (day_of_year(date).unwrap_or(1) - 1) / 7 + 1;
+                    write!(&mut output, "{week:2}").unwrap();
+                }
+                b'D' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    // Days are space-padded to the template width.
+                    write!(&mut output, "{:1$.0}", (time / DAY).floor(), count).unwrap();
+                    time %= DAY;
+                }
+                b'H' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    // Hours are zero-padded to the template width (e.g. `01:30`), unlike days.
+                    write!(&mut output, "{:01$.0}", (time / HOUR).floor(), count).unwrap();
+                    time %= HOUR;
+                }
+                b'M' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    write!(&mut output, "{:02.0}", (time / MINUTE).floor()).unwrap();
+                    time %= MINUTE;
+
+                    // The field width still unused decides whether and how seconds are shown.
+                    let excess_width = self.format.w() as isize - output.len() as isize;
+                    if excess_width < 0 || (self.format.type_ == Type::MTime && excess_width < 3) {
+                        return self.overflow(f);
+                    }
+                    if matches!(excess_width, 3 | 4) || (excess_width >= 5 && self.format.d == 0) {
+                        write!(&mut output, ":{:02.0}", time.floor()).unwrap();
+                    } else if excess_width >= 5 {
+                        // `:SS.ddd` occupies `d + 4` columns, so cap `d` to fit the space left,
+                        // and print the leftover seconds in `time`, not the whole `number`.
+                        let d = min(self.format.d(), excess_width as usize - 4);
+                        let w = d + 3;
+                        write!(&mut output, ":{:02$.*}", d, time, w).unwrap();
+                        if PsppSettings::global().formats.decimal == Decimal::Comma {
+                            fix_decimal_point(&mut output);
+                        }
+                    }
+                }
+                c if count == 1 => output.push(c as char),
+                _ => unreachable!(),
+            }
+        }
+        // Right-justify in the field, and clip anything wider than the field.
+        write!(f, "{:>1$.1$}", &output, self.format.w())
+    }
+
+    fn month(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        if let Some(month) = month_name(number as u32) {
+            write!(f, "{month:.*}", self.format.w()) // Precision `w` clips names to the field.
+        } else {
+            self.missing(f)
         }
-        todo!()
     }
 }
 
@@ -1574,3 +1701,16 @@ fn power256(x: u16) -> f64 {
         .copied()
         .unwrap_or_else(|| 256.0_f64.powi(x as i32))
 }
+
+fn fix_decimal_point<A>(s: &mut SmallString<A>)
+where
+    A: Array<Item = u8>,
+{
+    // SAFETY: This overwrites at most one ASCII byte (the first `.`) with another
+    // ASCII byte (`,`), so `s` is still valid UTF-8 afterward.
+    unsafe {
+        if let Some(dot) = s.as_bytes_mut().iter_mut().find(|c| **c == b'.') {
+            *dot = b',';
+        }
+    }
+}
index ff105ecb826e6e4d246b610334d3f4e35dbe5af9..3540125c81d379af17fbe91ed74816a6b8cf86b2 100644 (file)
@@ -1,18 +1,19 @@
+pub mod calendar;
+pub mod command;
 pub mod cooked;
 pub mod dictionary;
 pub mod encoding;
 pub mod endian;
+pub mod engine;
 pub mod format;
 pub mod identifier;
+pub mod integer;
+pub mod lex;
 pub mod locale_charset;
+pub mod macros;
+pub mod message;
 pub mod output;
+pub mod prompt;
 pub mod raw;
 pub mod sack;
-pub mod lex;
-pub mod prompt;
-pub mod message;
-pub mod macros;
 pub mod settings;
-pub mod command;
-pub mod integer;
-pub mod engine;