work on testing date input
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 30 Mar 2025 23:27:07 +0000 (16:27 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 30 Mar 2025 23:27:07 +0000 (16:27 -0700)
rust/pspp/src/calendar.rs
rust/pspp/src/format/mod.rs
rust/pspp/src/format/parse.rs

index 1bbcfb616a4d6a13a6d821e900fdf79946c19e84..7a8562d061bc608c5f249c972ba150e9565a1f4b 100644 (file)
@@ -1,4 +1,7 @@
 use chrono::{Datelike, Days, NaiveDate};
+use thiserror::Error as ThisError;
+
+use crate::format::Settings;
 
 const EPOCH: NaiveDate = NaiveDate::from_ymd_opt(1582, 10, 14).unwrap();
 
@@ -59,3 +62,72 @@ pub fn month_name(month: u32) -> Option<&'static str> {
     };
     Some(name)
 }
+
+/// Errors reported when a Gregorian calendar date fails validation.
+#[derive(Copy, Clone, Debug, ThisError)]
+pub enum DateError {
+    /// The date, after year and month adjustment, falls before 15 Oct 1582.
+    #[error("Date {y:04}-{m:02}-{d:02} is before the earliest supported date 1582-10-15.")]
+    InvalidDate { y: i32, m: i32, d: i32 },
+
+    /// The month is outside the accepted range `0..=13`.
+    #[error("Month {0} is not in the acceptable range of 0 to 13, inclusive.")]
+    InvalidMonth(i32),
+
+    /// The day is outside the accepted range `0..=31`.
+    #[error("Day {0} is not in the acceptable range of 0 to 31, inclusive.")]
+    InvalidDay(i32),
+}
+
+/// Validates and normalizes the date `(y,m,d)`, returning the adjusted
+/// `(year, month, day)` triple.
+///
+/// The year is first passed through `settings.epoch.apply`.  A month of 0 is
+/// then rewritten as December of the previous year, and a month of 13 as
+/// January of the following year.  The day is range-checked but otherwise
+/// returned unchanged.
+///
+/// # Errors
+///
+/// - [`DateError::InvalidMonth`] if `m` is outside `0..=13`.
+/// - [`DateError::InvalidDay`] if `d` is outside `0..=31`.
+/// - [`DateError::InvalidDate`] if the adjusted date is before 15 Oct 1582.
+///
+/// The month is checked before the day, so an input with both out of range
+/// reports the month.
+pub fn calendar_gregorian_adjust(
+    y: i32,
+    m: i32,
+    d: i32,
+    settings: &Settings,
+) -> Result<(i32, i32, i32), DateError> {
+    let y = settings.epoch.apply(y);
+
+    // Months 0 and 13 wrap into the neighboring year.
+    let (y, m) = match m {
+        0 => (y - 1, 12),
+        1..=12 => (y, m),
+        13 => (y + 1, 1),
+        _ => return Err(DateError::InvalidMonth(m)),
+    };
+
+    if !(0..=31).contains(&d) {
+        Err(DateError::InvalidDay(d))
+    } else if y < 1582 || (y == 1582 && (m < 10 || (m == 10 && d < 15))) {
+        // Anything earlier than 15 Oct 1582 is rejected.
+        Err(DateError::InvalidDate { y, m, d })
+    } else {
+        Ok((y, m, d))
+    }
+}
+
+pub fn calendar_raw_gregorian_to_offset(y: i32, m: i32, d: i32) -> i32 {
+    fn is_leap_year(y: i32) -> bool {
+        y % 4 == 0 && (y % 100 != 0 || y % 400 == 0)
+    }
+
+    -577735 + 365 * (y - 1) + (y - 1) / 4 - (y - 1) / 100
+        + (y - 1) / 400
+        + (367 * m - 362) / 12
+        + if m <= 2 {
+            0
+        } else if m >= 2 && is_leap_year(y) {
+            -1
+        } else {
+            -2
+        }
+        + d
+}
+
+/// Returns the number of days from 14 Oct 1582 to `(y,m,d)` in the Gregorian
+/// calendar, after normalizing the date with [`calendar_gregorian_adjust`].
+///
+/// # Errors
+///
+/// Returns a [`DateError`] if the month or day is out of range or if the
+/// adjusted date is before 15 Oct 1582, the earliest supported date.
+pub fn calendar_gregorian_to_offset(
+    y: i32,
+    m: i32,
+    d: i32,
+    settings: &Settings,
+) -> Result<i32, DateError> {
+    let (y, m, d) = calendar_gregorian_adjust(y, m, d, settings)?;
+    Ok(calendar_raw_gregorian_to_offset(y, m, d))
+}
index 01f2df2872bb858803075ff9f5ce095e0dcaa3c4..ff350f2d1639eb6c94e94d3485f4f8216c96c554 100644 (file)
@@ -774,7 +774,7 @@ impl Epoch {
     ///   converted it to the correct year considering the epoch.
     ///
     /// - Otherwise, returns `year` unchanged.
-    fn apply(&self, year: i32) -> i32 {
+    pub fn apply(&self, year: i32) -> i32 {
         match year {
             0..=99 => {
                 let century = self.0 / 100 * 100;
index f67a739c1851f422d28b8460488dbcd377b3f00e..217186cb3beef3300cf337fd5813eafb9e920b93 100644 (file)
@@ -1,4 +1,5 @@
 use crate::{
+    calendar::{calendar_gregorian_to_offset, DateError},
     dictionary::Value,
     format::{DateTemplate, Format, Settings, TemplateItem, Type},
     settings::{EndianSettings, Settings as PsppSettings},
@@ -91,6 +92,14 @@ enum ParseErrorKind {
     /// Expected character.
     #[error("{0:?} expected in date field.")]
     ExpectedChar(char),
+
+    /// Trailing garbage.
+    #[error("Trailing garbage {0:?} follows date.")]
+    TrailingGarbage(String),
+
+    /// Invalid date.
+    #[error("{0}")]
+    InvalidDate(#[from] DateError),
 }
 
 pub struct ParseValue<'a> {
@@ -141,19 +150,19 @@ impl<'a> ParseValue<'a> {
             Type::PIBHex => todo!(),
             Type::RB => todo!(),
             Type::RBHex => todo!(),
-            Type::Date => todo!(),
-            Type::ADate => todo!(),
-            Type::EDate => todo!(),
-            Type::JDate => todo!(),
-            Type::SDate => todo!(),
-            Type::QYr => todo!(),
-            Type::MoYr => todo!(),
-            Type::WkYr => todo!(),
-            Type::DateTime => todo!(),
-            Type::YMDHMS => todo!(),
-            Type::MTime => todo!(),
-            Type::Time => todo!(),
-            Type::DTime => todo!(),
+            Type::Date
+            | Type::ADate
+            | Type::EDate
+            | Type::JDate
+            | Type::SDate
+            | Type::QYr
+            | Type::MoYr
+            | Type::WkYr
+            | Type::DateTime
+            | Type::YMDHMS
+            | Type::MTime
+            | Type::Time
+            | Type::DTime => self.parse_date(s),
             Type::WkDay => todo!(),
             Type::Month => todo!(),
             Type::A => todo!(),
@@ -371,7 +380,23 @@ impl<'a> ParseValue<'a> {
                 }
             }
         }
-        todo!()
+        p.strip_ws();
+        if !p.0.is_empty() {
+            return Err(ParseErrorKind::TrailingGarbage(p.0.into()));
+        }
+
+        let date = if let Some(year) = year {
+            let date = calendar_gregorian_to_offset(year, month, day, self.settings)? + yday - 1;
+            date as f64 * 60.0 * 60.0 * 24.0
+        } else {
+            0.0
+        };
+        let time_date = if time_sign == Some(Sign::Negative) {
+            date - time
+        } else {
+            date + time
+        };
+        Ok(Value::Number(Some(time_date)))
     }
 
     fn parse_minute_second<'b>(&self, p: &mut StrParser<'b>) -> Result<f64, ParseErrorKind> {
@@ -384,7 +409,6 @@ impl<'a> ParseValue<'a> {
         if parse_time_delimiter(p).is_err() || !p.0.starts_with(|c: char| c.is_ascii_digit()) {
             return Ok(time);
         }
-        let seconds_start = p.0;
         let integer = p.strip_matches(|c| c.is_ascii_digit());
         let fraction = if p.strip_prefix(self.settings.decimal.as_str()) {
             p.strip_matches(|c| c.is_ascii_digit())
@@ -401,6 +425,7 @@ impl<'a> ParseValue<'a> {
     }
 }
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum Sign {
     Positive,
     Negative,
@@ -624,9 +649,10 @@ mod test {
     };
 
     use encoding_rs::UTF_8;
+    use serde::de::Expected;
 
     use crate::{
-        format::{Format, Type},
+        format::{parse::Sign, DateTemplate, Format, Type},
         settings::Settings,
     };
 
@@ -688,12 +714,159 @@ mod test {
         test("pct.txt", Type::Pct);
     }
 
-    /*
-    #[test]
-    fn legacy() {
-        for i in 0..=u16::MAX {
-            let input = i.to_be_bytes();
+    /// Date and time-of-day components from which `DateVisitor` generates
+    /// candidate input strings.
+    #[derive(Clone, Debug)]
+    struct TestDate {
+        year: i32,
+        // 1 is January: `DateVisitor::visit` subtracts 1 before indexing its
+        // 12-entry month-name tables.
+        month: i32,
+        day: i32,
+        // Day of the year; presumably 1-based, given the `(yday - 1) / 7 + 1`
+        // week computation in `DateVisitor::visit` -- TODO confirm.
+        yday: i32,
+        // The time-of-day fields are not read by the visible part of the test.
+        hour: i32,
+        minute: i32,
+        second: i32,
+    }
+
+    struct ExpectDate {
+        year: i32,
+        month: i32,
+        day: i32,
+        time: i32,
+        sign: Sign,
+    }
+
+    /// Walks a date template one character at a time, generating input text
+    /// for `date`.
+    struct DateVisitor<'a> {
+        // The date whose fields are rendered into the generated text.
+        date: TestDate,
+        // The template characters that have not been expanded yet.
+        template: &'a str,
+    }
 
+    impl<'a> DateVisitor<'a> {
+        /// Expands the first character of `self.template` into every textual
+        /// variant of the matching date field, appends each variant to
+        /// `formatted`, and recurses on the rest of the template with
+        /// `expected` updated for that field.
+        ///
+        /// Does nothing once the template is empty.
+        fn visit(&self, formatted: String, expected: ExpectDate) {
+            if !self.template.is_empty() {
+                // Ways to write year `y`: always in full, plus the two-digit
+                // form for years in 1930..2030.
+                fn years(y: i32) -> Vec<i32> {
+                    match y {
+                        1930..2030 => vec![y, y % 100],
+                        _ => vec![y],
+                    }
+                }
+                let mut iter = self.template.chars();
+                let first = iter.next().unwrap();
+                // Visitor for the template with its first character removed.
+                let next = DateVisitor {
+                    date: self.date.clone(),
+                    template: iter.as_str(),
+                };
+                // NOTE(review): this match has no catch-all arm, so template
+                // characters other than d, m, y, j, q, and w are not handled.
+                match first {
+                    // Day of month, plain and zero-padded to two digits.
+                    'd' => {
+                        let expected = ExpectDate {
+                            day: self.date.day,
+                            ..expected
+                        };
+                        next.visit(format!("{formatted}{}", self.date.day), expected);
+                        next.visit(format!("{formatted}{:02}", self.date.day), expected);
+                    }
+                    // Month as a number, a Roman numeral, a full English name,
+                    // or a three-letter English abbreviation.
+                    'm' => {
+                        let m = self.date.month as usize - 1;
+                        static ROMAN: [&'static str; 12] = [
+                            "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi",
+                            "xii",
+                        ];
+                        static ENGLISH: [&'static str; 12] = [
+                            "january",
+                            "february",
+                            "march",
+                            "april",
+                            "may",
+                            "june",
+                            "july",
+                            "august",
+                            "september",
+                            "october",
+                            "november",
+                            "december",
+                        ];
+                        let roman = ROMAN[m];
+                        let english = ENGLISH[m];
+                        let expected = ExpectDate {
+                            month: self.date.month,
+                            ..expected
+                        };
+                        for formatted in [
+                            format!("{formatted}{}", self.date.month),
+                            format!("{formatted}{:02}", self.date.month),
+                            format!("{formatted}{}", roman),
+                            format!("{formatted}{}", roman.to_ascii_uppercase()),
+                            format!("{formatted}{}", english),
+                            format!("{formatted}{}", english.to_ascii_uppercase()),
+                            format!("{formatted}{}", &english[..3]),
+                            format!("{formatted}{}", &english[..3].to_ascii_uppercase()),
+                        ] {
+                            next.visit(formatted, expected);
+                        }
+                    }
+                    // Year, in each form returned by `years`.
+                    'y' => {
+                        let expected = ExpectDate {
+                            year: self.date.year,
+                            ..expected
+                        };
+                        for year in years(self.date.year) {
+                            next.visit(format!("{formatted}{year}"), expected);
+                        }
+                    }
+                    // Year followed by the day of the year, zero-padded to
+                    // three digits.
+                    'j' => {
+                        let expected = ExpectDate {
+                            year: self.date.year,
+                            month: self.date.month,
+                            day: self.date.day,
+                            ..expected
+                        };
+                        for year in years(self.date.year) {
+                            next.visit(format!("{formatted}{year}{:03}", self.date.yday), expected);
+                        }
+                    }
+                    // Quarter number; the expected month is the first month of
+                    // that quarter.
+                    'q' => {
+                        let quarter = (self.date.month - 1) / 3 + 1;
+                        let month = (quarter - 1) * 3 + 1;
+                        next.visit(
+                            format!("{formatted}{}", quarter),
+                            ExpectDate { month, ..expected },
+                        );
+                    }
+                    // Week number; the expected date moves back to the first
+                    // day of that week, borrowing from the previous month when
+                    // the day would drop below 1.
+                    'w' => {
+                        let week = (self.date.yday - 1) / 7 + 1;
+                        let mut month = self.date.month;
+                        let mut day = self.date.day - (self.date.yday - 1) % 7;
+                        if day < 1 {
+                            month -= 1;
+                            day += days_in_month(self.date.year, month);
+                        }
+                        next.visit(
+                            format!("{formatted}{week}"),
+                            ExpectDate {
+                                month,
+                                day,
+                                ..expected
+                            },
+                        );
+                    }
+                }
+            }
         }
-    }*/
+    }
+
+    /// Returns the number of days in `month` of `year`, where `month` is
+    /// 0-based: 0 is January and 11 is December.
+    ///
+    /// Month 1 (February) has 29 days when `year` is a Gregorian leap year and
+    /// 28 otherwise.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `month` is outside `0..=11`.
+    ///
+    /// NOTE(review): `DateVisitor::visit` appears to pass a 1-based month
+    /// here, which would select the following month's length -- confirm which
+    /// convention is intended.
+    fn days_in_month(year: i32, month: i32) -> i32 {
+        match month {
+            0 => 31,
+            1 if year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) => 29,
+            1 => 28,
+            2 => 31,
+            3 => 30,
+            4 => 31,
+            5 => 30,
+            6 => 31,
+            7 => 31,
+            8 => 30,
+            9 => 31,
+            10 => 30,
+            11 => 31,
+            _ => unreachable!(),
+        }
+    }
 }