From 5911a535978c36cbca9f579e79cc8493bb20ee16 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Mon, 31 Mar 2025 13:56:35 -0700
Subject: [PATCH] MONTH parsing

---
Review notes (ignored when the patch is applied):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; confirm the context lines against the tree before
  applying.
- NOTE(review): `-> Result` in the new method (and in the third hunk header)
  appears to have lost its generic arguments during extraction; restore them
  from the original commit.  They are left untouched here.
- The `month` test now uses an inclusive range for name prefixes.  With the
  half-open range the full spelling of each name was never parsed, and "may"
  was not tested at all.
- Hunk offsets and the diffstat were recomputed for the added comment lines;
  the blob ids on the `index` line are still those of the original patch.

 rust/pspp/src/format/parse.rs | 96 ++++++++++++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 2 deletions(-)

diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs
index 5093abc97c..c396df0926 100644
--- a/rust/pspp/src/format/parse.rs
+++ b/rust/pspp/src/format/parse.rs
@@ -164,7 +164,7 @@ impl<'a> ParseValue<'a> {
             | Type::Time
             | Type::DTime => self.parse_date(s),
             Type::WkDay => self.parse_wkday(s),
-            Type::Month => todo!(),
+            Type::Month => self.parse_month(s),
             Type::A => todo!(),
             Type::AHex => todo!(),
         }
@@ -428,6 +428,23 @@ impl<'a> ParseValue<'a> {
             Ok(Value::Number(Some(weekday as f64)))
         }
     }
+
+    /// Parses `input` as a MONTH field.
+    ///
+    /// Surrounding whitespace is trimmed first.  An empty field or a lone
+    /// `.` yields the system-missing value.  Otherwise the free function
+    /// `parse_month` reads the month, `parse_trailer` is applied to what
+    /// remains, and the month is returned as a number; errors propagate.
+    fn parse_month(&self, input: &str) -> Result {
+        let mut p = StrParser(input.trim());
+        if p.0.is_empty() || p.0 == "." {
+            Ok(Value::sysmis())
+        } else {
+            let month = parse_month(&mut p)?;
+            parse_trailer(&mut p)?;
+            Ok(Value::Number(Some(month as f64)))
+        }
+    }
 }
 
 #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
@@ -507,7 +524,7 @@ fn parse_month<'a>(p: &mut StrParser<'a>) -> Result {
         static ROMAN_NAMES: [&str; 12] = [
             "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii",
         ];
-        if let Some(month) = match_name(name, &ROMAN_NAMES) {
+        if let Some(month) = match_name(&name[..4.min(name.len())], &ROMAN_NAMES) {
             return Ok(month);
         }
     }
@@ -1250,4 +1267,79 @@ mod test {
             }
         }
     }
+
+    /// Checks MONTH parsing of Roman numerals, decimal numbers, and English
+    /// month names, including misspellings that keep a valid three-letter
+    /// prefix.  `None` means the input is expected not to yield a month.
+    #[test]
+    fn month() {
+        for (input, expected, with_shortening) in [
+            ("", None, false),
+            ("i", Some(1.0), false),
+            ("ii", Some(2.0), false),
+            ("iii", Some(3.0), false),
+            ("iiii", None, false),
+            ("iv", Some(4.0), false),
+            ("v", Some(5.0), false),
+            ("vi", Some(6.0), false),
+            ("vii", Some(7.0), false),
+            ("viii", Some(8.0), false),
+            ("ix", Some(9.0), false),
+            ("viiii", Some(8.0), false),
+            ("x", Some(10.0), false),
+            ("xi", Some(11.0), false),
+            ("xii", Some(12.0), false),
+            ("0", None, false),
+            ("1", Some(1.0), false),
+            ("2", Some(2.0), false),
+            ("3", Some(3.0), false),
+            ("4", Some(4.0), false),
+            ("5", Some(5.0), false),
+            ("6", Some(6.0), false),
+            ("7", Some(7.0), false),
+            ("8", Some(8.0), false),
+            ("9", Some(9.0), false),
+            ("10", Some(10.0), false),
+            ("11", Some(11.0), false),
+            ("12", Some(12.0), false),
+            ("13", None, false),
+            ("january", Some(1.0), true),
+            ("JANAURY", Some(1.0), true),
+            ("February", Some(2.0), true),
+            ("fEbraury", Some(2.0), true),
+            ("MArch", Some(3.0), true),
+            ("marhc", Some(3.0), true),
+            ("apRIL", Some(4.0), true),
+            ("may", Some(5.0), true),
+            ("june", Some(6.0), true),
+            ("july", Some(7.0), true),
+            ("august", Some(8.0), true),
+            ("september", Some(9.0), true),
+            ("october", Some(10.0), true),
+            ("november", Some(11.0), true),
+            ("decmeber", Some(12.0), true),
+            ("december", Some(12.0), true),
+        ] {
+            // For names, try every prefix from the full spelling down to
+            // three letters.  The upper bound must be inclusive: otherwise
+            // the full spelling is skipped and "may" is never tested.
+            let lengths = if with_shortening {
+                (3..=input.len()).rev().collect()
+            } else {
+                vec![input.len()]
+            };
+
+            for length in lengths {
+                let input = &input[..length];
+                let parsed = Format::new(Type::Month, 40, 0)
+                    .unwrap()
+                    .parser()
+                    .parse(input, UTF_8)
+                    .unwrap_or(Value::Number(None))
+                    .as_number()
+                    .unwrap();
+                assert_eq!(parsed, expected, "parsing {input}");
+            }
+        }
+    }
 }
-- 
2.30.2