From 001954fa7b08fefbb5ba12100e08fe00b1e12047 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 31 Mar 2025 12:44:23 -0700 Subject: [PATCH] wkday format --- rust/pspp/src/format/parse.rs | 84 +++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs index 20617fc13a..5093abc97c 100644 --- a/rust/pspp/src/format/parse.rs +++ b/rust/pspp/src/format/parse.rs @@ -132,7 +132,7 @@ impl<'a> ParseValue<'a> { /// Parses `s` as this format. For string formats, `encoding` specifies the /// output encoding. - fn parse(&self, s: &str, _encoding: &'static Encoding) -> Result { + pub fn parse(&self, s: &str, _encoding: &'static Encoding) -> Result { if s.is_empty() { return Ok(self.format.default_value()); } @@ -163,7 +163,7 @@ impl<'a> ParseValue<'a> { | Type::MTime | Type::Time | Type::DTime => self.parse_date(s), - Type::WkDay => todo!(), + Type::WkDay => self.parse_wkday(s), Type::Month => todo!(), Type::A => todo!(), Type::AHex => todo!(), @@ -303,12 +303,10 @@ impl<'a> ParseValue<'a> { } fn parse_date(&self, input: &str) -> Result { - let orig_input = input; - let input = input.trim(); - if input.is_empty() || input == "." { + let mut p = StrParser(input.trim()); + if p.0.is_empty() || p.0 == "." { return Ok(Value::sysmis()); } - let mut p = StrParser(input); let mut day = 1; let mut yday = 1; @@ -379,10 +377,7 @@ impl<'a> ParseValue<'a> { } } } - p.strip_ws(); - if !p.0.is_empty() { - return Err(ParseErrorKind::TrailingGarbage(p.0.into())); - } + parse_trailer(&mut p)?; let date = if let Some(year) = year { let date = calendar_gregorian_to_offset(year, month, day, self.settings)? + yday - 1; @@ -422,6 +417,17 @@ impl<'a> ParseValue<'a> { let seconds = number.parse::().unwrap(); Ok(time + seconds) } + + fn parse_wkday(&self, input: &str) -> Result { + let mut p = StrParser(input.trim()); + if p.0.is_empty() || p.0 == "." { + Ok(Value::sysmis()) + } else { + let weekday = parse_weekday(&mut p)?; + parse_trailer(&mut p)?; + Ok(Value::Number(Some(weekday as f64))) + } + } } #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] @@ -431,6 +437,15 @@ enum Sign { Negative, } +fn parse_trailer<'a>(p: &mut StrParser<'a>) -> Result<(), ParseErrorKind> { + p.strip_ws(); + if p.0.is_empty() { + Ok(()) + } else { + Err(ParseErrorKind::TrailingGarbage(p.0.into())) + } +} + fn parse_sign<'a>(p: &mut StrParser<'a>, sign: Option) -> Sign { if let Some(sign) = sign { sign @@ -502,7 +517,9 @@ fn parse_month<'a>(p: &mut StrParser<'a>) -> Result { fn parse_weekday<'a>(p: &mut StrParser<'a>) -> Result { static WEEKDAY_NAMES: [&str; 7] = ["su", "mo", "tu", "we", "th", "fr", "sa"]; let name = p.strip_matches(|c| c.is_ascii_alphabetic()); - match_name(name, &WEEKDAY_NAMES).ok_or(ParseErrorKind::InvalidWeekdayName) + name.get(..2) + .and_then(|name| match_name(name, &WEEKDAY_NAMES)) + .ok_or(ParseErrorKind::InvalidWeekdayName) } fn parse_quarter<'a>(p: &mut StrParser<'a>) -> Result { @@ -547,13 +564,6 @@ fn match_name(name: &str, candidates: &[&str]) -> Option { None } -fn strip_name(input: &str) -> (&str, &str) { - take( - input, - input.trim_start_matches(|c: char| c.is_ascii_alphabetic()), - ) -} - fn parse_year<'a>( p: &mut StrParser<'a>, settings: &Settings, @@ -573,10 +583,6 @@ fn parse_year<'a>( Ok(settings.epoch.apply(year)) } -fn take<'a>(input: &'a str, rest: &'a str) -> (&'a str, &'a str) { - (&input[..input.len() - rest.len()], rest) -} - fn parse_int<'a, T>(p: &mut StrParser<'a>) -> Result where T: FromStr, @@ -655,7 +661,6 @@ mod test { calendar::{days_in_month, is_leap_year}, dictionary::Value, format::{parse::Sign, Epoch, Format, Settings as FormatSettings, Type}, - settings::{self, Settings as PsppSettings}, }; fn test(name: &str, type_: Type) { @@ -663,7 +668,6 @@ mod test { let input_stream = BufReader::new(File::open(base.join("num-in.txt")).unwrap()); let expected_stream = BufReader::new(File::open(base.join(name)).unwrap()); let format = Format::new(type_, 40, 1).unwrap(); - let settings = PsppSettings::global().formats.number_style(type_); for (line_number, (input, expected)) in input_stream .lines() .map(|result| result.unwrap()) @@ -1214,4 +1218,36 @@ mod test { TimeTester::test("+D H:M", Type::DTime); TimeTester::test("+D H:M:S", Type::DTime); } + + #[test] + fn wkday() { + for (mut input, expected) in [ + ("", None), + (".", None), + ("sudnay", Some(1.0)), + ("monady", Some(2.0)), + ("tuseday", Some(3.0)), + ("WEDENSDAY", Some(4.0)), + ("Thrudsay", Some(5.0)), + ("fRidya", Some(6.0)), + ("SAturady", Some(7.0)), + ("sturday", None), + ] { + loop { + let parsed = Format::new(Type::WkDay, 40, 0) + .unwrap() + .parser() + .parse(input, UTF_8) + .unwrap_or(Value::Number(None)) + .as_number() + .unwrap(); + assert_eq!(parsed, expected); + + if input.len() <= 2 { + break; + } + input = &input[..input.len() - 1]; + } + } + } } -- 2.30.2