MONTH parsing
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 31 Mar 2025 20:56:35 +0000 (13:56 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 31 Mar 2025 20:56:35 +0000 (13:56 -0700)
rust/pspp/src/format/parse.rs

index 5093abc97c5f2db568ab9e63904c389b4ad90ed3..c396df0926b0482eced70e094e3e5c9c6149b64f 100644 (file)
@@ -164,7 +164,7 @@ impl<'a> ParseValue<'a> {
             | Type::Time
             | Type::DTime => self.parse_date(s),
             Type::WkDay => self.parse_wkday(s),
-            Type::Month => todo!(),
+            Type::Month => self.parse_month(s),
             Type::A => todo!(),
             Type::AHex => todo!(),
         }
@@ -428,6 +428,17 @@ impl<'a> ParseValue<'a> {
             Ok(Value::Number(Some(weekday as f64)))
         }
     }
+
+    fn parse_month(&self, input: &str) -> Result<Value, ParseErrorKind> {
+        let mut p = StrParser(input.trim());
+        if p.0.is_empty() || p.0 == "." {
+            Ok(Value::sysmis())
+        } else {
+            let month = parse_month(&mut p)?;
+            parse_trailer(&mut p)?;
+            Ok(Value::Number(Some(month as f64)))
+        }
+    }
 }
 
 #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
@@ -507,7 +518,7 @@ fn parse_month<'a>(p: &mut StrParser<'a>) -> Result<i32, ParseErrorKind> {
         static ROMAN_NAMES: [&str; 12] = [
             "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii",
         ];
-        if let Some(month) = match_name(name, &ROMAN_NAMES) {
+        if let Some(month) = match_name(&name[..4.min(name.len())], &ROMAN_NAMES) {
             return Ok(month);
         }
     }
@@ -1250,4 +1261,73 @@ mod test {
             }
         }
     }
+
+    #[test]
+    fn month() {
+        for (input, expected, with_shortening) in [
+            ("", None, false),
+            ("i", Some(1.0), false),
+            ("ii", Some(2.0), false),
+            ("iii", Some(3.0), false),
+            ("iiii", None, false),
+            ("iv", Some(4.0), false),
+            ("v", Some(5.0), false),
+            ("vi", Some(6.0), false),
+            ("vii", Some(7.0), false),
+            ("viii", Some(8.0), false),
+            ("ix", Some(9.0), false),
+            ("viiii", Some(8.0), false),
+            ("x", Some(10.0), false),
+            ("xi", Some(11.0), false),
+            ("xii", Some(12.0), false),
+            ("0", None, false),
+            ("1", Some(1.0), false),
+            ("2", Some(2.0), false),
+            ("3", Some(3.0), false),
+            ("4", Some(4.0), false),
+            ("5", Some(5.0), false),
+            ("6", Some(6.0), false),
+            ("7", Some(7.0), false),
+            ("8", Some(8.0), false),
+            ("9", Some(9.0), false),
+            ("10", Some(10.0), false),
+            ("11", Some(11.0), false),
+            ("12", Some(12.0), false),
+            ("13", None, false),
+            ("january", Some(1.0), true),
+            ("JANAURY", Some(1.0), true),
+            ("February", Some(2.0), true),
+            ("fEbraury", Some(2.0), true),
+            ("MArch", Some(3.0), true),
+            ("marhc", Some(3.0), true),
+            ("apRIL", Some(4.0), true),
+            ("may", Some(5.0), true),
+            ("june", Some(6.0), true),
+            ("july", Some(7.0), true),
+            ("august", Some(8.0), true),
+            ("september", Some(9.0), true),
+            ("october", Some(10.0), true),
+            ("november", Some(11.0), true),
+            ("decmeber", Some(12.0), true),
+            ("december", Some(12.0), true),
+        ] {
+            let lengths = if with_shortening {
+                (3..input.len()).rev().collect()
+            } else {
+                vec![input.len()]
+            };
+
+            for length in lengths {
+                let input = &input[..length];
+                let parsed = Format::new(Type::Month, 40, 0)
+                    .unwrap()
+                    .parser()
+                    .parse(input, UTF_8)
+                    .unwrap_or(Value::Number(None))
+                    .as_number()
+                    .unwrap();
+                assert_eq!(parsed, expected, "parsing {input}");
+            }
+        }
+    }
 }