calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name},
dictionary::Value,
endian::ToBytes,
- format::{Category, Decimal, Format, NumberStyle, Settings, Type},
+ format::{Category, DateTemplate, Decimal, Format, NumberStyle, Settings, TemplateItem, Type},
settings::{EndianSettings, Settings as PsppSettings},
};
};
let mut output = SmallString::<[u8; 40]>::new();
- let mut template = self
- .format
- .type_
- .date_template(self.format.w())
- .unwrap()
- .bytes()
- .peekable();
- while let Some(c) = template.next() {
- let mut count = 1;
- while template.next_if_eq(&c).is_some() {
- count += 1;
- }
+ for TemplateItem { c, n } in DateTemplate::for_format(self.format).unwrap() {
match c {
- b'd' if count < 3 => write!(&mut output, "{:02}", date.day()).unwrap(),
- b'd' => write!(&mut output, "{:03}", day_of_year(date).unwrap_or(1)).unwrap(),
- b'm' if count < 3 => write!(&mut output, "{:02}", date.month()).unwrap(),
- b'm' => write!(&mut output, "{}", short_month_name(date.month()).unwrap()).unwrap(),
- b'y' if count >= 4 => {
+ 'd' if n < 3 => write!(&mut output, "{:02}", date.day()).unwrap(),
+ 'd' => write!(&mut output, "{:03}", day_of_year(date).unwrap_or(1)).unwrap(),
+ 'm' if n < 3 => write!(&mut output, "{:02}", date.month()).unwrap(),
+ 'm' => write!(&mut output, "{}", short_month_name(date.month()).unwrap()).unwrap(),
+ 'y' if n >= 4 => {
let year = date.year();
if year <= 9999 {
write!(&mut output, "{year:04}").unwrap();
return self.overflow(f);
}
}
- b'y' => {
+ 'y' => {
let epoch = self.settings.epoch.0;
let offset = date.year() - epoch;
if offset < 0 || offset > 99 {
}
write!(&mut output, "{offset:02}").unwrap();
}
- b'q' => write!(&mut output, "{}", date.month0() / 3 + 1).unwrap(),
- b'w' => write!(
+ 'q' => write!(&mut output, "{}", date.month0() / 3 + 1).unwrap(),
+ 'w' => write!(
&mut output,
"{:2}",
(day_of_year(date).unwrap_or(1) - 1) / 7 + 1
)
.unwrap(),
- b'D' => {
+ 'D' => {
if time < 0.0 {
output.push('-');
}
time = time.abs();
- write!(&mut output, "{:1$.0}", (time / DAY).floor(), count).unwrap();
+ write!(&mut output, "{:1$.0}", (time / DAY).floor(), n).unwrap();
time %= DAY;
}
- b'H' => {
+ 'H' => {
if time < 0.0 {
output.push('-');
}
time = time.abs();
- write!(&mut output, "{:1$.0}", (time / HOUR).floor(), count).unwrap();
+ write!(&mut output, "{:1$.0}", (time / HOUR).floor(), n).unwrap();
time %= HOUR;
}
- b'M' => {
+ 'M' => {
if time < 0.0 {
output.push('-');
}
}
}
}
- c if count == 1 => output.push(c as char),
+ c if n == 1 => output.push(c as char),
_ => unreachable!(),
}
}
}
}
- /// For time and date formats, returns a template used for input and output
- /// in a field of the given `width`.
- ///
- /// `width` only affects whether a 2-digit year or a 4-digit year is used,
- /// that is, whether the returned string contains `yy` or `yyyy`, and
- /// whether seconds are included, that is, whether the returned string
- /// contains `:SS`. A caller that doesn't care whether the returned string
- /// contains `yy` or `yyyy` or `:SS` can just specify 0 to omit them.
- pub fn date_template(self, width: usize) -> Option<&'static str> {
- let (short, long) = match self {
- Self::F
- | Self::Comma
- | Self::Dot
- | Self::Dollar
- | Self::Pct
- | Self::E
- | Self::CC(_)
- | Self::N
- | Self::Z
- | Self::P
- | Self::PK
- | Self::IB
- | Self::PIB
- | Self::PIBHex
- | Self::RB
- | Self::RBHex
- | Self::WkDay
- | Self::Month
- | Self::A
- | Self::AHex => return None,
- Self::Date => ("dd-mmm-yy", "dd-mmm-yyyy"),
- Self::ADate => ("mm/dd/yy", "mm/dd/yyyy"),
- Self::EDate => ("dd.mm.yy", "dd.mm.yyyy"),
- Self::JDate => ("yyddd", "yyyyddd"),
- Self::SDate => ("yy/mm/dd", "yyyy/mm/dd"),
- Self::QYr => ("q Q yy", "q Q yyyy"),
- Self::MoYr => ("mmm yy", "mmm yyyy"),
- Self::WkYr => ("ww WK yy", "ww WK yyyy"),
- Self::DateTime => ("dd-mmm-yyyy HH:MM", "dd-mmm-yyyy HH:MM:SS"),
- Self::YMDHMS => ("yyyy-mm-dd HH:MM", "yyyy-mm-dd HH:MM:SS"),
- Self::MTime => ("MM", "MM:SS"),
- Self::Time => ("HH:MM", "HH:MM:SS"),
- Self::DTime => ("D HH:MM", "D HH:MM:SS"),
- };
- if width >= long.len() {
- Some(long)
- } else {
- Some(short)
- }
- }
-
pub fn as_string(&self) -> &'static str {
match self {
Self::F => "F",
})
}
}
+
/// An item within a [DateTemplate]: one template character together with the
/// number of times it is repeated in a row.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TemplateItem {
    /// Character in the template.
    pub c: char,

    /// Number of repetitions of the character.
    pub n: usize,
}
+
/// A template for date and time formats.
///
/// Wraps the part of the template string that has not yet been consumed;
/// cloning is cheap because only a `&'static str` is copied.
#[derive(Clone, Debug)]
pub struct DateTemplate(&'static str);
+
+impl DateTemplate {
+ /// Returns a [DateTemplate] used for date and time input and output in a
+ /// field of the given `type_` and `width`.
+ ///
+ /// `width` only affects whether a 2-digit year or a 4-digit year is used,
+ /// that is, whether the returned string contains `yy` or `yyyy`, and
+ /// whether seconds are included, that is, whether the returned string
+ /// contains `:SS`. A caller that doesn't care whether the returned string
+ /// contains `yy` or `yyyy` or `:SS` can just specify 0 to omit them.
+ pub fn new(type_: Type, width: usize) -> Option<Self> {
+ let (short, long) = match type_ {
+ Type::F
+ | Type::Comma
+ | Type::Dot
+ | Type::Dollar
+ | Type::Pct
+ | Type::E
+ | Type::CC(_)
+ | Type::N
+ | Type::Z
+ | Type::P
+ | Type::PK
+ | Type::IB
+ | Type::PIB
+ | Type::PIBHex
+ | Type::RB
+ | Type::RBHex
+ | Type::WkDay
+ | Type::Month
+ | Type::A
+ | Type::AHex => return None,
+ Type::Date => ("dd-mmm-yy", "dd-mmm-yyyy"),
+ Type::ADate => ("mm/dd/yy", "mm/dd/yyyy"),
+ Type::EDate => ("dd.mm.yy", "dd.mm.yyyy"),
+ Type::JDate => ("yyddd", "yyyyddd"),
+ Type::SDate => ("yy/mm/dd", "yyyy/mm/dd"),
+ Type::QYr => ("q Q yy", "q Q yyyy"),
+ Type::MoYr => ("mmm yy", "mmm yyyy"),
+ Type::WkYr => ("ww WK yy", "ww WK yyyy"),
+ Type::DateTime => ("dd-mmm-yyyy HH:MM", "dd-mmm-yyyy HH:MM:SS"),
+ Type::YMDHMS => ("yyyy-mm-dd HH:MM", "yyyy-mm-dd HH:MM:SS"),
+ Type::MTime => ("MM", "MM:SS"),
+ Type::Time => ("HH:MM", "HH:MM:SS"),
+ Type::DTime => ("D HH:MM", "D HH:MM:SS"),
+ };
+ if width >= long.len() {
+ Some(DateTemplate(long))
+ } else {
+ Some(DateTemplate(short))
+ }
+ }
+
+ pub fn for_format(format: Format) -> Option<Self> {
+ Self::new(format.type_(), format.w())
+ }
+
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }
+}
+
+impl Iterator for DateTemplate {
+ type Item = TemplateItem;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let mut iter = self.0.chars();
+ let c = iter.next()?;
+ self.0 = iter.as_str();
+ let mut n = 1;
+ while iter.next() == Some(c) {
+ self.0 = iter.as_str();
+ n += 1;
+ }
+ Some(TemplateItem { c, n })
+ }
+}
use crate::{
dictionary::Value,
- format::{Format, Settings, Type},
+ format::{DateTemplate, Format, Settings, TemplateItem, Type},
settings::{EndianSettings, Settings as PsppSettings},
};
use encoding_rs::Encoding;
pub struct ParseError {
format: Format,
input: String,
- details: ParseErrorDetails,
+ details: ParseErrorKind,
}
impl std::error::Error for ParseError {}
}
#[derive(ThisError, Clone, Debug)]
-enum ParseErrorDetails {
+enum ParseErrorKind {
/// Field contents are not numeric.
#[error("Field contents are not numeric.")]
NotNumeric,
/// Field contains unexpected non-digit.
#[error("Field contains unexpected non-digit {0:?}.")]
Nondigit(char),
+
+ /// Day must be between 1 and 31.
+ #[error("Day ({0}) must be between 1 and 31.")]
+ InvalidDay(i32),
+
+ /// Syntax error in date field.
+ #[error("Syntax error in date field.")]
+ DateSyntax,
+
+ /// Julian day must have exactly three digits.
+ #[error("Julian day must have exactly three digits (not {0}).")]
+ InvalidYDayLen(usize),
+
+ /// Julian day must be between 1 and 366, inclusive.
+ #[error("Julian day ({0}) must be between 1 and 366, inclusive.")]
+ InvalidYDay(i32),
+
+ /// Unrecognized month format.
+ #[error("Unrecognized month format. Months may be specified as Arabic or Roman numerals or as at least 3 letters of their English names.")]
+ InvalidMonth,
}
pub struct ParseValue<'a> {
})
}
- fn parse_number(&self, input: &str, type_: Type) -> Result<Value, ParseErrorDetails> {
+ /*
+ /// Parses `s`, which is encoded in `encoding`. For string formats,
+ /// `encoding` is also the output encoding.
+ fn parse_encoded(&self, s: &[u8], encoding: &'static Encoding) -> Result<Value, ParseError> {
+ if s.is_empty() {
+ return Ok(self.format.default_value());
+ }
+ match self.format.type_ {
+ Type::F | Type::Comma | Type::Dot | Type::Dollar | Type::Pct | Type::E => {
+ self.parse_number(s, self.format.type_)
+ }
+ Type::CC(_) => self.parse_number(s, Type::F),
+ Type::N => self.parse_n(s),
+ Type::Z => todo!(),
+ Type::P => todo!(),
+ Type::PK => todo!(),
+ Type::IB => todo!(),
+ Type::PIB => todo!(),
+ Type::PIBHex => todo!(),
+ Type::RB => todo!(),
+ Type::RBHex => todo!(),
+ Type::Date => todo!(),
+ Type::ADate => todo!(),
+ Type::EDate => todo!(),
+ Type::JDate => todo!(),
+ Type::SDate => todo!(),
+ Type::QYr => todo!(),
+ Type::MoYr => todo!(),
+ Type::WkYr => todo!(),
+ Type::DateTime => todo!(),
+ Type::YMDHMS => todo!(),
+ Type::MTime => todo!(),
+ Type::Time => todo!(),
+ Type::DTime => todo!(),
+ Type::WkDay => todo!(),
+ Type::Month => todo!(),
+ Type::A => todo!(),
+ Type::AHex => todo!(),
+ }
+ .map_err(|details| ParseError {
+ format: self.format,
+ input: s.into(),
+ details,
+ })
+ }
+ */
+
+ fn parse_number(&self, input: &str, type_: Type) -> Result<Value, ParseErrorKind> {
let style = self.settings.number_style(type_);
let input = input.trim();
}
input
}
- fn take<'a>(input: &'a str, rest: &'a str) -> (&'a str, &'a str) {
- (&input[..input.len() - rest.len()], rest)
- }
let (_, input) = strip_prefix(input, &*style.prefix.s);
let (sign, input) = strip_one_of(input, &['-', '+']);
let (_, input) = strip_prefix(input, &*style.suffix.s);
if !input.is_empty() {
- return Err(ParseErrorDetails::NotNumeric);
+ return Err(ParseErrorKind::NotNumeric);
}
let mut number = SmallString::<[u8; 64]>::new();
match f64::from_str(&number) {
Ok(value) => Ok(Value::Number(Some(value))),
- Err(_) => Err(ParseErrorDetails::InvalidNumericSyntax),
+ Err(_) => Err(ParseErrorKind::InvalidNumericSyntax),
}
}
- fn parse_n(&self, input: &str) -> Result<Value, ParseErrorDetails> {
+ fn parse_n(&self, input: &str) -> Result<Value, ParseErrorKind> {
match input.chars().find(|c| !c.is_ascii_digit()) {
None => Ok(Value::Number(Some(input.parse().unwrap()))),
- Some(nondigit) => Err(ParseErrorDetails::Nondigit(nondigit)),
+ Some(nondigit) => Err(ParseErrorKind::Nondigit(nondigit)),
+ }
+ }
+
+ fn parse_date(&self, input: &str) -> Result<Value, ParseErrorKind> {
+ let orig_input = input;
+ let mut input = input.trim();
+ if input.is_empty() || input == "." {
+ return Ok(Value::sysmis());
+ }
+
+ let mut day = 1;
+ let mut yday = 1;
+ let mut month = 1;
+ let mut year = None;
+
+ let mut iter = DateTemplate::for_format(self.format).unwrap();
+ let template_width = iter.len();
+ while let Some(TemplateItem { c, n }) = iter.next() {
+ match c {
+ 'd' if n < 3 => {
+ day = parse_day(&mut input)?;
+ }
+ 'd' => {
+ yday = parse_yday(&mut input)?;
+ }
+ 'm' => {
+ month = parse_month(&mut input)?;
+ }
+ 'y' => {
+ let max_digits = if !iter
+ .clone()
+ .next()
+ .is_some_and(|item| item.c.is_ascii_alphabetic())
+ {
+ usize::MAX
+ } else if orig_input.len() >= template_width + 2 {
+ 4
+ } else {
+ 2
+ };
+ //year = Some(parse_year(&mut input, max_digits)?);
+ year = Some(1);
+ }
+ _ => (),
+ }
}
+ todo!()
+ }
+}
+
+fn parse_day(s: &mut &str) -> Result<i32, ParseErrorKind> {
+ let day = parse_int::<i32>(s)?;
+ if (1..=31).contains(&day) {
+ Ok(day)
+ } else {
+ Err(ParseErrorKind::InvalidDay(day))
+ }
+}
+
+fn parse_yday(input: &mut &str) -> Result<i32, ParseErrorKind> {
+ let mut rest = *input;
+ let yday = parse_int::<i32>(&mut rest)?;
+ let yday_len = input.len() - rest.len();
+ if yday_len != 3 {
+ return Err(ParseErrorKind::InvalidYDayLen(yday_len));
+ } else if !(1..=366).contains(&yday) {
+ return Err(ParseErrorKind::InvalidYDay(yday));
+ } else {
+ *input = rest;
+ Ok(yday)
+ }
+}
+
+fn parse_month(input: &mut &str) -> Result<i32, ParseErrorKind> {
+ if input.starts_with(|c: char| c.is_ascii_digit()) {
+ let month = parse_int(input)?;
+ if (1..=12).contains(&month) {
+ return Ok(month);
+ }
+ } else {
+ let name;
+ (name, *input) = strip_name(*input);
+ let name = name.as_bytes();
+
+ static ENGLISH_NAMES: [&[u8]; 12] = [
+ b"jan", b"feb", b"mar", b"apr", b"may", b"jun", b"jul", b"aug", b"sep", b"oct", b"nov",
+ b"dec",
+ ];
+ if let Some(month) = match_name(&name[..3.min(name.len())], &ENGLISH_NAMES) {
+ return Ok(month);
+ }
+
+ static ROMAN_NAMES: [&[u8]; 12] = [
+ b"i", b"ii", b"iii", b"iv", b"v", b"vi", b"vii", b"viii", b"ix", b"x", b"xi", b"xii",
+ ];
+ if let Some(month) = match_name(&name, &ENGLISH_NAMES) {
+ return Ok(month);
+ }
+ }
+ Err(ParseErrorKind::InvalidMonth)
+}
+
/// Looks up `name` in `candidates`, ignoring ASCII case, and returns the
/// 1-based position of the first match, or `None` if nothing matches.
fn match_name(name: &[u8], candidates: &[&[u8]]) -> Option<i32> {
    candidates
        .iter()
        .position(|candidate| candidate.eq_ignore_ascii_case(name))
        .map(|index| index as i32 + 1)
}
+
/// Splits `input` into its leading run of ASCII letters and everything after
/// that run.  Either part may be empty.
fn strip_name(input: &str) -> (&str, &str) {
    let end_of_name = input
        .find(|c: char| !c.is_ascii_alphabetic())
        .unwrap_or(input.len());
    input.split_at(end_of_name)
}
+
/// Given `input` and `rest`, where `rest` is expected to be a suffix of
/// `input`, returns the part of `input` that precedes `rest`, along with
/// `rest` itself.
///
/// Only the lengths are compared; panics if `rest` is longer than `input` or
/// the split point is not on a character boundary.
fn take<'a>(input: &'a str, rest: &'a str) -> (&'a str, &'a str) {
    let consumed = input.len() - rest.len();
    let head = &input[..consumed];
    (head, rest)
}
+
+fn parse_int<T>(input: &mut &str) -> Result<T, ParseErrorKind>
+where
+ T: FromStr,
+{
+ fn strip_prefix<'a>(input: &'a str, prefix: &str) -> (bool, &'a str) {
+ if prefix.is_empty() {
+ (false, input)
+ } else if let Some(rest) = input.strip_prefix(prefix) {
+ (true, rest.trim_start())
+ } else {
+ (false, input)
+ }
+ }
+ fn strip_one_of<'a>(input: &'a str, chars: &[char]) -> (Option<char>, &'a str) {
+ let mut iter = input.chars();
+ match iter.next() {
+ Some(c) if chars.contains(&c) => (Some(c), iter.as_str().trim_start()),
+ _ => (None, input),
+ }
+ }
+ fn strip_integer(mut input: &str, grouping: Option<char>) -> &str {
+ while let Some(rest) = input.strip_prefix(|c: char| c.is_ascii_digit()) {
+ let rest = if let Some(grouping) = grouping {
+ rest.strip_prefix(grouping).unwrap_or(rest)
+ } else {
+ rest
+ };
+ input = rest;
+ }
+ input
+ }
+
+ let (_, rest) = strip_one_of(*input, &['+', '-']);
+ let (_, rest) = take(rest, rest.trim_start_matches(|c: char| c.is_ascii_digit()));
+ let (number, rest) = take(input, rest);
+ match number.parse::<T>() {
+ Ok(value) => {
+ *input = rest;
+ Ok(value)
+ }
+ Err(_) => Err(ParseErrorKind::DateSyntax),
}
}
fs::File,
io::{BufRead, BufReader},
path::Path,
- str::from_utf8,
};
use encoding_rs::UTF_8;
use crate::{
- dictionary::Value,
format::{Format, Type},
settings::Settings,
};
fn pct() {
test("pct.txt", Type::Pct);
}
+
+ /*
+ #[test]
+ fn legacy() {
+ for i in 0..=u16::MAX {
+ let input = i.to_be_bytes();
+
+ }
+ }*/
}