};
use thiserror::Error as ThisError;
+/// Owned string data, the owning counterpart of [`EncodedStr`].
+///
+/// The text is held either as raw bytes tagged with an encoding or as an
+/// ordinary Rust `String`.
+#[derive(Debug, Clone)]
+pub enum EncodedString {
+    /// Raw bytes together with the encoding they are tagged with.
+    Encoded {
+        bytes: Vec<u8>,
+        encoding: &'static Encoding,
+    },
+    /// Text held as a UTF-8 `String`.
+    Utf8 { s: String },
+}
+
+/// Copies a borrowed [`EncodedStr`] into an owned [`EncodedString`].
+///
+/// The variant is preserved: encoded bytes stay encoded (with the same
+/// encoding) and UTF-8 text stays UTF-8.
+impl<'a> From<EncodedStr<'a>> for EncodedString {
+    fn from(value: EncodedStr<'a>) -> Self {
+        match value {
+            EncodedStr::Utf8 { s } => EncodedString::Utf8 { s: s.to_owned() },
+            EncodedStr::Encoded { bytes, encoding } => EncodedString::Encoded {
+                bytes: bytes.to_vec(),
+                encoding,
+            },
+        }
+    }
+}
+
pub enum EncodedStr<'a> {
Encoded {
bytes: &'a [u8],
}
}
+/// Borrows the contents of a `String` as a UTF-8 [`EncodedStr`].
+impl<'a> From<&'a String> for EncodedStr<'a> {
+    fn from(s: &'a String) -> Self {
+        // `&String` deref-coerces to `&str`; no copy is made.
+        let s: &'a str = s;
+        EncodedStr::Utf8 { s }
+    }
+}
+
#[derive(Clone, Debug)]
pub struct ParseError {
format: Format,
- input: String,
+ input: EncodedString, // owned copy of the input that failed to parse
kind: ParseErrorKind,
}
}
}
- /// Parses `s`.
+ /// Parses `input`.
///
- /// This is only appropriate if `s` was originally encoded in UTF-8
- /// Otherwise, binary formats will not yield sensible parse results, because
- /// recoding bytes from (e.g.) windows-1252 into UTF-8, and then
+ /// # Input encoding
+ ///
+ /// Be careful about the encoding of `input`. It is tempting to recode all
+ /// input into UTF-8, but doing so breaks parsing of binary formats,
+ /// because recoding bytes from (e.g.) windows-1252 into UTF-8 and then
/// interpreting them as a binary number yields nonsense.
- pub fn parse(&self, s: &str) -> Result<Value, ParseError> {
- if s.is_empty() {
- return Ok(self.format.default_value());
- }
- match self.format.type_ {
- Type::F | Type::Comma | Type::Dot | Type::Dollar | Type::Pct | Type::E => {
- self.parse_number(s, self.format.type_)
- }
- Type::CC(_) => self.parse_number(s, Type::F),
- Type::N => self.parse_n(s),
- Type::Z => self.parse_z(s),
- Type::PIBHex => self.parse_pibhex(s),
- Type::RBHex => self.parse_rbhex(s),
- Type::Date
- | Type::ADate
- | Type::EDate
- | Type::JDate
- | Type::SDate
- | Type::QYr
- | Type::MoYr
- | Type::WkYr
- | Type::DateTime
- | Type::YmdHms
- | Type::MTime
- | Type::Time
- | Type::DTime => self.parse_date(s),
- Type::WkDay => self.parse_wkday(s),
- Type::Month => self.parse_month(s),
- Type::P | Type::PK | Type::IB | Type::PIB | Type::RB | Type::AHex => {
- todo!()
- }
- Type::A => Ok(Value::String(self.output_encoding.encode(s).0.into())),
- }
- .map_err(|details| ParseError {
- format: self.format,
- input: s.into(),
- kind: details,
- })
- }
-
- pub fn parse_all<'b, T>(&self, input: T) -> Result<Value, ParseError>
+ pub fn parse<'b, T>(&self, input: T) -> Result<Value, ParseError>
where
T: Into<EncodedStr<'b>>,
{
)),
Type::AHex => todo!(),
}
- .map_err(|details| ParseError {
+ .map_err(|kind| ParseError {
format: self.format,
- input: todo!(),
- kind: details,
+ input: input.into(),
+ kind,
})
}
.unwrap()
.parser(UTF_8)
.with_endian(EndianSettings::new(Endian::Big))
- .parse_all(EncodedStr::new(&raw[..], UTF_8))
+ .parse(EncodedStr::new(&raw[..], UTF_8))
.unwrap()
.as_number()
.unwrap()