dictionary::Value,
endian::{Endian, Parse},
format::{DateTemplate, Format, Settings, TemplateItem, Type},
+ raw::{EncodedStr, EncodedString},
settings::{EndianSettings, Settings as PsppSettings},
};
use encoding_rs::Encoding;
use smallstr::SmallString;
use std::{
- borrow::Cow,
fmt::{Display, Write},
str::FromStr,
};
use thiserror::Error as ThisError;
-#[derive(Clone, Debug)]
-pub enum EncodedString {
- Encoded {
- bytes: Vec<u8>,
- encoding: &'static Encoding,
- },
- Utf8 {
- s: String,
- },
-}
-
-impl<'a> From<EncodedStr<'a>> for EncodedString {
- fn from(value: EncodedStr<'a>) -> Self {
- match value {
- EncodedStr::Encoded { bytes, encoding } => Self::Encoded {
- bytes: bytes.into(),
- encoding,
- },
- EncodedStr::Utf8 { s } => Self::Utf8 { s: s.into() },
- }
- }
-}
-
-pub enum EncodedStr<'a> {
- Encoded {
- bytes: &'a [u8],
- encoding: &'static Encoding,
- },
- Utf8 {
- s: &'a str,
- },
-}
-
-impl<'a> EncodedStr<'a> {
- pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
- Self::Encoded { bytes, encoding }
- }
- pub fn as_str(&self) -> Cow<'_, str> {
- match self {
- EncodedStr::Encoded { bytes, encoding } => {
- encoding.decode_without_bom_handling(&bytes).0
- }
- EncodedStr::Utf8 { s } => Cow::from(*s),
- }
- }
- pub fn as_bytes(&self) -> &[u8] {
- match self {
- EncodedStr::Encoded { bytes, .. } => bytes,
- EncodedStr::Utf8 { s } => s.as_bytes(),
- }
- }
- pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
- match self {
- EncodedStr::Encoded { bytes, encoding } => {
- let utf8 = encoding.decode_without_bom_handling(bytes).0;
- match encoding.encode(&utf8).0 {
- Cow::Borrowed(_) => {
- // Recoding into UTF-8 and then back did not change anything.
- Cow::from(*bytes)
- }
- Cow::Owned(owned) => Cow::Owned(owned),
- }
- }
- EncodedStr::Utf8 { s } => encoding.encode(s).0,
- }
- }
- pub fn is_empty(&self) -> bool {
- match self {
- EncodedStr::Encoded { bytes, .. } => bytes.is_empty(),
- EncodedStr::Utf8 { s } => s.is_empty(),
- }
- }
-}
-
-impl<'a> From<&'a str> for EncodedStr<'a> {
- fn from(s: &'a str) -> Self {
- Self::Utf8 { s }
- }
-}
-
-impl<'a> From<&'a String> for EncodedStr<'a> {
- fn from(s: &'a String) -> Self {
- Self::Utf8 { s: s.as_str() }
- }
-}
-
#[derive(Clone, Debug)]
pub struct ParseError {
format: Format,
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
- "{:?} cannot be parsed as {}: {}",
- &self.input, &self.format, &self.kind
+ "{} cannot be parsed as {}: {}",
+ self.input.borrowed().quoted(),
+ &self.format,
+ &self.kind
)
}
}
-#[derive(ThisError, Clone, Debug)]
+#[derive(ThisError, Clone, Debug, PartialEq, Eq)]
enum ParseErrorKind {
/// Field contents are not numeric.
#[error("Field contents are not numeric.")]
#[error("Field contains unexpected non-hex digit {0:?}.")]
NonHexDigit(char),
+ /// Field contains odd number of hex digits.
+ #[error("Field contains {0:?} hex digits but only an even number is allowed.")]
+ OddLength(usize),
+
/// Field contains invalid BCD digit.
#[error("Field contains invalid BCD digit ({0:?}).")]
NonBDCDigit(u8),
+ /// Invalid BCD sign.
+ #[error("Invalid BCD sign. 0x{0:x}.")]
+ InvalidBCDSign(u8),
+
/// Day must be between 1 and 31.
#[error("Day ({0}) must be between 1 and 31.")]
InvalidDay(i32),
/// Invalid zoned decimal (Z) syntax.
#[error("Invalid zoned decimal (Z) syntax.")]
InvalidZ,
-
- /// Invalid BCD sign.
- #[error("Invalid BCD sign. 0x{0:x}.")]
- InvalidBCDSign(u8),
}
pub struct ParseValue<'a> {
Type::A => Ok(Value::String(
input.to_encoding(self.output_encoding).into(),
)),
- Type::AHex => todo!(),
+ Type::AHex => self.parse_ahex(&input.as_str()),
}
.map_err(|kind| ParseError {
format: self.format,
Ok(Value::Number(number))
}
+ /// Parses `input` as a sequence of hexadecimal digit pairs, producing one
+ /// output byte per pair (first digit is the high nibble).
+ ///
+ /// The result is then truncated or zero-padded to exactly
+ /// `self.format.w() / 2` bytes.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`ParseErrorKind::OddLength`] if the input ends in the middle of
+ /// a pair, or [`ParseErrorKind::NonHexDigit`] if a complete pair contains a
+ /// character that is not a hex digit.
+ fn parse_ahex(&self, input: &str) -> Result<Value, ParseErrorKind> {
+     let n = self.format.w() / 2;
+     let mut result = Vec::with_capacity(n);
+     let mut iter = input.chars();
+     while let Some(hi) = iter.next() {
+         let Some(lo) = iter.next() else {
+             // Report the number of characters rather than UTF-8 bytes:
+             // the dangling character has not been validated yet and may be
+             // multi-byte, in which case `input.len()` would overstate the
+             // count (and could even report an even number).
+             return Err(ParseErrorKind::OddLength(input.chars().count()));
+         };
+         let Some(hi) = hi.to_digit(16) else {
+             return Err(ParseErrorKind::NonHexDigit(hi));
+         };
+         let Some(lo) = lo.to_digit(16) else {
+             return Err(ParseErrorKind::NonHexDigit(lo));
+         };
+         // Both nibbles are below 16, so the sum always fits in a `u8`.
+         result.push((hi * 16 + lo) as u8);
+     }
+     // Too many digits are truncated; too few are padded with zero bytes.
+     result.resize(n, 0);
+     Ok(Value::String(result.into()))
+ }
+
fn parse_hex(&self, input: &str) -> Result<Option<u64>, ParseErrorKind> {
let input = input.trim();
if input.is_empty() || input == "." {
- return Ok(None);
- }
- if let Ok(value) = u64::from_str_radix(input, 16) {
+ Ok(None)
+ } else if let Ok(value) = u64::from_str_radix(input, 16) {
Ok(Some(value))
} else {
- println!("{input:?} {:?}", u64::from_str_radix(input, 16));
let c = input.chars().find(|c| !c.is_ascii_hexdigit()).unwrap();
Err(ParseErrorKind::NonHexDigit(c))
}
dictionary::Value,
endian::Endian,
format::{
- parse::{EncodedStr, ParseError, ParseErrorKind, Sign},
+ parse::{ParseError, ParseErrorKind, Sign},
Epoch, Format, Settings as FormatSettings, Type,
},
+ raw::EncodedStr,
settings::EndianSettings,
};
}
}
}
+
+ /// Exercises AHEX parsing through a parser built from
+ /// `Format::new(Type::AHex, 16, 0)` (presumably width 16, no decimals);
+ /// every successful result below is 8 bytes long.
+ #[test]
+ fn ahex() {
+ let parser = Format::new(Type::AHex, 16, 0).unwrap().parser(UTF_8);
+
+ // Parse correct number of hex digits.
+ assert_eq!(
+ parser
+ .parse("6162636465666768")
+ .unwrap()
+ .as_string()
+ .unwrap()
+ .as_encoded(UTF_8)
+ .as_str(),
+ "abcdefgh"
+ );
+
+ // Parse too few hex digits: the result is padded with a zero byte.
+ assert_eq!(
+ parser
+ .parse("61626364656667")
+ .unwrap()
+ .as_string()
+ .unwrap()
+ .as_encoded(UTF_8)
+ .as_str(),
+ "abcdefg\u{0}"
+ );
+
+ // Parse too many hex digits: the extra pair ("69") is dropped.
+ assert_eq!(
+ parser
+ .parse("616263646566676869")
+ .unwrap()
+ .as_string()
+ .unwrap()
+ .as_encoded(UTF_8)
+ .as_str(),
+ "abcdefgh"
+ );
+
+ // Non-hex digit: the first offending character is reported.
+ assert_eq!(
+ parser.parse("61626364656667xyzzy").unwrap_err().kind,
+ ParseErrorKind::NonHexDigit('x')
+ );
+
+ // Odd number of hex digits: the error carries the input length (15).
+ assert_eq!(
+ parser.parse("616263646566676").unwrap_err().kind,
+ ParseErrorKind::OddLength(15)
+ );
+ }
}
pub fn spaces(n: usize) -> Self {
Self(std::iter::repeat_n(b' ', n).collect())
}
+ /// Returns a borrowed [`EncodedStr`] view of this string's bytes, tagged
+ /// with `encoding`. The bytes are neither copied nor validated here.
+ pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
+     EncodedStr::Encoded {
+         bytes: &self.0,
+         encoding,
+     }
+ }
}
impl From<Cow<'_, [u8]>> for RawString {
}
}
+/// An owned string that is either raw bytes tagged with their encoding or
+/// UTF-8 text.
+#[derive(Clone, Debug)]
+pub enum EncodedString {
+ /// Owned bytes together with the encoding they are in.
+ Encoded {
+ bytes: Vec<u8>,
+ encoding: &'static Encoding,
+ },
+ /// Owned UTF-8 text.
+ Utf8 {
+ s: String,
+ },
+}
+
+impl EncodedString {
+    /// Returns a borrowed [`EncodedStr`] view of this string, keeping the
+    /// same variant and (for encoded bytes) the same encoding.
+    pub fn borrowed(&self) -> EncodedStr<'_> {
+        match self {
+            Self::Utf8 { s } => EncodedStr::Utf8 { s: s.as_str() },
+            Self::Encoded { bytes, encoding } => EncodedStr::Encoded {
+                bytes: bytes.as_slice(),
+                encoding: *encoding,
+            },
+        }
+    }
+}
+
+impl<'a> From<EncodedStr<'a>> for EncodedString {
+    /// Copies the borrowed contents into an owned [`EncodedString`] of the
+    /// same variant.
+    fn from(value: EncodedStr<'a>) -> Self {
+        match value {
+            EncodedStr::Utf8 { s } => EncodedString::Utf8 { s: s.to_owned() },
+            EncodedStr::Encoded { bytes, encoding } => {
+                let bytes = bytes.to_vec();
+                EncodedString::Encoded { bytes, encoding }
+            }
+        }
+    }
+}
+
+/// A borrowed string that is either raw bytes tagged with their encoding or
+/// UTF-8 text; the borrowed counterpart of [`EncodedString`].
+pub enum EncodedStr<'a> {
+ /// Borrowed bytes together with the encoding they are in.
+ Encoded {
+ bytes: &'a [u8],
+ encoding: &'static Encoding,
+ },
+ /// Borrowed UTF-8 text.
+ Utf8 {
+ s: &'a str,
+ },
+}
+
+impl<'a> EncodedStr<'a> {
+    /// Wraps `bytes`, which are taken to be in `encoding`, without copying
+    /// or validating them.
+    pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
+        Self::Encoded { bytes, encoding }
+    }
+
+    /// Returns the contents as UTF-8 text.
+    ///
+    /// Encoded bytes are decoded with their own encoding; the "had errors"
+    /// flag reported by the decoder is discarded.
+    pub fn as_str(&self) -> Cow<'_, str> {
+        match self {
+            EncodedStr::Encoded { bytes, encoding } => {
+                encoding.decode_without_bom_handling(bytes).0
+            }
+            EncodedStr::Utf8 { s } => Cow::from(*s),
+        }
+    }
+
+    /// Returns the underlying bytes as stored, without any recoding.
+    pub fn as_bytes(&self) -> &[u8] {
+        match self {
+            EncodedStr::Encoded { bytes, .. } => bytes,
+            EncodedStr::Utf8 { s } => s.as_bytes(),
+        }
+    }
+
+    /// Returns the contents recoded into the target `encoding`.
+    ///
+    /// The original bytes are borrowed when recoding leaves them unchanged;
+    /// otherwise a new buffer is returned. The "had errors" flags reported
+    /// by the decoder and encoder are discarded.
+    pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<'_, [u8]> {
+        match self {
+            // The source encoding is bound as `from` so that it does not
+            // shadow the target `encoding` parameter.
+            EncodedStr::Encoded {
+                bytes,
+                encoding: from,
+            } => {
+                let utf8 = from.decode_without_bom_handling(bytes).0;
+                // A borrowed encoder result means the output is exactly the
+                // UTF-8 bytes of `utf8`; record only whether that happened so
+                // the borrow of `utf8` ends before it is consumed below.
+                let recoded = match encoding.encode(&utf8).0 {
+                    Cow::Borrowed(_) => None,
+                    Cow::Owned(owned) => Some(owned),
+                };
+                match (recoded, utf8) {
+                    (Some(owned), _) => Cow::Owned(owned),
+                    // Neither decoding nor encoding changed anything, so the
+                    // original bytes are already the answer.
+                    (None, Cow::Borrowed(_)) => Cow::Borrowed(*bytes),
+                    // Decoding changed the bytes but encoding did not: the
+                    // decoded text's bytes are the answer.
+                    (None, Cow::Owned(decoded)) => Cow::Owned(decoded.into_bytes()),
+                }
+            }
+            EncodedStr::Utf8 { s } => encoding.encode(s).0,
+        }
+    }
+
+    /// Returns true if the string contains no bytes.
+    pub fn is_empty(&self) -> bool {
+        match self {
+            EncodedStr::Encoded { bytes, .. } => bytes.is_empty(),
+            EncodedStr::Utf8 { s } => s.is_empty(),
+        }
+    }
+
+    /// Returns an adapter whose `Display` output is this string in quoted
+    /// form.
+    pub fn quoted(&self) -> QuotedEncodedStr<'_> {
+        QuotedEncodedStr(self)
+    }
+}
+
+impl<'a> From<&'a str> for EncodedStr<'a> {
+    /// Wraps UTF-8 text as the [`EncodedStr::Utf8`] variant without copying.
+    fn from(text: &'a str) -> Self {
+        EncodedStr::Utf8 { s: text }
+    }
+}
+
+impl<'a> From<&'a String> for EncodedStr<'a> {
+    /// Borrows the contents of `string` as the [`EncodedStr::Utf8`] variant.
+    fn from(string: &'a String) -> Self {
+        let s: &'a str = string;
+        EncodedStr::Utf8 { s }
+    }
+}
+
+/// Display adapter returned by [`EncodedStr::quoted`]; its `Display`
+/// implementation writes the wrapped string using `{:?}` formatting.
+pub struct QuotedEncodedStr<'a>(&'a EncodedStr<'a>);
+
+impl Display for QuotedEncodedStr<'_> {
+    /// Writes the wrapped string, converted to text, in its `Debug`
+    /// (quoted and escaped) form.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let text = self.0.as_str();
+        write!(f, "{text:?}")
+    }
+}
+
#[derive(Clone, Debug)]
pub struct ValueLabel<V, S>
where