calendar::{calendar_gregorian_to_offset, DateError},
dictionary::Value,
endian::{Endian, Parse},
- format::{DateTemplate, Format, Settings, TemplateItem, Type},
+ format::{DateTemplate, Decimals, Settings, TemplateItem, Type},
raw::{EncodedStr, EncodedString},
settings::{EndianSettings, Settings as PsppSettings},
};
/// Error describing an input that could not be parsed, together with what it
/// was being parsed as and the specific reason parsing failed.
#[derive(Clone, Debug)]
pub struct ParseError {
/// What the input was being parsed as.
- format: Format,
+ type_: Type,
/// The input that failed to parse.
input: EncodedString,
/// The specific reason parsing failed.
kind: ParseErrorKind,
}
f,
"{} cannot be parsed as {}: {}",
self.input.borrowed().quoted(),
- &self.format,
+ &self.type_,
&self.kind
)
}
#[derive(ThisError, Clone, Debug, PartialEq, Eq)]
enum ParseErrorKind {
- /// Field contents are not numeric.
- #[error("Field contents are not numeric.")]
+ /// Input is not numeric.
+ #[error("Input is not numeric.")]
NotNumeric,
/// Invalid numeric syntax.
}
/// Parser configuration for turning input into a `Value`.
///
/// Built by `ParseValue::new` and adjusted through the `with_*` builder
/// methods.
pub struct ParseValue<'a> {
/// What the input is parsed as.
- format: Format,
+ type_: Type,
/// Format settings consulted while parsing.
settings: &'a Settings,
/// Endianness settings.
/// NOTE(review): presumably only relevant to binary formats — confirm.
endian: EndianSettings,
/// Implied decimals configuration: when enabled, numbers written without a
/// decimal point are scaled down by the configured number of decimal places.
- implied_decimals: bool,
+ implied_decimals: Option<Decimals>,
/// NOTE(review): presumably the encoding used for string values produced by
/// the parser — confirm against the parsing code.
output_encoding: &'static Encoding,
}
-impl Format {
+impl Type {
/// Returns a [`ParseValue`] that parses input as `self`, passing
/// `output_encoding` through to [`ParseValue::new`].
pub fn parser(&self, output_encoding: &'static Encoding) -> ParseValue<'static> {
ParseValue::new(*self, output_encoding)
}
}
impl ParseValue<'static> {
/// Creates a parser whose format settings and endianness are taken from the
/// global PSPP settings, with implied decimals initially disabled.
- pub fn new(format: Format, output_encoding: &'static Encoding) -> Self {
+ pub fn new(type_: Type, output_encoding: &'static Encoding) -> Self {
// The global settings supply the defaults; the `with_*` methods override them.
let settings = PsppSettings::global();
Self {
- format,
+ type_,
settings: &settings.formats,
endian: settings.endian,
- implied_decimals: false,
+ implied_decimals: None,
output_encoding,
}
}
/// Returns this parser with its endianness settings replaced by `endian`;
/// every other field is carried over unchanged.
pub fn with_endian(mut self, endian: EndianSettings) -> Self {
    self.endian = endian;
    self
}
/// Returns this parser with implied decimals configured.
///
/// In the form that takes `d`, a value greater than zero is stored as
/// `Some(d)`; any other value stores `None`, which disables implied decimals.
- pub fn with_implied_decimals(self) -> Self {
+ pub fn with_implied_decimals(self, d: Decimals) -> Self {
Self {
- implied_decimals: true,
+ implied_decimals: if d > 0 { Some(d) } else { None },
..self
}
}
{
let input: EncodedStr = input.into();
if input.is_empty() {
- return Ok(self.format.default_value());
+ return Ok(self.type_.default_value());
}
- match self.format.type_ {
+ match self.type_ {
Type::F | Type::Comma | Type::Dot | Type::Dollar | Type::Pct | Type::E => {
- self.parse_number(&input.as_str(), self.format.type_)
+ self.parse_number(&input.as_str(), self.type_)
}
Type::CC(_) => self.parse_number(&input.as_str(), Type::F),
Type::N => self.parse_n(&input.as_str()),
Type::AHex => self.parse_ahex(&input.as_str()),
}
.map_err(|kind| ParseError {
- format: self.format,
+ type_: self.type_,
input: input.into(),
kind,
})
_ => return Err(ParseErrorKind::InvalidZ),
}
}
- if self.implied_decimals && !dot && self.format.d() != 0 {
- write!(&mut number, "e-{}", self.format.d()).unwrap();
+ match self.implied_decimals {
+ Some(d) if !dot && d > 0 => write!(&mut number, "e-{d}").unwrap(),
+ _ => (),
}
let number = number.parse::<f64>().unwrap();
let number = if sign == Some(Sign::Negative) {
}
/// Scales `number` by the implied decimal places.
///
/// When implied decimals are enabled with a positive count `d`, returns
/// `number` divided by ten to the power `d`; otherwise returns `number`
/// unchanged.
fn apply_decimals(&self, number: f64) -> f64 {
- if self.implied_decimals && self.format.d() > 0 {
- number / 10.0f64.powi(self.format.d() as i32)
- } else {
- number
+ match self.implied_decimals {
+ Some(d) if d > 0 => number / 10.0f64.powi(d as i32),
+ _ => number,
}
}
}
fn parse_ahex(&self, input: &str) -> Result<Value, ParseErrorKind> {
- let n = self.format.w() / 2;
- let mut result = Vec::with_capacity(n);
+ let mut result = Vec::with_capacity(input.len() / 2);
let mut iter = input.chars();
while let Some(hi) = iter.next() {
let Some(lo) = iter.next() else {
};
result.push((hi * 16 + lo) as u8);
}
- result.resize(n, 0);
Ok(Value::String(result.into()))
}
let mut time_sign = None;
let mut time = 0.0;
- let mut iter = DateTemplate::new(self.format.type_, 0).unwrap();
+ let mut iter = DateTemplate::new(self.type_, 0).unwrap();
let template_width = iter.len();
while let Some(TemplateItem { c, n }) = iter.next() {
match c {
time += parse_time(&mut p)? * 60.0 * 60.0;
}
'M' => {
- if self.format.type_ == Type::MTime {
+ if self.type_ == Type::MTime {
time_sign = Some(parse_sign(&mut p, time_sign));
}
time += self.parse_minute_second(&mut p)?;
'-' | '/' | '.' => parse_date_delimiter(&mut p)?,
':' => parse_time_delimiter(&mut p)?,
' ' => {
- if self.format.type_ != Type::MoYr {
+ if self.type_ != Type::MoYr {
p.strip_ws();
} else {
parse_date_delimiter(&mut p)?
fn parse_minute_second(&self, p: &mut StrParser<'_>) -> Result<f64, ParseErrorKind> {
let minute = parse_int::<i32>(p)?;
- if self.format.type_ != Type::MTime && !(0..=59).contains(&minute) {
+ if self.type_ != Type::MTime && !(0..=59).contains(&minute) {
return Err(ParseErrorKind::InvalidMinute(minute));
}
let time = minute as f64 * 60.0;
let base = Path::new(env!("CARGO_MANIFEST_DIR")).join("src/format/testdata/parse");
let input_stream = BufReader::new(File::open(base.join("num-in.txt")).unwrap());
let expected_stream = BufReader::new(File::open(base.join(name)).unwrap());
- let format = Format::new(type_, 40, 1).unwrap();
for ((input, expected), line_number) in input_stream
.lines()
.map(|result| result.unwrap())
.zip(expected_stream.lines().map(|result| result.unwrap()))
.zip(1..)
{
- let result = format.parser(UTF_8).parse(&input);
+ let result = type_.parser(UTF_8).parse(&input);
let error = result.clone().err();
let value = result
- .unwrap_or(format.default_value())
+ .unwrap_or(type_.default_value())
.display(Format::new(Type::F, 10, 4).unwrap(), UTF_8)
.to_string();
if value != expected {
panic!(
- "parsing {input:?} as {format} failed ({name}:{line_number}):\n got: {value:?}\nexpected: {expected:?}\ndecode error: {error:?}",
+ "parsing {input:?} as {type_} failed ({name}:{line_number}):\n got: {value:?}\nexpected: {expected:?}\ndecode error: {error:?}",
);
}
}
expected + time as i64
};
let settings = FormatSettings::default().with_epoch(Epoch(1930));
- let parsed = Format::new(self.type_, 40, 0)
- .unwrap()
+ let parsed = self
+ .type_
.parser(UTF_8)
.with_settings(&settings)
.parse(&formatted)
Sign::Negative => -expected,
};
- let parsed = Format::new(self.type_, 40, 0)
- .unwrap()
+ let parsed = self
+ .type_
.parser(UTF_8)
.parse(&formatted)
.unwrap()
("sturday", None),
] {
loop {
- let parsed = Format::new(Type::WkDay, 40, 0)
- .unwrap()
+ let parsed = Type::WkDay
.parser(UTF_8)
.parse(input)
.unwrap_or(Value::Number(None))
for length in lengths {
let input = &input[..length];
- let parsed = Format::new(Type::Month, 40, 0)
- .unwrap()
+ let parsed = Type::Month
.parser(UTF_8)
.parse(input)
.unwrap_or(Value::Number(None))
.chain((0xa..=0xf).zip('A'..='F'))
.chain(std::iter::once((0, 'x')))
}
- let parser = Format::new(Type::PIBHex, 2, 0).unwrap().parser(UTF_8);
+ let parser = Type::PIBHex.parser(UTF_8);
for (a, ac) in hex_digits() {
for (b, bc) in hex_digits() {
let s = [ac, bc].into_iter().collect::<String>();
for _ in 0..10000 {
let number = random::<f64>();
let formatted = format!("{:016x}", number.to_bits());
- let parsed = Format::new(Type::RBHex, 16, 0)
- .unwrap()
+ let parsed = Type::RBHex
.parser(UTF_8)
.parse(&formatted)
.unwrap()
for _ in 0..10000 {
let number = random::<f64>();
let raw = number.to_be_bytes();
- let parsed = Format::new(Type::RB, 8, 0)
- .unwrap()
+ let parsed = Type::RB
.parser(UTF_8)
.with_endian(EndianSettings::new(Endian::Big))
.parse(EncodedStr::new(&raw[..], UTF_8))
#[test]
fn n() {
- let parser = Format::new(Type::N, 2, 0).unwrap().parser(UTF_8);
+ let parser = Type::N.parser(UTF_8);
for number in 0..=99 {
let formatted = format!("{:02}", number);
let parsed = parser
#[test]
fn z() {
- let parser = Format::new(Type::Z, 2, 0).unwrap().parser(UTF_8);
+ let parser = Type::Z.parser(UTF_8);
for number in -99i32..=99 {
for mut formatted in [
format!("{:02}", number.abs()),
}
assert_eq!(parser.parse(".").unwrap(), Value::Number(None));
- let parser = Format::new(Type::Z, 4, 1)
- .unwrap()
- .parser(UTF_8)
- .with_implied_decimals();
+ let parser = Type::Z.parser(UTF_8).with_implied_decimals(1);
for number in -999i32..=999 {
let tenths = number as f64 / 10.0;
for mut formatted in [format!("{}", number.abs()), format!("{:.1}", tenths.abs())] {
#[test]
fn ahex() {
- let parser = Format::new(Type::AHex, 16, 0).unwrap().parser(UTF_8);
+ let parser = Type::AHex.parser(UTF_8);
- // Parse correct number of hex digits.
+ // Correct.
assert_eq!(
parser
.parse("6162636465666768")
"abcdefgh"
);
- // Parse too few hex digits.
- assert_eq!(
- parser
- .parse("61626364656667")
- .unwrap()
- .as_string()
- .unwrap()
- .as_encoded(UTF_8)
- .as_str(),
- "abcdefg\u{0}"
- );
-
- // Parse too many hex digits.
- assert_eq!(
- parser
- .parse("616263646566676869")
- .unwrap()
- .as_string()
- .unwrap()
- .as_encoded(UTF_8)
- .as_str(),
- "abcdefgh"
- );
-
// Non-hex digit.
assert_eq!(
parser.parse("61626364656667xyzzy").unwrap_err().kind,