From: Ben Pfaff Date: Thu, 3 Apr 2025 23:08:12 +0000 (-0700) Subject: RB tests X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a7619b24791cc67a630b10b0df5cf36c046b4f7d;p=pspp RB tests --- diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 181a98afc8..e7ff05b88e 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -548,6 +548,18 @@ dependencies = [ "slab", ] +[[package]] +name = "getrandom" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + [[package]] name = "gimli" version = "0.29.0" @@ -752,7 +764,7 @@ checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ "hermit-abi 0.3.9", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", ] @@ -931,6 +943,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro2" version = "1.0.86" @@ -972,6 +993,7 @@ dependencies = [ "ordered-float", "pspp-derive", "quick-xml", + "rand", "serde", "smallstr", "smallvec", @@ -1022,6 +1044,42 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + +[[package]] +name = "rand" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +dependencies = [ + "rand_chacha", + "rand_core", + "zerocopy", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + [[package]] name = "redox_syscall" version = "0.5.3" @@ -1476,6 +1534,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.93" @@ -1715,3 +1782,32 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "zerocopy" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index e563b43c58..4b1015b212 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -62,3 +62,4 @@ harness = false [dev-dependencies] diff = "0.1.13" +rand = "0.9.0" diff --git a/rust/pspp/src/endian.rs b/rust/pspp/src/endian.rs index d35b7daead..dc94b6d32e 100644 --- a/rust/pspp/src/endian.rs +++ b/rust/pspp/src/endian.rs @@ -109,7 +109,7 @@ impl ToBytes for Endian { } } -/// Parses an `N`-byte slice in one of the supported formats into native format +/// Parses an `N`-byte array in one of the supported formats into native format /// as type `T`. pub trait Parse { /// Given 'bytes', returns `T`. @@ -193,3 +193,17 @@ impl Parse, 8> for Endian { (number != -f64::MAX).then_some(number) } } +impl Parse for Endian { + fn parse(self, bytes: [u8; 4]) -> f32 { + match self { + Endian::Big => f32::from_be_bytes(bytes), + Endian::Little => f32::from_le_bytes(bytes), + } + } +} +impl Parse, 4> for Endian { + fn parse(self, bytes: [u8; 4]) -> Option { + let number: f32 = self.parse(bytes); + (number != -f32::MAX).then_some(number) + } +} diff --git a/rust/pspp/src/format/display.rs b/rust/pspp/src/format/display.rs index 8ff4c41898..a8a801dfcc 100644 --- a/rust/pspp/src/format/display.rs +++ b/rust/pspp/src/format/display.rs @@ -640,9 +640,7 @@ impl<'a, 'b> DisplayValue<'a, 'b> { } else { integer }; - self.endian - .output_integer_format - .to_smallvec(integer, self.format.w()) + self.endian.output.to_smallvec(integer, self.format.w()) } fn pib(&self, number: Option) -> SmallVec<[u8; 16]> { @@ -653,14 +651,12 @@ impl<'a, 'b> DisplayValue<'a, 'b> { number }; let integer = number.abs() as u64; - self.endian - .output_integer_format - .to_smallvec(integer, self.format.w()) + self.endian.output.to_smallvec(integer, self.format.w()) } fn rb(&self, number: Option, w: usize) -> SmallVec<[u8; 16]> { let number = number.unwrap_or(-f64::MAX); - let bytes: [u8; 8] = self.endian.output_float_format.to_bytes(number); + let bytes: [u8; 8] = self.endian.output.to_bytes(number); let mut vec = SmallVec::new(); vec.extend_from_slice(&bytes); vec.resize(w, 0); @@ -1076,16 +1072,11 @@ mod test { .with_cc(CC::C, "((,[,],))".parse().unwrap()) .with_cc(CC::D, ",XXX,,-".parse().unwrap()) .with_cc(CC::E, ",,YYY,-".parse().unwrap()); - let endian = EndianSettings { - output_integer_format: Endian::Big, - output_float_format: Endian::Big, - ..EndianSettings::default() - }; + let endian = EndianSettings::new(Endian::Big); let mut value = Some(0.0); let mut value_name = String::new(); - for (line_number, line) in input.lines().map(|r| r.unwrap()).enumerate() { + for (line, line_number) in input.lines().map(|r| r.unwrap()).zip(1..) { let line = line.trim(); - let line_number = line_number + 1; let tokens = StringScanner::new(&line, Syntax::Interactive, true) .unwrapped() .collect::>(); @@ -1280,14 +1271,9 @@ mod test { let mut value = None; let mut value_name = String::new(); - let endian = EndianSettings { - output_integer_format: Endian::Big, - output_float_format: Endian::Big, - ..EndianSettings::default() - }; - for (line_number, line) in input.lines().map(|r| r.unwrap()).enumerate() { + let endian = EndianSettings::new(Endian::Big); + for (line, line_number) in input.lines().map(|r| r.unwrap()).zip(1..) { let line = line.trim(); - let line_number = line_number + 1; let tokens = StringScanner::new(&line, Syntax::Interactive, true) .unwrapped() .collect::>(); diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs index c396df0926..dec3cc6592 100644 --- a/rust/pspp/src/format/parse.rs +++ b/rust/pspp/src/format/parse.rs @@ -1,6 +1,7 @@ use crate::{ calendar::{calendar_gregorian_to_offset, DateError}, dictionary::Value, + endian::{Endian, Parse}, format::{DateTemplate, Format, Settings, TemplateItem, Type}, settings::{EndianSettings, Settings as PsppSettings}, }; @@ -12,11 +13,16 @@ use std::{ }; use thiserror::Error as ThisError; +pub enum ParseInput { + String(String), + Bytes(Box<[u8]>), +} + #[derive(Clone, Debug)] pub struct ParseError { format: Format, input: String, - details: ParseErrorKind, + kind: ParseErrorKind, } impl std::error::Error for ParseError {} @@ -26,7 +32,7 @@ impl Display for ParseError { write!( f, "{:?} cannot be parsed as {}: {}", - &self.input, &self.format, &self.details + &self.input, &self.format, &self.kind ) } } @@ -45,6 +51,14 @@ enum ParseErrorKind { #[error("Field contains unexpected non-digit {0:?}.")] Nondigit(char), + /// Field contains unexpected non-hex digit. + #[error("Field contains unexpected non-hex digit {0:?}.")] + NonHexDigit(char), + + /// Field contains invalid BCD digit. + #[error("Field contains invalid BCD digit ({0:?}).")] + NonBDCDigit(u8), + /// Day must be between 1 and 31. #[error("Day ({0}) must be between 1 and 31.")] InvalidDay(i32), @@ -100,35 +114,54 @@ enum ParseErrorKind { /// Invalid date. #[error("{0}")] InvalidDate(#[from] DateError), + + /// Invalid zoned decimal (Z) syntax. + #[error("Invalid zoned decimal (Z) syntax.")] + InvalidZ, + + /// Invalid BCD sign. + #[error("Invalid BCD sign. 0x{0:x}.")] + InvalidBCDSign(u8), } pub struct ParseValue<'a> { format: Format, settings: &'a Settings, endian: EndianSettings, + implied_decimals: bool, } impl Format { - pub fn parser(&self) -> ParseValue { + pub fn parser(&self) -> ParseValue<'static> { ParseValue::new(*self) } } -impl<'a> ParseValue<'a> { +impl ParseValue<'static> { pub fn new(format: Format) -> Self { let settings = PsppSettings::global(); Self { format, settings: &settings.formats, endian: settings.endian, + implied_decimals: false, } } +} + +impl<'a> ParseValue<'a> { pub fn with_settings(self, settings: &'a Settings) -> Self { Self { settings, ..self } } pub fn with_endian(self, endian: EndianSettings) -> Self { Self { endian, ..self } } + pub fn with_implied_decimals(self) -> Self { + Self { + implied_decimals: true, + ..self + } + } /// Parses `s` as this format. For string formats, `encoding` specifies the /// output encoding. @@ -142,14 +175,14 @@ impl<'a> ParseValue<'a> { } Type::CC(_) => self.parse_number(s, Type::F), Type::N => self.parse_n(s), - Type::Z => todo!(), + Type::Z => self.parse_z(s), Type::P => todo!(), Type::PK => todo!(), Type::IB => todo!(), Type::PIB => todo!(), - Type::PIBHex => todo!(), + Type::PIBHex => self.parse_pibhex(s), Type::RB => todo!(), - Type::RBHex => todo!(), + Type::RBHex => self.parse_rbhex(s), Type::Date | Type::ADate | Type::EDate @@ -171,56 +204,61 @@ impl<'a> ParseValue<'a> { .map_err(|details| ParseError { format: self.format, input: s.into(), - details, + kind: details, }) } - /* - /// Parses `s`, which is encoded in `encoding`. For string formats, - /// `encoding` is also the output encoding. - fn parse_encoded(&self, s: &[u8], encoding: &'static Encoding) -> Result { - if s.is_empty() { - return Ok(self.format.default_value()); - } - match self.format.type_ { - Type::F | Type::Comma | Type::Dot | Type::Dollar | Type::Pct | Type::E => { - self.parse_number(s, self.format.type_) - } - Type::CC(_) => self.parse_number(s, Type::F), - Type::N => self.parse_n(s), - Type::Z => todo!(), - Type::P => todo!(), - Type::PK => todo!(), - Type::IB => todo!(), - Type::PIB => todo!(), - Type::PIBHex => todo!(), - Type::RB => todo!(), - Type::RBHex => todo!(), - Type::Date => todo!(), - Type::ADate => todo!(), - Type::EDate => todo!(), - Type::JDate => todo!(), - Type::SDate => todo!(), - Type::QYr => todo!(), - Type::MoYr => todo!(), - Type::WkYr => todo!(), - Type::DateTime => todo!(), - Type::YMDHMS => todo!(), - Type::MTime => todo!(), - Type::Time => todo!(), - Type::DTime => todo!(), - Type::WkDay => todo!(), - Type::Month => todo!(), - Type::A => todo!(), - Type::AHex => todo!(), - } - .map_err(|details| ParseError { - format: self.format, - input: s.into(), - details, - }) - } - */ + /// Parses `s`, which is encoded in `encoding`. For string formats, + /// `encoding` is also the output encoding. + pub fn parse_encoded( + &self, + input: &[u8], + encoding: &'static Encoding, + ) -> Result { + if input.is_empty() { + return Ok(self.format.default_value()); + } + match self.format.type_ { + Type::P => self.parse_p(input), + Type::PK => self.parse_pk(input), + Type::IB => self.parse_ib(input), + Type::PIB => self.parse_pib(input), + Type::RB => self.parse_rb(input), + Type::F + | Type::Comma + | Type::Dot + | Type::Dollar + | Type::Pct + | Type::E + | Type::CC(_) + | Type::N + | Type::Z + | Type::PIBHex + | Type::RBHex + | Type::Date + | Type::ADate + | Type::EDate + | Type::JDate + | Type::SDate + | Type::QYr + | Type::MoYr + | Type::WkYr + | Type::DateTime + | Type::YmdHms + | Type::MTime + | Type::Time + | Type::DTime + | Type::WkDay + | Type::Month => todo!(), + Type::A => todo!(), + Type::AHex => todo!(), + } + .map_err(|kind| ParseError { + format: self.format, + input: String::new(), + kind, + }) + } fn parse_number(&self, input: &str, type_: Type) -> Result { let style = self.settings.number_style(type_); @@ -302,6 +340,164 @@ impl<'a> ParseValue<'a> { } } + fn parse_z(&self, input: &str) -> Result { + let input = input.trim(); + if input.is_empty() || input == "." { + return Ok(Value::sysmis()); + } + + enum ZChar { + Digit(u32), + SignedDigit(u32, Sign), + Dot, + Invalid, + } + + impl From for ZChar { + fn from(c: char) -> Self { + match c { + '0'..='9' => ZChar::Digit(c as u32 - '0' as u32), + '{' => ZChar::SignedDigit(0, Sign::Positive), + 'A'..='I' => ZChar::SignedDigit(c as u32 - 'A' as u32 + 1, Sign::Positive), + '}' => ZChar::SignedDigit(0, Sign::Negative), + 'J'..='R' => ZChar::SignedDigit(c as u32 - 'J' as u32 + 1, Sign::Negative), + '.' => ZChar::Dot, + _ => ZChar::Invalid, + } + } + } + + let mut number = SmallString::<[u8; 40]>::new(); + let mut sign = None; + let mut dot = false; + for c in input.chars().map(ZChar::from) { + match c { + ZChar::Digit(digit) if sign.is_none() => { + number.push(char::from_digit(digit, 10).unwrap()); + } + ZChar::SignedDigit(digit, s) if sign.is_none() => { + assert!(digit < 10, "{digit}"); + number.push(char::from_digit(digit, 10).unwrap()); + sign = Some(s); + } + ZChar::Dot if !dot => { + number.push('.'); + dot = true; + } + _ => return Err(ParseErrorKind::InvalidZ), + } + } + if self.implied_decimals && !dot && self.format.d() != 0 { + write!(&mut number, "e-{}", self.format.d()).unwrap(); + } + let number = number.parse::().unwrap(); + let number = if sign == Some(Sign::Negative) { + -number + } else { + number + }; + Ok(Value::Number(Some(number))) + } + + fn parse_bcd(input: &[u8]) -> Result { + let mut value = 0; + for byte in input.into_iter().copied() { + let hi = nibble(byte >> 4)?; + let lo = nibble(byte & 0x0f)?; + value = value * 100 + hi * 10 + lo; + } + Ok(value) + } + + fn apply_decimals(&self, number: f64) -> f64 { + if self.implied_decimals && self.format.d() > 0 { + number / 10.0f64.powi(self.format.d() as i32) + } else { + number + } + } + + fn parse_pk(&self, input: &[u8]) -> Result { + let number = Self::parse_bcd(input)?; + Ok(Value::Number(Some(self.apply_decimals(number as f64)))) + } + + fn parse_p(&self, input: &[u8]) -> Result { + if input.is_empty() { + return Ok(Value::Number(None)); + }; + let (head, tail) = input.split_at(input.len() - 1); + let number = Self::parse_bcd(head)?; + let number = number * 10 + nibble(tail[0] >> 4)?; + let number = match tail[0] & 0x0f { + 0xf => number as f64, + 0xd => -(number as f64), + other => return Err(ParseErrorKind::InvalidBCDSign(other)), + }; + Ok(Value::Number(Some(self.apply_decimals(number)))) + } + + fn parse_binary(&self, input: &[u8]) -> u128 { + match self.endian.input { + Endian::Big => input.iter().fold(0, |acc, b| (acc << 8) + *b as u128), + Endian::Little => input.iter().rev().fold(0, |acc, b| (acc << 8) + *b as u128), + } + } + + fn parse_ib(&self, input: &[u8]) -> Result { + let number = self.parse_binary(input); + let sign_bit = 1 << (input.len() * 8 - 1); + let number = if (number & sign_bit) == 0 { + number as i128 + } else { + -(number.wrapping_sub(sign_bit << 1) as i128) + }; + Ok(Value::Number(Some(self.apply_decimals(number as f64)))) + } + + fn parse_pib(&self, input: &[u8]) -> Result { + let number = self.parse_binary(input); + Ok(Value::Number(Some(self.apply_decimals(number as f64)))) + } + + fn parse_rb(&self, input: &[u8]) -> Result { + let mut bytes = [0; 8]; + let len = input.len().min(8); + bytes[..len].copy_from_slice(&input[..len]); + let bits: u64 = self.endian.input.parse(bytes); + + const SYSMIS: f64 = -f64::MAX; + let number = match f64::from_bits(bits) { + SYSMIS => None, + other => Some(other), + }; + Ok(Value::Number(number)) + } + + fn parse_hex(&self, input: &str) -> Result, ParseErrorKind> { + let input = input.trim(); + if input.is_empty() || input == "." { + return Ok(None); + } + if let Ok(value) = u64::from_str_radix(input, 16) { + Ok(Some(value)) + } else { + println!("{input:?} {:?}", u64::from_str_radix(input, 16)); + let c = input.chars().find(|c| !c.is_ascii_hexdigit()).unwrap(); + Err(ParseErrorKind::NonHexDigit(c)) + } + } + + fn parse_pibhex(&self, input: &str) -> Result { + self.parse_hex(input) + .map(|value| Value::Number(value.map(|number| number as f64))) + } + + fn parse_rbhex(&self, input: &str) -> Result { + self.parse_hex(input) + .map(|value| Value::Number(value.map(|number| f64::from_bits(number)))) + } + fn parse_date(&self, input: &str) -> Result { let mut p = StrParser(input.trim()); if p.0.is_empty() || p.0 == "." { @@ -658,6 +854,64 @@ impl<'a> StrParser<'a> { } } +/* +#[derive(Copy, Clone, Debug)] +pub struct ByteParser<'a>(pub &'a [u8]); + +impl<'a> ByteParser<'a> { + pub fn new(s: &'a [u8]) -> Self { + Self(s) + } + + pub fn strip_prefix(&mut self, prefix: &'a [u8]) -> bool { + if prefix.is_empty() { + false + } else if let Some(rest) = self.0.strip_prefix(prefix) { + self.0 = rest; + true + } else { + false + } + } + + fn strip_one_of(&mut self, chars: &[char]) -> Option { + let mut iter = self.0.iter(); + match iter.next() { + Some(c) if chars.contains(&c) => { + self.0 = iter.as_str(); + Some(c) + } + _ => None, + } + } + + fn strip_matches(&mut self, f: impl Fn(char) -> bool) -> &'a [u8] { + self.advance(self.0.trim_start_matches(f)) + } + + fn strip_ws(&mut self) { + self.0 = self.0.trim_start(); + } + + fn advance(&mut self, rest: &'a [u8]) -> &'a [u8] { + let head = self.up_to(rest); + self.0 = rest; + head + } + + fn up_to(&self, rest: &'a [u8]) -> &'a [u8] { + &self.0[..self.0.len() - rest.len()] + } +}*/ + +fn nibble(b: u8) -> Result { + if b < 10 { + Ok(b as u128) + } else { + Err(ParseErrorKind::NonBDCDigit(b)) + } +} + #[cfg(test)] mod test { use std::{ @@ -667,11 +921,17 @@ mod test { }; use encoding_rs::UTF_8; + use rand::random; use crate::{ calendar::{days_in_month, is_leap_year}, dictionary::Value, - format::{parse::Sign, Epoch, Format, Settings as FormatSettings, Type}, + endian::Endian, + format::{ + parse::{ParseError, ParseErrorKind, Sign}, + Epoch, Format, Settings as FormatSettings, Type, + }, + settings::EndianSettings, }; fn test(name: &str, type_: Type) { @@ -679,13 +939,12 @@ mod test { let input_stream = BufReader::new(File::open(base.join("num-in.txt")).unwrap()); let expected_stream = BufReader::new(File::open(base.join(name)).unwrap()); let format = Format::new(type_, 40, 1).unwrap(); - for (line_number, (input, expected)) in input_stream + for ((input, expected), line_number) in input_stream .lines() .map(|result| result.unwrap()) .zip(expected_stream.lines().map(|result| result.unwrap())) - .enumerate() + .zip(1..) { - let line_number = line_number + 1; let result = format.parser().parse(&input, UTF_8); let error = result.clone().err(); let value = result @@ -1330,4 +1589,148 @@ mod test { } } } + + #[test] + fn pibhex() { + fn hex_digits() -> impl Iterator { + ((0..=9).zip('0'..='9')) + .chain((0xa..=0xf).zip('a'..='f')) + .chain((0xa..=0xf).zip('A'..='F')) + .chain(std::iter::once((0, 'x'))) + } + let parser = Format::new(Type::PIBHex, 2, 0).unwrap().parser(); + for (a, ac) in hex_digits() { + for (b, bc) in hex_digits() { + let s = [ac, bc].into_iter().collect::(); + let parsed = parser + .parse(&s, UTF_8) + .unwrap_or(Value::Number(None)) + .as_number() + .unwrap(); + let expected = if ac == 'x' || bc == 'x' { + None + } else { + Some((a * 16 + b) as f64) + }; + assert_eq!(parsed, expected); + } + } + assert_eq!(parser.parse(".", UTF_8).unwrap(), Value::Number(None)); + assert_eq!(parser.parse("", UTF_8).unwrap(), Value::Number(None)); + } + + #[test] + fn rbhex() { + for _ in 0..10000 { + let number = random::(); + let formatted = format!("{:016x}", number.to_bits()); + let parsed = Format::new(Type::RBHex, 16, 0) + .unwrap() + .parser() + .parse(&formatted, UTF_8) + .unwrap() + .as_number() + .unwrap() + .unwrap(); + assert_eq!(parsed, number, "formatted as {formatted:?}"); + } + } + + #[test] + fn rb() { + for _ in 0..10000 { + let number = random::(); + let raw = number.to_be_bytes(); + let parsed = Format::new(Type::RB, 8, 0) + .unwrap() + .parser() + .with_endian(EndianSettings::new(Endian::Big)) + .parse_encoded(&raw[..], UTF_8) + .unwrap() + .as_number() + .unwrap() + .unwrap(); + assert_eq!(parsed, number); + } + } + + #[test] + fn n() { + let parser = Format::new(Type::N, 2, 0).unwrap().parser(); + for number in 0..=99 { + let formatted = format!("{:02}", number); + let parsed = parser + .parse(&formatted, UTF_8) + .unwrap() + .as_number() + .unwrap() + .unwrap(); + assert_eq!(parsed, number as f64, "formatted as {formatted:?}"); + } + assert!(matches!( + parser.parse(" 0", UTF_8), + Err(ParseError { + kind: ParseErrorKind::Nondigit(' '), + .. + }) + )); + assert!(matches!( + parser.parse(".", UTF_8), + Err(ParseError { + kind: ParseErrorKind::Nondigit('.'), + .. + }) + )); + } + + #[test] + fn z() { + let parser = Format::new(Type::Z, 2, 0).unwrap().parser(); + for number in -99i32..=99 { + for mut formatted in [ + format!("{:02}", number.abs()), + format!("{:2}", number.abs()), + ] { + let last = formatted.pop().unwrap(); + let digit = last.to_digit(10).unwrap() as usize; + if number >= 0 { + formatted.push(b"{ABCDEFGHI"[digit] as char); + } else { + formatted.push(b"}JKLMNOPQR"[digit] as char); + } + let parsed = parser + .parse(&formatted, UTF_8) + .unwrap() + .as_number() + .unwrap() + .unwrap(); + assert_eq!(parsed, number as f64, "formatted as {formatted:?}"); + } + } + assert_eq!(parser.parse(".", UTF_8).unwrap(), Value::Number(None)); + + let parser = Format::new(Type::Z, 4, 1) + .unwrap() + .parser() + .with_implied_decimals(); + for number in -999i32..=999 { + let tenths = number as f64 / 10.0; + for mut formatted in [format!("{}", number.abs()), format!("{:.1}", tenths.abs())] { + let last = formatted.pop().unwrap(); + let digit = last.to_digit(10).unwrap() as usize; + if number >= 0 { + formatted.push(b"{ABCDEFGHI"[digit] as char); + } else { + formatted.push(b"}JKLMNOPQR"[digit] as char); + } + let parsed = parser + .parse(&formatted, UTF_8) + .unwrap() + .as_number() + .unwrap() + .unwrap(); + assert_eq!(parsed, tenths, "formatted as {formatted:?}"); + } + } + } } diff --git a/rust/pspp/src/settings.rs b/rust/pspp/src/settings.rs index 966f26bd5b..6bb33d7d13 100644 --- a/rust/pspp/src/settings.rs +++ b/rust/pspp/src/settings.rs @@ -40,17 +40,20 @@ impl Show { #[derive(Copy, Clone, Default, PartialEq, Eq)] pub struct EndianSettings { - /// Endianness for reading IB and PIB formats. - pub input_integer_format: Endian, + /// Endianness for reading IB, PIB, and RB formats. + pub input: Endian, - /// Endianness for reading RB and RBHEX formats. - pub input_float_format: Endian, - - /// Endianness for writing IB and PIB formats. - pub output_integer_format: Endian, + /// Endianness for writing IB, PIB, and RB formats. + pub output: Endian, +} - /// Endianness for writing RB and RBHEX formats. - pub output_float_format: Endian, +impl EndianSettings { + pub const fn new(endian: Endian) -> Self { + Self { + input: endian, + output: endian, + } + } } pub struct Settings {