From e09ed2b52e13125dc04a169e192b1cde759ab1e7 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 29 Jul 2023 12:21:20 -0700
Subject: [PATCH] sack making progress!

---
 rust/Cargo.lock    | 10 ++++++
 rust/Cargo.toml    |  1 +
 rust/src/endian.rs | 29 +++++++++++++++
 rust/src/sack.rs   | 90 +++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index c37218a301..b69e7e9b22 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -299,6 +299,15 @@ version = "1.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
 
+[[package]]
+name = "ordered-float"
+version = "3.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "os_str_bytes"
 version = "6.4.1"
@@ -350,6 +359,7 @@ dependencies = [
  "num",
  "num-derive",
  "num-traits",
+ "ordered-float",
  "thiserror",
 ]
 
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 0990ad1be3..20a9b21afb 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -13,6 +13,7 @@ hexplay = "0.2.1"
 num = "0.4.0"
 num-derive = "0.4.0"
 num-traits = "0.2.16"
+ordered-float = "3.7.0"
 thiserror = "1.0"
 
 [[bin]]
diff --git a/rust/src/endian.rs b/rust/src/endian.rs
index dd562e60ef..bf861af2d1 100644
--- a/rust/src/endian.rs
+++ b/rust/src/endian.rs
@@ -37,6 +37,35 @@ impl Endian {
 pub trait ToBytes<T, const N: usize> {
     fn to_bytes(self, value: T) -> [u8; N];
 }
+impl ToBytes<i64, 8> for Endian {
+    fn to_bytes(self, value: i64) -> [u8; 8] {
+        match self {
+            Endian::Big => i64::to_be_bytes(value),
+            Endian::Little => i64::to_le_bytes(value),
+        }
+    }
+}
+impl ToBytes<u32, 4> for Endian {
+    fn to_bytes(self, value: u32) -> [u8; 4] {
+        match self {
+            Endian::Big => u32::to_be_bytes(value),
+            Endian::Little => u32::to_le_bytes(value),
+        }
+    }
+}
+impl ToBytes<u16, 2> for Endian {
+    fn to_bytes(self, value: u16) -> [u8; 2] {
+        match self {
+            Endian::Big => u16::to_be_bytes(value),
+            Endian::Little => u16::to_le_bytes(value),
+        }
+    }
+}
+impl ToBytes<u8, 1> for Endian {
+    fn to_bytes(self, value: u8) -> [u8; 1] {
+        [value]
+    }
+}
 impl ToBytes<f64, 8> for Endian {
     fn to_bytes(self, value: f64) -> [u8; 8] {
         match self {
diff --git a/rust/src/sack.rs b/rust/src/sack.rs
index 6be81d8aff..70e251d08e 100644
--- a/rust/src/sack.rs
+++ b/rust/src/sack.rs
@@ -1,18 +1,70 @@
 use anyhow::{anyhow, Result};
 use float_next_after::NextAfter;
-use std::{iter::Peekable, str::Chars};
+use num::Bounded;
+use ordered_float::OrderedFloat;
+use std::{fmt::Display, iter::Peekable, str::Chars};
 
-use crate::endian::Endian;
+use crate::endian::{Endian, ToBytes};
 
 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
-    let lexer = Lexer::new(input, endian)?;
-    //let mut output = Vec::new();
+    let mut lexer = Lexer::new(input, endian)?;
+    let mut output = Vec::new();
+    while parse_data_item(&mut lexer, &mut output)? {}
     Ok(Vec::new())
 }
 
+fn parse_data_item(lexer: &mut Lexer, output: &mut Vec<u8>) -> Result<bool> {
+    if lexer.token.is_none() {
+        return Ok(false);
+    };
+    match lexer.take()? {
+        Token::Integer(integer) => output.extend_from_slice(&lexer.endian.to_bytes(integer)),
+        Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
+        Token::PcSysmis => {
+            output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
+        }
+        Token::I8 => collect_integers::<u8, 1>(lexer, "i8", output)?,
+        Token::I16 => collect_integers::<u16, 2>(lexer, "i16", output)?,
+        Token::I64 => collect_integers::<i64, 8>(lexer, "i64", output)?,
+        _ => return Err(anyhow!("syntax error")),
+    }
+    Ok(true)
+}
+
+fn collect_integers<T, const N: usize>(
+    lexer: &mut Lexer,
+    name: &str,
+    output: &mut Vec<u8>,
+) -> Result<()>
+where
+    T: Bounded + Display + TryFrom<i64> + Copy,
+    Endian: ToBytes<T, N>,
+{
+    let mut n = 0;
+    while let Some(integer) = lexer.take_if(|t| match t {
+        Token::Integer(integer) => Some(*integer),
+        _ => None,
+    })? {
+        let Ok(integer) = integer.try_into() else {
+            return Err(anyhow!(
+                "{integer} is not in the valid range [{},{}]",
+                T::min_value(),
+                T::max_value()
+            ));
+        };
+        output.extend_from_slice(&lexer.endian.to_bytes(integer));
+        n += 1;
+    }
+    if n == 0 {
+        return Err(anyhow!("integer expected after '{name}'"));
+    }
+    Ok(())
+}
+
+#[derive(PartialEq, Eq, Clone)]
 enum Token {
     Integer(i64),
-    Float(f64),
+    Float(OrderedFloat<f64>),
     PcSysmis,
     String(String),
     Semicolon,
@@ -50,6 +102,28 @@ impl<'a> Lexer<'a> {
         lexer.next()?;
         Ok(lexer)
     }
+    fn take(&mut self) -> Result<Token> {
+        let Some(token) = self.token.take() else {
+            return Err(anyhow!("unexpected end of input"));
+        };
+        self.token = self.next()?;
+        Ok(token)
+    }
+    fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
+    where
+        F: FnOnce(&Token) -> Option<T>,
+    {
+        let Some(ref token) = self.token else {
+            return Ok(None);
+        };
+        match condition(&token) {
+            Some(value) => {
+                self.token = self.next()?;
+                Ok(Some(value))
+            }
+            None => Ok(None),
+        }
+    }
     fn get(&'a mut self) -> Result<Option<&Token>> {
         if self.token.is_none() {
             Err(anyhow!("unexpected end of input"))
@@ -149,10 +223,10 @@ impl<'a> Lexer<'a> {
             "i8" => Token::I8,
             "i16" => Token::I16,
             "i64" => Token::I64,
-            "SYSMIS" => Token::Float(-f64::MAX),
+            "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
             "PCSYSMIS" => Token::PcSysmis,
-            "LOWEST" => Token::Float((-f64::MAX).next_after(0.0)),
-            "HIGHEST" => Token::Float(f64::MAX),
+            "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
+            "HIGHEST" => Token::Float(f64::MAX.into()),
             "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
             "COUNT" => Token::Count,
             "COUNT8" => Token::Count8,
-- 
2.30.2
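
A minimal sketch of how the new ToBytes impls are exercised, following the same endian.to_bytes(value) dispatch that parse_data_item and collect_integers rely on in the patch. The test module below is hypothetical (module and test names are invented), assuming it sits inside rust/src/endian.rs so that Endian and ToBytes are in scope:

    // Hypothetical test module; not part of the patch above.
    #[cfg(test)]
    mod to_bytes_tests {
        use super::{Endian, ToBytes};

        #[test]
        fn dispatches_on_endianness() {
            // ToBytes<u32, 4>: most significant byte first for Endian::Big...
            let be: [u8; 4] = Endian::Big.to_bytes(0x12345678u32);
            assert_eq!(be, [0x12, 0x34, 0x56, 0x78]);

            // ...and least significant byte first for Endian::Little.
            let le: [u8; 4] = Endian::Little.to_bytes(0x12345678u32);
            assert_eq!(le, [0x78, 0x56, 0x34, 0x12]);

            // ToBytes<i64, 8>: two's complement, so -1 becomes eight 0xff bytes.
            let neg: [u8; 8] = Endian::Big.to_bytes(-1i64);
            assert_eq!(neg, [0xff; 8]);
        }
    }

The argument type alone selects the impl, and with it the output width [u8; N], which is what lets collect_integers stay generic over T behind a single Endian: ToBytes<T, N> bound.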