sack making progress!
authorBen Pfaff <blp@cs.stanford.edu>
Sat, 29 Jul 2023 19:21:20 +0000 (12:21 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Sat, 29 Jul 2023 19:21:20 +0000 (12:21 -0700)
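Implement enough of the sack parser to emit integer and float data
items, PCSYSMIS, and the i8/i16/i64 directives.  This adds ToBytes
implementations for i64, u32, u16, and u8 to endian.rs and wraps
Token's floats in ordered_float::OrderedFloat so the enum can derive
PartialEq and Eq.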
rust/Cargo.lock
rust/Cargo.toml
rust/src/endian.rs
rust/src/sack.rs

index c37218a301957c392e5b64a9a70f06eca5935568..b69e7e9b22da0528ef7017bb152b78a436bf2802 100644 (file)
@@ -299,6 +299,15 @@ version = "1.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
 
+[[package]]
+name = "ordered-float"
+version = "3.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "os_str_bytes"
 version = "6.4.1"
@@ -350,6 +359,7 @@ dependencies = [
  "num",
  "num-derive",
  "num-traits",
+ "ordered-float",
  "thiserror",
 ]
 
index 0990ad1be38e6b4476ee79198f2ca9565b87222c..20a9b21afb24e9d8eeb4c2c611024e8ae534d0b5 100644 (file)
@@ -13,6 +13,7 @@ hexplay = "0.2.1"
 num = "0.4.0"
 num-derive = "0.4.0"
 num-traits = "0.2.16"
+ordered-float = "3.7.0"
 thiserror = "1.0"
 
 [[bin]]
index dd562e60eff7c9cf8500879f1064c41618ba5ae7..bf861af2d18765697eab1e078f5ff3fd374d43a8 100644 (file)
@@ -37,6 +37,35 @@ impl Endian {
 pub trait ToBytes<T, const N: usize> {
     fn to_bytes(self, value: T) -> [u8; N];
 }
+impl ToBytes<i64, 8> for Endian {
+    fn to_bytes(self, value: i64) -> [u8; 8] {
+        match self {
+            Endian::Big => i64::to_be_bytes(value),
+            Endian::Little => i64::to_le_bytes(value),
+        }
+    }
+}
+impl ToBytes<u32, 4> for Endian {
+    fn to_bytes(self, value: u32) -> [u8; 4] {
+        match self {
+            Endian::Big => u32::to_be_bytes(value),
+            Endian::Little => u32::to_le_bytes(value),
+        }
+    }
+}
+impl ToBytes<u16, 2> for Endian {
+    fn to_bytes(self, value: u16) -> [u8; 2] {
+        match self {
+            Endian::Big => u16::to_be_bytes(value),
+            Endian::Little => u16::to_le_bytes(value),
+        }
+    }
+}
+impl ToBytes<u8, 1> for Endian {
+    fn to_bytes(self, value: u8) -> [u8; 1] {
+        [value]
+    }
+}
 impl ToBytes<f64, 8> for Endian {
     fn to_bytes(self, value: f64) -> [u8; 8] {
         match self {
index 6be81d8aff1c285473660c7ce75745243102e12f..70e251d08e181d81944d7ce06ff24c369b0ea2b8 100644 (file)
@@ -1,18 +1,70 @@
 use anyhow::{anyhow, Result};
 use float_next_after::NextAfter;
-use std::{iter::Peekable, str::Chars};
+use num::Bounded;
+use ordered_float::OrderedFloat;
+use std::{fmt::Display, iter::Peekable, str::Chars};
 
-use crate::endian::Endian;
+use crate::endian::{Endian, ToBytes};
 
 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
-    let lexer = Lexer::new(input, endian)?;
-    //let mut output = Vec::new();
-    Ok(Vec::new())
+    let mut lexer = Lexer::new(input, endian)?;
+    let mut output = Vec::new();
+    while parse_data_item(&mut lexer, &mut output)? {}
+    Ok(output)
 }
 
+fn parse_data_item(lexer: &mut Lexer, output: &mut Vec<u8>) -> Result<bool> {
+    if lexer.token.is_none() {
+        return Ok(false);
+    };
+    match lexer.take()? {
+        Token::Integer(integer) => output.extend_from_slice(&lexer.endian.to_bytes(integer)),
+        Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
+        Token::PcSysmis => {
+            output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
+        }
+        Token::I8 => collect_integers::<u8, 1>(lexer, "i8", output)?,
+        Token::I16 => collect_integers::<u16, 2>(lexer, "i16", output)?,
+        Token::I64 => collect_integers::<i64, 8>(lexer, "i64", output)?,
+        _ => return Err(anyhow!("syntax error")),
+    }
+    Ok(true)
+}
+
+fn collect_integers<T, const N: usize>(
+    lexer: &mut Lexer,
+    name: &str,
+    output: &mut Vec<u8>,
+) -> Result<()>
+where
+    T: Bounded + Display + TryFrom<i64> + Copy,
+    Endian: ToBytes<T, N>,
+{
+    let mut n = 0;
+    while let Some(integer) = lexer.take_if(|t| match t {
+        Token::Integer(integer) => Some(*integer),
+        _ => None,
+    })? {
+        let Ok(integer) = integer.try_into() else {
+            return Err(anyhow!(
+                "{integer} is not in the valid range [{},{}]",
+                T::min_value(),
+                T::max_value()
+            ));
+        };
+        output.extend_from_slice(&lexer.endian.to_bytes(integer));
+        n += 1;
+    }
+    if n == 0 {
+        return Err(anyhow!("integer expected after '{name}'"));
+    }
+    Ok(())
+}
+
+#[derive(PartialEq, Eq, Clone)]
 enum Token {
     Integer(i64),
-    Float(f64),
+    Float(OrderedFloat<f64>),
     PcSysmis,
     String(String),
     Semicolon,
@@ -50,6 +102,28 @@ impl<'a> Lexer<'a> {
         lexer.next()?;
         Ok(lexer)
     }
+    fn take(&mut self) -> Result<Token> {
+        let Some(token) = self.token.take() else {
+            return Err(anyhow!("unexpected end of input"));
+        };
+        self.token = self.next()?;
+        Ok(token)
+    }
+    fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
+    where
+        F: FnOnce(&Token) -> Option<T>,
+    {
+        let Some(ref token) = self.token else {
+            return Ok(None);
+        };
+        match condition(token) {
+            Some(value) => {
+                self.token = self.next()?;
+                Ok(Some(value))
+            }
+            None => Ok(None),
+        }
+    }
     fn get(&'a mut self) -> Result<Option<&'a Token>> {
         if self.token.is_none() {
             Err(anyhow!("unexpected end of input"))
@@ -149,10 +223,10 @@ impl<'a> Lexer<'a> {
                         "i8" => Token::I8,
                         "i16" => Token::I16,
                         "i64" => Token::I64,
-                        "SYSMIS" => Token::Float(-f64::MAX),
+                        "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
                         "PCSYSMIS" => Token::PcSysmis,
-                        "LOWEST" => Token::Float((-f64::MAX).next_after(0.0)),
-                        "HIGHEST" => Token::Float(f64::MAX),
+                        "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
+                        "HIGHEST" => Token::Float(f64::MAX.into()),
                         "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
                         "COUNT" => Token::Count,
                         "COUNT8" => Token::Count8,