From 339b774ab6e5ac5ac64626c0dd30430fa918d925 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 29 Jul 2023 16:26:22 -0700 Subject: [PATCH] finish sack library --- rust/src/sack.rs | 194 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 182 insertions(+), 12 deletions(-) diff --git a/rust/src/sack.rs b/rust/src/sack.rs index 70e251d08e..41012c467e 100644 --- a/rust/src/sack.rs +++ b/rust/src/sack.rs @@ -1,37 +1,207 @@ use anyhow::{anyhow, Result}; use float_next_after::NextAfter; -use num::Bounded; +use num::{Bounded, Zero}; use ordered_float::OrderedFloat; -use std::{fmt::Display, iter::Peekable, str::Chars}; +use std::{ + collections::{hash_map::Entry, HashMap}, + fmt::Display, + iter::{repeat, Peekable}, + str::Chars, +}; use crate::endian::{Endian, ToBytes}; pub fn sack(input: &str, endian: Endian) -> Result> { + let mut symbol_table = HashMap::new(); + let output = _sack(input, endian, &mut symbol_table)?; + let output = if !symbol_table.is_empty() { + for (k, v) in symbol_table.iter() { + if v.is_none() { + return Err(anyhow!("label {k} used but never defined")); + } + } + _sack(input, endian, &mut symbol_table)? + } else { + output + }; + Ok(output) +} + +fn _sack( + input: &str, + endian: Endian, + symbol_table: &mut HashMap>, +) -> Result> { let mut lexer = Lexer::new(input, endian)?; let mut output = Vec::new(); - while parse_data_item(&mut lexer, &mut output)? {} - Ok(Vec::new()) + while parse_data_item(&mut lexer, &mut output, symbol_table)? {} + Ok(output) } -fn parse_data_item(lexer: &mut Lexer, output: &mut Vec) -> Result { +fn parse_data_item( + lexer: &mut Lexer, + output: &mut Vec, + symbol_table: &mut HashMap>, +) -> Result { if lexer.token.is_none() { return Ok(false); }; + + let initial_len = output.len(); match lexer.take()? { Token::Integer(integer) => output.extend_from_slice(&lexer.endian.to_bytes(integer)), Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)), Token::PcSysmis => { output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff]) } - Token::I8 => collect_integers::(lexer, "i8", output)?, - Token::I16 => collect_integers::(lexer, "i16", output)?, - Token::I64 => collect_integers::(lexer, "i64", output)?, - _ => return Err(anyhow!("syntax error")), + Token::I8 => put_integers::(lexer, "i8", output)?, + Token::I16 => put_integers::(lexer, "i16", output)?, + Token::I64 => put_integers::(lexer, "i64", output)?, + Token::String(string) => output.extend_from_slice(string.as_bytes()), + Token::S(size) => { + let Some(Token::String(ref string)) = lexer.token else { + return Err(anyhow!("string expected after 's{size}'")); + }; + let len = string.len(); + if len > size { + return Err(anyhow!( + "{len}-byte string is longer than pad length {size}" + )); + } + output.extend_from_slice(string.as_bytes()); + output.extend(repeat(b' ').take(size - len)); + lexer.get()?; + } + Token::LParen => { + while lexer.token != Some(Token::RParen) { + parse_data_item(lexer, output, symbol_table)?; + } + lexer.get()?; + } + Token::Count => put_counted_items::(lexer, "COUNT", output, symbol_table)?, + Token::Count8 => put_counted_items::(lexer, "COUNT8", output, symbol_table)?, + Token::Hex => { + let Some(Token::String(ref string)) = lexer.token else { + return Err(anyhow!("string expected after 'hex'")); + }; + let mut i = string.chars(); + loop { + let Some(c0) = i.next() else { return Ok(true) }; + let Some(c1) = i.next() else { + return Err(anyhow!("hex string has odd number of characters")); + }; + let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else { + return Err(anyhow!("invalid digit in hex string")); + }; + let byte = digit0 * 16 + digit1; + output.push(byte as u8); + } + } + Token::Label(name) => { + let value = output.len() as u32; + match symbol_table.entry(name) { + Entry::Vacant(v) => { + v.insert(Some(value)); + } + Entry::Occupied(o) => { + if let Some(v) = o.get() { + if *v != value { + return Err(anyhow!("syntax error")); + } + } + } + }; + } + Token::At(name) => { + let mut value = symbol_table.entry(name).or_insert(None).unwrap_or(0); + lexer.get()?; + loop { + let plus = match lexer.token { + Some(Token::Plus) => true, + Some(Token::Minus) => false, + _ => break, + }; + lexer.get()?; + + let operand = match lexer.token { + Some(Token::At(ref name)) => if let Some(value) = symbol_table.get(name) { + *value + } else { + symbol_table.insert(name.clone(), None); + None + } + .unwrap_or(0), + Some(Token::Integer(integer)) => integer + .try_into() + .map_err(|msg| anyhow!("bad offset literal ({msg})"))?, + _ => return Err(anyhow!("expecting @label or integer literal")), + }; + lexer.get()?; + + value = if plus { + value.checked_add(operand) + } else { + value.checked_sub(operand) + } + .ok_or_else(|| anyhow!("overflow in offset arithmetic"))?; + } + output.extend_from_slice(&lexer.endian.to_bytes(value)); + } + _ => (), + }; + if lexer.token == Some(Token::Asterisk) { + lexer.get()?; + let Token::Integer(count) = lexer.take()? else { + return Err(anyhow!("positive integer expected after '*'")); + }; + if count < 1 { + return Err(anyhow!("positive integer expected after '*'")); + }; + let final_len = output.len(); + for _ in 1..count { + output.extend_from_within(initial_len..final_len); + } + } + match lexer.token { + Some(Token::Semicolon) => { + lexer.get()?; + } + Some(Token::RParen) => (), + _ => return Err(anyhow!("';' expected")), } Ok(true) } -fn collect_integers( +fn put_counted_items( + lexer: &mut Lexer, + name: &str, + output: &mut Vec, + symbol_table: &mut HashMap>, +) -> Result<()> +where + T: Zero + TryFrom, + Endian: ToBytes, +{ + let old_size = output.len(); + output.extend_from_slice(&lexer.endian.to_bytes(T::zero())); + if lexer.token != Some(Token::LParen) { + return Err(anyhow!("'(' expected after '{name}'")); + } + lexer.get()?; + while lexer.token != Some(Token::RParen) { + parse_data_item(lexer, output, symbol_table)?; + } + lexer.get()?; + let delta = output.len() - old_size; + let Ok(delta): Result = delta.try_into() else { + return Err(anyhow!("{delta} bytes is too much for '{name}'")); + }; + let dest = &mut output[old_size..old_size + N]; + dest.copy_from_slice(&lexer.endian.to_bytes(delta)); + Ok(()) +} + +fn put_integers( lexer: &mut Lexer, name: &str, output: &mut Vec, @@ -116,7 +286,7 @@ impl<'a> Lexer<'a> { let Some(ref token) = self.token else { return Ok(None); }; - match condition(&token) { + match condition(token) { Some(value) => { self.token = self.next()?; Ok(Some(value)) @@ -124,7 +294,7 @@ impl<'a> Lexer<'a> { None => Ok(None), } } - fn get(&'a mut self) -> Result> { + fn get(&mut self) -> Result> { if self.token.is_none() { Err(anyhow!("unexpected end of input")) } else { -- 2.30.2