1 use anyhow::{anyhow, Result};
2 use float_next_after::NextAfter;
3 use num::{Bounded, Zero};
4 use ordered_float::OrderedFloat;
6 collections::{hash_map::Entry, HashMap},
8 iter::{repeat, Peekable},
12 use crate::endian::{Endian, ToBytes};
// Assemble the textual "sack" input into raw bytes using the requested
// byte order.  Labels force a second pass: pass one records each label's
// byte offset in `symbol_table`; pass two re-assembles so forward
// references (`@name` seen before `name:`) resolve to real offsets.
// NOTE(review): this listing is elided — source lines are missing between
// the numbered statements, so comments describe only what is visible.
14 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
15 let mut symbol_table = HashMap::new();
16 let output = _sack(input, endian, &mut symbol_table)?;
17 let output = if !symbol_table.is_empty() {
// A table entry left as `None` means the label was referenced via `@name`
// but never defined — presumably guarded by a check on `v` on the elided
// line 19; TODO confirm.
18 for (k, v) in symbol_table.iter() {
20 return Err(anyhow!("label {k} used but never defined"));
// Second pass: every surviving label now has a concrete offset.
23 _sack(input, endian, &mut symbol_table)?
// Tail of `_sack`'s signature (the `fn` line is elided from this view):
// a single assembly pass.  `symbol_table` maps label name -> Some(offset)
// once defined, or None while only referenced.
33 symbol_table: &mut HashMap<String, Option<u32>>,
34 ) -> Result<Vec<u8>> {
35 let mut lexer = Lexer::new(input, endian)?;
36 let mut output = Vec::new();
// Consume one top-level data item per iteration; `parse_data_item`
// returns Ok(false) once the input is exhausted.
37 while parse_data_item(&mut lexer, &mut output, symbol_table)? {}
// Body of `parse_data_item` (its `fn` header is elided from this view).
// Parses one data item from the lexer and appends its byte encoding to
// `output`; dispatches on the current token.
44 symbol_table: &mut HashMap<String, Option<u32>>,
// No token left: end of input — presumably returns Ok(false) on the
// elided following line; TODO confirm.
46 if lexer.token.is_none() {
// Remember where this item starts so a trailing '*COUNT' repetition can
// duplicate exactly the bytes the item produced (see line 162).
50 let initial_len = output.len();
// Bare integer/float literals are emitted in the target byte order.
52 Token::Integer(integer) => output.extend_from_slice(&lexer.endian.to_bytes(integer)),
53 Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
// PCSYSMIS: a fixed 8-byte magic value (meaning defined elsewhere).
55 output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
// i8/i16/i64 keywords: a run of integer literals emitted at that width.
57 Token::I8 => put_integers::<u8, 1>(lexer, "i8", output)?,
58 Token::I16 => put_integers::<u16, 2>(lexer, "i16", output)?,
59 Token::I64 => put_integers::<i64, 8>(lexer, "i64", output)?,
// Bare string: raw bytes, no padding.
60 Token::String(string) => output.extend_from_slice(string.as_bytes()),
// 's<size>' keyword: the following string is space-padded to `size` bytes.
62 let Some(Token::String(ref string)) = lexer.token else {
63 return Err(anyhow!("string expected after 's{size}'"));
65 let len = string.len();
68 "{len}-byte string is longer than pad length {size}"
71 output.extend_from_slice(string.as_bytes());
72 output.extend(repeat(b' ').take(size - len));
// Parenthesized group: parse nested items until the matching ')'.
76 while lexer.token != Some(Token::RParen) {
77 parse_data_item(lexer, output, symbol_table)?;
// COUNT/COUNT8: a 4- or 1-byte length prefix backpatched over the
// enclosed items (see put_counted_items).
81 Token::Count => put_counted_items::<u32, 4>(lexer, "COUNT", output, symbol_table)?,
82 Token::Count8 => put_counted_items::<u8, 1>(lexer, "COUNT8", output, symbol_table)?,
// 'hex' keyword: decode pairs of hex digits from the following string.
84 let Some(Token::String(ref string)) = lexer.token else {
85 return Err(anyhow!("string expected after 'hex'"));
87 let mut i = string.chars();
89 let Some(c0) = i.next() else { return Ok(true) };
90 let Some(c1) = i.next() else {
91 return Err(anyhow!("hex string has odd number of characters"));
93 let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else {
94 return Err(anyhow!("invalid digit in hex string"));
96 let byte = digit0 * 16 + digit1;
// Cast is safe: two hex digits always fit in 0..=255.
97 output.push(byte as u8);
// 'name:' label definition: record the current output offset.
100 Token::Label(name) => {
101 let value = output.len() as u32;
102 match symbol_table.entry(name) {
103 Entry::Vacant(v) => {
104 v.insert(Some(value));
// Occupied with Some(..) means the label was already *defined*
// (a plain reference would have stored None) — duplicate definition.
106 Entry::Occupied(o) => {
107 if let Some(v) = o.get() {
109 return Err(anyhow!("syntax error"));
// '@name' reference: start from the label's offset, or 0 on pass one
// when the label is not yet defined (the entry is created as None so
// `sack` knows a second pass is required).
116 let mut value = symbol_table.entry(name).or_insert(None).unwrap_or(0);
// Optional '+'/'-' offset arithmetic after the reference.
119 let plus = match lexer.token {
120 Some(Token::Plus) => true,
121 Some(Token::Minus) => false,
126 let operand = match lexer.token {
127 Some(Token::At(ref name)) => if let Some(value) = symbol_table.get(name) {
// Unknown label on the right-hand side: register it as pending.
130 symbol_table.insert(name.clone(), None);
134 Some(Token::Integer(integer)) => integer
136 .map_err(|msg| anyhow!("bad offset literal ({msg})"))?,
137 _ => return Err(anyhow!("expecting @label or integer literal")),
// Checked arithmetic so u32 wraparound is reported, not emitted.
142 value.checked_add(operand)
144 value.checked_sub(operand)
146 .ok_or_else(|| anyhow!("overflow in offset arithmetic"))?;
148 output.extend_from_slice(&lexer.endian.to_bytes(value));
// Optional '* count' suffix: repeat the bytes this item just produced.
152 if lexer.token == Some(Token::Asterisk) {
154 let Token::Integer(count) = lexer.take()? else {
155 return Err(anyhow!("positive integer expected after '*'"));
158 return Err(anyhow!("positive integer expected after '*'"));
160 let final_len = output.len();
// Duplicates the item's own bytes in place (count-1 more copies,
// presumably — the loop bounds are on elided lines; TODO confirm).
162 output.extend_from_within(initial_len..final_len);
// Items are separated by ';'; a ')' ends the enclosing group instead.
166 Some(Token::Semicolon) => {
169 Some(Token::RParen) => (),
170 _ => return Err(anyhow!("';' expected")),
// Emit a length-prefixed group: write an N-byte zero placeholder, parse
// the parenthesized items that follow, then backpatch the placeholder
// with the byte count, encoded as T in the target byte order.
175 fn put_counted_items<T, const N: usize>(
178 output: &mut Vec<u8>,
179 symbol_table: &mut HashMap<String, Option<u32>>,
182 T: Zero + TryFrom<usize>,
183 Endian: ToBytes<T, N>,
185 let old_size = output.len();
// Placeholder for the count, overwritten below once the size is known.
186 output.extend_from_slice(&lexer.endian.to_bytes(T::zero()));
187 if lexer.token != Some(Token::LParen) {
188 return Err(anyhow!("'(' expected after '{name}'"));
191 while lexer.token != Some(Token::RParen) {
192 parse_data_item(lexer, output, symbol_table)?;
// NOTE(review): `delta` is measured from before the placeholder, so the
// stored count includes the N prefix bytes themselves — confirm intended.
195 let delta = output.len() - old_size;
// Fails when the region is too large for T (e.g. >255 bytes for COUNT8).
196 let Ok(delta): Result<T, _> = delta.try_into() else {
197 return Err(anyhow!("{delta} bytes is too much for '{name}'"));
199 let dest = &mut output[old_size..old_size + N];
200 dest.copy_from_slice(&lexer.endian.to_bytes(delta));
// Emit a run of integer literals, each encoded as T (N bytes) in the
// target byte order.  Literals outside T's range are rejected with the
// valid bounds in the message; an empty run is an error (line 229).
204 fn put_integers<T, const N: usize>(
207 output: &mut Vec<u8>,
210 T: Bounded + Display + TryFrom<i64> + Copy,
211 Endian: ToBytes<T, N>,
// Consume consecutive Integer tokens; any other token ends the run.
214 while let Some(integer) = lexer.take_if(|t| match t {
215 Token::Integer(integer) => Some(*integer),
// Range check via TryFrom<i64>; Bounded supplies the limits for the
// error message on the elided lines around 220.
218 let Ok(integer) = integer.try_into() else {
220 "{integer} is not in the valid range [{},{}]",
225 output.extend_from_slice(&lexer.endian.to_bytes(integer));
// Reached when no integer followed the keyword (presumably guarded by a
// count of emitted values on an elided line; TODO confirm).
229 return Err(anyhow!("integer expected after '{name}'"));
// Fragment of the Token enum (its header and most variants are elided).
// OrderedFloat gives Float total equality/ordering so Token can be
// PartialEq/Eq despite holding an f64.
234 #[derive(PartialEq, Eq, Clone)]
237 Float(OrderedFloat<f64>),
// Fragment of the Lexer struct: a peekable character stream over the
// input plus a one-token lookahead (`None` at end of input).
258 iter: Peekable<Chars<'a>>,
259 token: Option<Token>,
265 fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
266 let mut lexer = Lexer {
267 iter: input.chars().peekable(),
// Consume and return the current token, advancing the lookahead; errors
// if the input is already exhausted.
275 fn take(&mut self) -> Result<Token> {
276 let Some(token) = self.token.take() else {
277 return Err(anyhow!("unexpected end of input"));
// Refill the lookahead before handing back the consumed token.
279 self.token = self.next()?;
// Conditionally consume the current token: `condition` inspects it and
// returns Some(mapped) to accept (the lexer then advances) or None to
// leave the token in place.  Ok(None) at end of input.
282 fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
284 F: FnOnce(&Token) -> Option<T>,
286 let Some(ref token) = self.token else {
289 match condition(token) {
// Accepted: advance past the consumed token.
291 self.token = self.next()?;
// Return a reference to a token, erroring at end of input.
// NOTE(review): the visible flow advances the lexer (line 301) after the
// None check rather than returning the current token — line 300 is elided
// here, so whether this is "advance then peek" by design needs confirming
// against the full source.
297 fn get(&mut self) -> Result<Option<&Token>> {
298 if self.token.is_none() {
299 Err(anyhow!("unexpected end of input"))
301 self.token = self.next()?;
302 Ok((&self.token).into())
// Scan one token from the character stream, or Ok(None) at end of input.
// Skips whitespace and '#' line comments; '<' and '>' are also skipped
// (their meaning is established on elided lines — TODO confirm, they are
// plausibly decorative byte-order markers).
306 fn next(&mut self) -> Result<Option<Token>> {
307 // Get the first character of the token, skipping past white space and
310 let Some(c) = self.iter.next() else {
// '#' starts a comment that runs to end of line.
313 let c = if c == '#' {
315 match self.iter.next() {
316 None => return Ok(None),
// Track line numbers for diagnostics.
326 self.line_number += 1
327 } else if !c.is_whitespace() && c != '<' && c != '>' {
332 let token = match c {
// Numeric literal: digits/letters/'.' are gathered, then parsed as an
// integer unless a '.' (or, presumably, an exponent on elided lines)
// makes it a float.
333 c if c.is_ascii_digit() || c == '-' => {
334 let mut s = String::from(c);
335 while let Some(c) = self
337 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
344 } else if !s.contains('.') {
347 .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
352 .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
// Quoted string: runs to the closing quote; newlines and EOF inside a
// string are errors.
357 let mut s = String::from(c);
359 match self.iter.next() {
360 None => return Err(anyhow!("end-of-file inside string")),
361 Some('\n') => return Err(anyhow!("new-line inside string")),
363 Some(c) => s.push(c),
// Single-character punctuation tokens.
368 ';' => Token::Semicolon,
369 '*' => Token::Asterisk,
371 '(' => Token::LParen,
372 ')' => Token::RParen,
// Identifier / keyword / label / '@' reference / 's<N>' counted-string.
373 c if c.is_alphabetic() || c == '@' || c == '_' => {
374 let mut s = String::from(c);
375 while let Some(c) = self
377 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
// Trailing ':' makes it a label definition.
381 if self.iter.next_if_eq(&':').is_some() {
383 } else if s.starts_with('@') {
// 's<digits>' keyword: the digits are the pad length.
385 } else if let Some(count) = s.strip_prefix('s') {
389 .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
// SYSMIS encodes as the most negative finite f64; LOWEST is the next
// representable value toward zero (i.e. just above SYSMIS), so the two
// remain distinct; HIGHEST is the largest finite f64.
396 "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
397 "PCSYSMIS" => Token::PcSysmis,
398 "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
399 "HIGHEST" => Token::Float(f64::MAX.into()),
// ENDIAN is a literal 1 for big-endian output, 2 for little-endian.
400 "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
401 "COUNT" => Token::Count,
402 "COUNT8" => Token::Count8,
404 _ => return Err(anyhow!("invalid token '{s}'")),
408 _ => return Err(anyhow!("invalid input byte '{c}'")),