1 use anyhow::{anyhow, Result};
2 use float_next_after::NextAfter;
4 use ordered_float::OrderedFloat;
5 use std::{fmt::Display, iter::Peekable, str::Chars};
7 use crate::endian::{Endian, ToBytes};
/// Assembles the textual description in `input` into a byte vector.
///
/// A `Lexer` is built over `input` with the requested `endian`, then
/// `parse_data_item` is called repeatedly, appending bytes to `output`, for as
/// long as it returns `Ok(true)`.
///
/// # Errors
///
/// Any error from constructing the lexer or from parsing a data item is
/// propagated to the caller with `?`.
// NOTE(review): the tail of this function is not visible in this excerpt;
// presumably it finishes with `Ok(output)` -- confirm.
9 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
10 let mut lexer = Lexer::new(input, endian)?;
11 let mut output = Vec::new();
// All of the work happens in the loop condition; the loop body is empty on
// purpose.
12 while parse_data_item(&mut lexer, &mut output)? {}
/// Parses one data item from `lexer` and appends its encoded bytes to
/// `output`.
///
/// The `bool` in the result is what drives the loop in `sack`: parsing
/// continues while this returns `Ok(true)`.
///
/// # Errors
///
/// Returns "syntax error" for a token that cannot start a data item, and
/// propagates errors from `collect_integers`.
16 fn parse_data_item(lexer: &mut Lexer, output: &mut Vec<u8>) -> Result<bool> {
// No current token means the input is exhausted.
// NOTE(review): the body of this branch is not visible here; presumably it
// returns `Ok(false)` so the caller stops looping -- confirm.
17 if lexer.token.is_none() {
// A bare integer or float literal is written using the lexer's endianness.
21 Token::Integer(integer) => output.extend_from_slice(&lexer.endian.to_bytes(integer)),
22 Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
// Fixed 8-byte pattern written verbatim, regardless of endianness.
// NOTE(review): the arm's pattern is not visible; presumably this is the
// `Token::PcSysmis` arm -- confirm.
24 output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
// Sized-integer keywords: each following integer is range-checked against
// the given type and emitted in that many bytes.  Note that "i8" and "i16"
// are checked against the unsigned types `u8` and `u16`.
26 Token::I8 => collect_integers::<u8, 1>(lexer, "i8", output)?,
27 Token::I16 => collect_integers::<u16, 2>(lexer, "i16", output)?,
28 Token::I64 => collect_integers::<i64, 8>(lexer, "i64", output)?,
// Any token not handled above cannot start a data item.
29 _ => return Err(anyhow!("syntax error")),
/// Consumes consecutive integer tokens from the lexer, converts each one to
/// `T`, and appends its `N`-byte encoding (in the lexer's endianness) to the
/// output.
///
/// `name` is the keyword that introduced the list; it is used only in the
/// "integer expected" error message.
///
/// # Errors
///
/// Fails if an integer does not fit in `T`, or with
/// "integer expected after '{name}'".
// NOTE(review): the parameter list and the condition guarding the "integer
// expected" error are not visible in this excerpt; presumably that error is
// raised when no integer followed the keyword -- confirm.
34 fn collect_integers<T, const N: usize>(
// `TryFrom<i64>` performs the range check below; `Display` is needed by the
// out-of-range message.  `Bounded` presumably supplies the limits printed in
// that message -- the arguments are not visible here.
40 T: Bounded + Display + TryFrom<i64> + Copy,
// The endianness type must know how to encode a `T` into `N` bytes.
41 Endian: ToBytes<T, N>,
// Keep taking tokens for as long as the current one is an integer; `take_if`
// leaves any other token in place.
44 while let Some(integer) = lexer.take_if(|t| match t {
45 Token::Integer(integer) => Some(*integer),
// Narrow the `i64` literal to the target type; failure means out of range.
48 let Ok(integer) = integer.try_into() else {
50 "{integer} is not in the valid range [{},{}]",
55 output.extend_from_slice(&lexer.endian.to_bytes(integer));
59 return Err(anyhow!("integer expected after '{name}'"));
// NOTE(review): the type header is not visible in this excerpt; presumably
// this is the `Token` enum produced by the lexer -- confirm.
64 #[derive(PartialEq, Eq, Clone)]
// Floating-point literal.  The value is wrapped in `OrderedFloat`, presumably
// because a bare `f64` is not `Eq` and would block the derive above.
67 Float(OrderedFloat<f64>),
// Character stream over the input text; peekable so the tokenizer can use
// `next_if` / `next_if_eq` to look one character ahead without consuming it.
88 iter: Peekable<Chars<'a>>,
/// Creates a lexer over `input`.
// NOTE(review): the rest of the constructor is not visible in this excerpt.
// Callers inspect `lexer.token` straight after construction, so presumably
// `new` also stores `endian` and reads the first token (which is why it
// returns `Result`) -- confirm.
95 fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
96 let mut lexer = Lexer {
97 iter: input.chars().peekable(),
/// Removes the current token from the lexer and advances to the next one.
///
/// # Errors
///
/// Fails with "unexpected end of input" if there is no current token, and
/// propagates any error from reading the next token.
// NOTE(review): the final expression is not visible; presumably the removed
// token is returned as `Ok(token)` -- confirm.
105 fn take(&mut self) -> Result<Token> {
// `Option::take` moves the token out and leaves `None` behind until the
// refill below.
106 let Some(token) = self.token.take() else {
107 return Err(anyhow!("unexpected end of input"));
// Refill the one-token lookahead.
109 self.token = self.next()?;
/// Conditionally consumes the current token.
///
/// `condition` inspects the current token by reference and returns `Some(T)`
/// to accept it.  When the lexer advances, the next token is read into
/// `self.token`, and errors from that read are propagated.
// NOTE(review): the match arms and the no-token branch are not fully visible
// in this excerpt.  Presumably `Some(value)` advances the lexer and yields
// `Ok(Some(value))`, while a rejected or missing token yields `Ok(None)` and
// leaves the lexer untouched -- confirm.
112 fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
114 F: FnOnce(&Token) -> Option<T>,
// Borrow the current token, if there is one, rather than moving it out.
116 let Some(ref token) = self.token else {
119 match condition(&token) {
// Advance past the token.
121 self.token = self.next()?;
/// Advances to the next token and returns a reference to it, or `None` if the
/// input ended.
///
/// # Errors
///
/// Fails with "unexpected end of input" if there is no current token before
/// the call, and propagates any error from reading the next token.
// NOTE(review): the receiver is `&'a mut self`, which ties the mutable borrow
// to the lexer's own lifetime parameter; this looks like it would keep the
// lexer borrowed for as long as the returned reference type allows -- confirm
// this is intended.
127 fn get(&'a mut self) -> Result<Option<&'a Token>> {
128 if self.token.is_none() {
129 Err(anyhow!("unexpected end of input"))
// Replace the current token with the next one, then hand out a borrowed view
// of it (`&Option<Token>` converted into `Option<&Token>`).
131 self.token = self.next()?;
132 Ok((&self.token).into())
/// Reads the next token from the character stream.
///
/// Returns `Ok(None)` when the input ends inside a `#` comment.
///
/// # Errors
///
/// Fails on malformed integer or float literals, on a string interrupted by a
/// new-line or end-of-file, on a bad counted-string size, on an unknown
/// keyword, and on any character that cannot start a token.
// NOTE(review): many lines of this function are not visible in this excerpt
// (loop headers, `else` arms, several token constructors); the comments below
// describe only what the visible lines show.  The plain end-of-input branch
// presumably also returns `Ok(None)` -- confirm.
136 fn next(&mut self) -> Result<Option<Token>> {
137 // Get the first character of the token, skipping past white space and
140 let Some(c) = self.iter.next() else {
// `#` is handled separately from ordinary characters (presumably it starts a
// comment that is skipped); hitting end of input while handling it ends the
// token stream.
143 let c = if c == '#' {
145 match self.iter.next() {
146 None => return Ok(None),
// Line counter maintained by the lexer.
156 self.line_number += 1
// White space, `<` and `>` all fail this test, so none of them can begin a
// token (presumably they are skipped).
157 } else if !c.is_whitespace() && c != '<' && c != '>' {
162 let token = match c {
// Numeric literal: starts with a digit or `-`, then continues over digits,
// letters and `.`.
163 c if c.is_ascii_digit() || c == '-' => {
164 let mut s = String::from(c);
165 while let Some(c) = self
167 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
// Text without a `.` is parsed as an integer; the float parse below handles
// the remaining case.
174 } else if !s.contains('.') {
177 .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
182 .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
// String literal: gather characters until the terminator; a string may not
// span lines or run off the end of the input.
187 let mut s = String::from(c);
189 match self.iter.next() {
190 None => return Err(anyhow!("end-of-file inside string")),
191 Some('\n') => return Err(anyhow!("new-line inside string")),
193 Some(c) => s.push(c),
// Single-character punctuation tokens.
198 ';' => Token::Semicolon,
199 '*' => Token::Asterisk,
201 '(' => Token::LParen,
202 ')' => Token::RParen,
// Word-like token: starts with a letter, `@` or `_`, then continues over
// digits, letters, `.` and `_`.
203 c if c.is_alphabetic() || c == '@' || c == '_' => {
204 let mut s = String::from(c);
205 while let Some(c) = self
207 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
// Classify the word, in order: followed by `:`, starting with `@`, starting
// with `s` (counted string; the rest is parsed as the count), or else a
// keyword from the table below.
211 if self.iter.next_if_eq(&':').is_some() {
213 } else if s.starts_with('@') {
215 } else if let Some(count) = s.strip_prefix('s') {
219 .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
// Keywords for special values.  SYSMIS is the most negative finite `f64`;
// LOWEST is the result of `next_after(0.0)` on that value (presumably the
// adjacent representable float toward zero); HIGHEST is the largest finite
// `f64`.
226 "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
227 "PCSYSMIS" => Token::PcSysmis,
228 "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
229 "HIGHEST" => Token::Float(f64::MAX.into()),
// ENDIAN becomes the integer 1 for big-endian output and 2 otherwise.
230 "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
231 "COUNT" => Token::Count,
232 "COUNT8" => Token::Count8,
234 _ => return Err(anyhow!("invalid token '{s}'")),
// Any other character cannot start a token.
238 _ => return Err(anyhow!("invalid input byte '{c}'")),