1 use anyhow::{anyhow, Result};
2 use float_next_after::NextAfter;
3 use std::{iter::Peekable, str::Chars};
5 use crate::endian::Endian;
/// Processes `input` using the given byte order and, per the signature,
/// yields the result as a byte vector.
///
/// The visible part of the body only constructs a `Lexer` over `input`;
/// a failure in `Lexer::new` is propagated to the caller through `?`.
/// NOTE(review): the code that actually produces the output bytes is not
/// visible in this excerpt.
7 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
8 let lexer = Lexer::new(input, endian)?;
9 //let mut output = Vec::new();
// Character stream over the input text. It is peekable so the lexer can
// consume a following character conditionally (`next_if` / `next_if_eq`).
36 iter: Peekable<Chars<'a>>,
/// Creates a lexer that reads characters from `input`.
///
/// The character iterator is wrapped with `peekable()` so later scanning can
/// look one character ahead.
/// NOTE(review): the remaining fields and the reason construction is fallible
/// are not visible in this excerpt; `endian` is presumably stored, since the
/// `ENDIAN` keyword handling reads `self.endian` - confirm.
43 fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
44 let mut lexer = Lexer {
45 iter: input.chars().peekable(),
/// Returns a borrowed view of the token held in `self.token`.
///
/// Visible behavior: when no token is currently held, the error
/// "unexpected end of input" is produced; the code also stores the result of
/// `self.next()` in `self.token` (propagating its error with `?`) and returns
/// that field as `Option<&Token>`.
/// NOTE(review): the line between the error and the assignment is not visible
/// here, so the exact branch structure should be confirmed.
/// NOTE(review): `&'a mut self` appears to reuse the lexer's own lifetime
/// parameter, which would keep the lexer mutably borrowed for all of `'a`
/// after a single call - confirm whether `&mut self` was intended.
53 fn get(&'a mut self) -> Result<Option<&'a Token>> {
54 if self.token.is_none() {
55 Err(anyhow!("unexpected end of input"))
57 self.token = self.next()?;
// `&Option<Token>` -> `Option<&Token>`; same result as `self.token.as_ref()`.
58 Ok((&self.token).into())
/// Scans the next token from the character stream.
///
/// Returns `Ok(Some(token))` for a recognized token and `Ok(None)` when the
/// input runs out (at least in the visible `match` arm below). Malformed
/// input is reported as an `anyhow` error whose message names the offending
/// text.
/// NOTE(review): many lines of this function are not visible in this excerpt;
/// the comments below describe only the visible fragments.
62 fn next(&mut self) -> Result<Option<Token>> {
63 // Get the first character of the token, skipping past white space and
66 let Some(c) = self.iter.next() else {
71 match self.iter.next() {
// Running out of characters here is not an error: report "no more tokens".
72 None => return Ok(None),
83 } else if !c.is_whitespace() && c != '<' && c != '>' {
// Numeric literal: starts with an ASCII digit or '-', then greedily takes
// digits, alphabetic characters and '.'.
89 c if c.is_ascii_digit() || c == '-' => {
90 let mut s = String::from(c);
91 while let Some(c) = self
93 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
// Text without a '.' takes the integer path; the float path follows it.
100 } else if !s.contains('.') {
103 .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
108 .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
// String scanning (arm header not visible): characters are appended to `s`;
// end of input or a newline while inside the string is an error.
113 let mut s = String::from(c);
115 match self.iter.next() {
116 None => return Err(anyhow!("end-of-file inside string")),
117 Some('\n') => return Err(anyhow!("new-line inside string")),
119 Some(c) => s.push(c),
// Single-character punctuation tokens.
124 ';' => Token::Semicolon,
125 '*' => Token::Asterisk,
127 '(' => Token::LParen,
128 ')' => Token::RParen,
// Word-like token: starts with an alphabetic character, '@' or '_', then
// greedily takes digits, alphabetic characters, '.' and '_'.
129 c if c.is_alphabetic() || c == '@' || c == '_' => {
130 let mut s = String::from(c);
131 while let Some(c) = self
133 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
// The word is classified in order: a directly following ':' (which is
// consumed), a leading '@', a leading 's' followed by a count, and finally
// the fixed keywords below. The tokens built by the first three cases are
// not visible in this excerpt.
137 if self.iter.next_if_eq(&':').is_some() {
139 } else if s.starts_with('@') {
141 } else if let Some(count) = s.strip_prefix('s') {
145 .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
// Keywords. SYSMIS is the most negative finite f64; LOWEST is that value
// moved one representable step toward 0.0; HIGHEST is the largest finite f64.
152 "SYSMIS" => Token::Float(-f64::MAX),
153 "PCSYSMIS" => Token::PcSysmis,
154 "LOWEST" => Token::Float((-f64::MAX).next_after(0.0)),
155 "HIGHEST" => Token::Float(f64::MAX),
// ENDIAN evaluates to 1 for a big-endian lexer and 2 otherwise.
156 "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
157 "COUNT" => Token::Count,
158 "COUNT8" => Token::Count8,
// Any other word is rejected.
160 _ => return Err(anyhow!("invalid token '{s}'")),
// Any other starting character is rejected.
164 _ => return Err(anyhow!("invalid input byte '{c}'")),