1 use anyhow::{anyhow, Result};
2 use float_next_after::NextAfter;
3 use num::{Bounded, Zero};
4 use ordered_float::OrderedFloat;
6 collections::{hash_map::Entry, HashMap},
8 iter::{repeat, Peekable},
12 use crate::endian::{Endian, ToBytes};
/// Assembles the textual description in `input` into bytes, using `endian`
/// as the byte order for multi-byte values.
///
/// NOTE(review): this listing omits some original lines (the embedded line
/// numbers skip), so the comments here cover only the statements that are
/// visible.
14 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
// Debug aid: walk the token stream and print every token to stdout.
// NOTE(review): the statement that advances the lexer is not visible here,
// and this output is produced on every call -- confirm it is meant to stay.
15 let mut lexer = Lexer::new(input, endian)?;
16 while let Some(ref token) = lexer.token {
17 println!("{token:?}");
// First pass: produce output while `_sack` fills in `symbol_table`.
21 let mut symbol_table = HashMap::new();
22 let output = _sack(input, endian, &mut symbol_table)?;
// A non-empty table means labels were involved, so the table is checked and
// a second pass is run with it.
23 let output = if !symbol_table.is_empty() {
24 for (k, v) in symbol_table.iter() {
// Reported per label; the guarding condition is not visible here
// (presumably the entry's value is `None`).
26 return Err(anyhow!("label {k} used but never defined"));
// Second pass over the same input, now starting from the populated table.
29 _sack(input, endian, &mut symbol_table)?
// Tail of the signature of the single-pass worker that `sack` calls as
// `_sack`. `symbol_table` maps a label name to its byte offset; entries are
// `None` for labels that have been referenced but have no recorded offset.
39 symbol_table: &mut HashMap<String, Option<u32>>,
40 ) -> Result<Vec<u8>> {
// Each pass starts with a fresh lexer and an empty output buffer.
41 let mut lexer = Lexer::new(input, endian)?;
42 let mut output = Vec::new();
// Keep parsing data items for as long as `parse_data_item` yields `true`;
// any error is propagated by `?`.
43 while parse_data_item(&mut lexer, &mut output, symbol_table)? {}
// Tail of the signature of the item parser that is called elsewhere in this
// file as `parse_data_item(lexer, output, symbol_table)`. It parses one data
// item from `lexer` and appends its bytes to `output`.
// NOTE(review): several original lines are missing from this listing; the
// comments below describe only the visible statements.
50 symbol_table: &mut HashMap<String, Option<u32>>,
// No current token means end of input (the returned value is not visible
// here; presumably `Ok(false)` so that the caller's loop stops).
52 if lexer.token.is_none() {
// Remember where this item's bytes start, so that a trailing `* count` can
// duplicate exactly those bytes.
56 let initial_len = output.len();
// Integer and float literals are emitted in the lexer's byte order.
58 Token::Integer(integer) => output.extend_from_slice(&lexer.endian.to_bytes(integer)),
59 Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
// Fixed 8-byte pattern; the match arm is not visible here (presumably
// `Token::PcSysmis`).
61 output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
// Sized-integer keywords: the integers that follow are written at widths of
// 1, 2 and 8 bytes. Note that `i8` and `i16` are range-checked against the
// unsigned types `u8` and `u16`.
63 Token::I8 => put_integers::<u8, 1>(lexer, "i8", output)?,
64 Token::I16 => put_integers::<u16, 2>(lexer, "i16", output)?,
65 Token::I64 => put_integers::<i64, 8>(lexer, "i64", output)?,
// A plain string literal contributes its bytes unchanged.
66 Token::String(string) => output.extend_from_slice(string.as_bytes()),
// Fixed-width string (`s{size}` followed by a string): the string must not
// be longer than `size` bytes and is padded on the right with spaces.
68 let Some(Token::String(ref string)) = lexer.token else {
69 return Err(anyhow!("string expected after 's{size}'"));
71 let len = string.len();
74 "{len}-byte string is longer than pad length {size}"
77 output.extend_from_slice(string.as_bytes());
78 output.extend(repeat(b' ').take(size - len));
// Parenthesized group: parse nested items until the closing ')'.
82 while lexer.token != Some(Token::RParen) {
83 parse_data_item(lexer, output, symbol_table)?;
// Counted groups, with a 4-byte (`COUNT`) or 1-byte (`COUNT8`) count field.
87 Token::Count => put_counted_items::<u32, 4>(lexer, "COUNT", output, symbol_table)?,
88 Token::Count8 => put_counted_items::<u8, 1>(lexer, "COUNT8", output, symbol_table)?,
// `hex` followed by a string: each pair of hexadecimal digits becomes one
// output byte.
90 let Some(Token::String(ref string)) = lexer.token else {
91 return Err(anyhow!("string expected after 'hex'"));
93 let mut i = string.chars();
// Running out of characters at a pair boundary ends the item successfully.
95 let Some(c0) = i.next() else { return Ok(true) };
96 let Some(c1) = i.next() else {
97 return Err(anyhow!("hex string has odd number of characters"));
99 let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else {
100 return Err(anyhow!("invalid digit in hex string"));
102 let byte = digit0 * 16 + digit1;
103 output.push(byte as u8);
// Label definition: record the current output offset under the label name.
106 Token::Label(name) => {
107 let value = output.len() as u32;
108 match symbol_table.entry(name) {
109 Entry::Vacant(v) => {
110 v.insert(Some(value));
// The label already has an entry; the check that leads to the error below is
// not fully visible in this listing.
112 Entry::Occupied(o) => {
113 if let Some(v) = o.get() {
115 return Err(anyhow!("syntax error"));
// Label reference (match arm not visible; presumably `Token::At(name)`):
// start from the label's recorded offset. An unknown label is entered as
// `None` and contributes 0 for now.
122 let mut value = symbol_table.entry(name).or_insert(None).unwrap_or(0);
// Optional arithmetic on the offset: `+` adds the operand, `-` subtracts it.
125 let plus = match lexer.token {
126 Some(Token::Plus) => true,
127 Some(Token::Minus) => false,
// The operand is either another label reference or an integer literal.
132 let operand = match lexer.token {
133 Some(Token::At(ref name)) => if let Some(value) = symbol_table.get(name) {
// An operand label with no entry yet gets a `None` placeholder.
136 symbol_table.insert(name.clone(), None);
140 Some(Token::Integer(integer)) => integer
142 .map_err(|msg| anyhow!("bad offset literal ({msg})"))?,
143 _ => return Err(anyhow!("expecting @label or integer literal")),
// Checked arithmetic: overflow is reported as an error rather than wrapping.
148 value.checked_add(operand)
150 value.checked_sub(operand)
152 .ok_or_else(|| anyhow!("overflow in offset arithmetic"))?;
// Emit the resulting offset in the lexer's byte order.
154 output.extend_from_slice(&lexer.endian.to_bytes(value));
// Optional repetition suffix `* count`.
158 if lexer.token == Some(Token::Asterisk) {
160 let Token::Integer(count) = lexer.take()? else {
161 return Err(anyhow!("positive integer expected after '*'"));
// Second use of the same message; its condition is not visible (presumably
// a check that `count` is positive).
164 return Err(anyhow!("positive integer expected after '*'"));
// Append another copy of the bytes this item produced (the surrounding
// repeat loop is not visible in this listing).
166 let final_len = output.len();
168 output.extend_from_within(initial_len..final_len);
// Item terminator: a ';' is handled in its own arm, a ')' is accepted as-is
// (the enclosing group loop tests for it), and anything else is an error.
172 Some(Token::Semicolon) => {
175 Some(Token::RParen) => (),
176 _ => return Err(anyhow!("';' expected")),
/// Emits an `N`-byte count field of type `T`, followed by the items of a
/// parenthesized group, then overwrites the count field with the measured
/// byte count.
///
/// `name` is used only in error messages. Fails if '(' does not follow, or
/// if the byte count does not fit in `T`.
///
/// NOTE(review): some original lines are missing from this listing. As far
/// as is visible, the count is measured from `old_size`, which is taken
/// before the placeholder is written, so it includes the `N` bytes of the
/// count field itself -- confirm that this is intended.
181 fn put_counted_items<T, const N: usize>(
184 output: &mut Vec<u8>,
185 symbol_table: &mut HashMap<String, Option<u32>>,
188 T: Zero + TryFrom<usize>,
189 Endian: ToBytes<T, N>,
// Reserve the count field by writing a zero of type `T`; it is patched
// below.
191 let old_size = output.len();
192 output.extend_from_slice(&lexer.endian.to_bytes(T::zero()));
193 if lexer.token != Some(Token::LParen) {
194 return Err(anyhow!("'(' expected after '{name}'"));
// Parse the group's items until the closing ')'.
197 while lexer.token != Some(Token::RParen) {
198 parse_data_item(lexer, output, symbol_table)?;
// Number of bytes appended since `old_size`, converted to `T` with a range
// check.
201 let delta = output.len() - old_size;
202 let Ok(delta): Result<T, _> = delta.try_into() else {
203 return Err(anyhow!("{delta} bytes is too much for '{name}'"));
// Overwrite the placeholder in place with the real count.
205 let dest = &mut output[old_size..old_size + N];
206 dest.copy_from_slice(&lexer.endian.to_bytes(delta));
/// Consumes consecutive integer tokens and appends each one to `output` as
/// an `N`-byte value of type `T`, in the lexer's byte order.
///
/// `name` is the keyword that introduced the integers and is used only in
/// error messages. An integer that cannot be converted to `T` is rejected
/// with a message giving a valid range.
///
/// NOTE(review): some original lines are missing from this listing.
210 fn put_integers<T, const N: usize>(
213 output: &mut Vec<u8>,
216 T: Bounded + Display + TryFrom<i64> + Copy,
217 Endian: ToBytes<T, N>,
// Take tokens for as long as the current token is an integer.
220 while let Some(integer) = lexer.take_if(|t| match t {
221 Token::Integer(integer) => Some(*integer),
// Range-checked conversion from the lexer's integer type to `T`.
224 let Ok(integer) = integer.try_into() else {
226 "{integer} is not in the valid range [{},{}]",
231 output.extend_from_slice(&lexer.endian.to_bytes(integer));
// The guarding condition is not visible here (presumably: no integer was
// consumed at all).
235 return Err(anyhow!("integer expected after '{name}'"));
// Token type produced by the lexer (the enum header and most variants are
// not visible in this listing).
240 #[derive(PartialEq, Eq, Clone, Debug)]
// The float payload is wrapped in `OrderedFloat`; a bare `f64` does not
// implement `Eq`, which the derive above requires of every field.
243 Float(OrderedFloat<f64>),
// Character stream over the input, with one character of lookahead.
264 iter: Peekable<Chars<'a>>,
// Current token; `None` once the end of the input has been reached.
265 token: Option<Token>,
/// Creates a lexer over `input` and reads the first token into `token`.
/// Fails if reading that first token fails.
271 fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
272 let mut lexer = Lexer {
273 iter: input.chars().peekable(),
// Prime the lookahead so that `token` is valid right after construction.
278 lexer.token = lexer.next()?;
/// Moves the current token out of the lexer and advances to the next one.
/// Fails with "unexpected end of input" when there is no current token.
281 fn take(&mut self) -> Result<Token> {
282 let Some(token) = self.token.take() else {
283 return Err(anyhow!("unexpected end of input"));
// Refill the lookahead before handing the taken token back.
285 self.token = self.next()?;
/// Applies `condition` to the current token, if there is one. The lexer
/// advances in one arm of the `match` on its result.
///
/// NOTE(review): the match arms and return statements are not visible in
/// this listing; presumably a `Some` result consumes the token and is
/// returned, and anything else leaves the lexer untouched.
288 fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
290 F: FnOnce(&Token) -> Option<T>,
// No current token: nothing to test.
292 let Some(ref token) = self.token else {
295 match condition(token) {
297 self.token = self.next()?;
/// Advances to the next token and returns a reference to it (`None` if the
/// input ended). Fails with "unexpected end of input" if the lexer was
/// already at the end before the call.
303 fn get(&mut self) -> Result<Option<&Token>> {
304 if self.token.is_none() {
305 Err(anyhow!("unexpected end of input"))
307 self.token = self.next()?;
// Converts `&Option<Token>` into `Option<&Token>`.
308 Ok((&self.token).into())
/// Scans and returns the next token from the character stream, or `None` at
/// the end of the input. Malformed input is reported as an error.
///
/// NOTE(review): many original lines are missing from this listing; the
/// comments below describe only the visible statements.
312 fn next(&mut self) -> Result<Option<Token>> {
313 // Get the first character of the token, skipping past white space and
316 let Some(c) = self.iter.next() else {
// '#' is handled specially here: characters are read after it, and running
// out of input at that point ends the token stream.
319 let c = if c == '#' {
321 match self.iter.next() {
322 None => return Ok(None),
// Line counter update (its condition is not visible; presumably a new-line
// character was read).
332 self.line_number += 1
// '<' and '>' are excluded here together with white space.
333 } else if !c.is_whitespace() && c != '<' && c != '>' {
338 let token = match c {
// Numeric literal: starts with a digit or '-', and continues through
// digits, letters and '.'.
339 c if c.is_ascii_digit() || c == '-' => {
340 let mut s = String::from(c);
341 while let Some(c) = self
343 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
// Without a '.' the text is parsed as an integer; the other visible branch
// parses a float.
350 } else if !s.contains('.') {
353 .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
358 .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
// String literal: characters are collected into `s`; end of input or a
// new-line inside the string is an error.
363 let mut s = String::new();
365 match self.iter.next() {
366 None => return Err(anyhow!("end-of-file inside string")),
367 Some('\n') => return Err(anyhow!("new-line inside string")),
369 Some(c) => s.push(c),
// Single-character punctuation tokens.
374 ';' => Token::Semicolon,
375 '*' => Token::Asterisk,
377 '(' => Token::LParen,
378 ')' => Token::RParen,
// Word: starts with a letter, '@' or '_', and continues through digits,
// letters, '.' and '_'.
379 c if c.is_alphabetic() || c == '@' || c == '_' => {
380 let mut s = String::from(c);
381 while let Some(c) = self
383 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
// Classification of the word, in order: followed by ':', starting with '@',
// starting with 's' (a counted string such as `s60`), then the fixed
// keywords below.
387 if self.iter.next_if_eq(&':').is_some() {
389 } else if s.starts_with('@') {
391 } else if let Some(count) = s.strip_prefix('s') {
395 .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
// Named constants: SYSMIS is `-f64::MAX`, LOWEST is the next value after it
// toward zero, and HIGHEST is `f64::MAX`.
402 "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
403 "PCSYSMIS" => Token::PcSysmis,
404 "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
405 "HIGHEST" => Token::Float(f64::MAX.into()),
// ENDIAN evaluates to 1 for big-endian output and to 2 otherwise.
406 "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
407 "COUNT" => Token::Count,
408 "COUNT8" => Token::Count8,
410 _ => return Err(anyhow!("invalid token '{s}'")),
// Any other starting character is rejected.
414 _ => return Err(anyhow!("invalid input byte '{c}'")),
422 use crate::endian::Endian;
423 use crate::sack::sack;
425 use hexplay::HexView;
// Smoke test: assembles a sample input with big-endian byte order and prints
// the result as a hex dump. Nothing is asserted about the bytes themselves in
// the visible lines; the test fails only if `sack` or the printing returns an
// error.
428 fn basic_sack() -> Result<()> {
430 "$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
432 28; # Nominal case size
437 "01 Jan 11"; "20:53:52";
438 "PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
// Assemble the input, then dump the bytes for visual inspection.
441 let output = sack(input, Endian::Big)?;
442 HexView::new(&output).print()?;