1 use float_next_after::NextAfter;
2 use num::{Bounded, Zero};
3 use ordered_float::OrderedFloat;
5 collections::{hash_map::Entry, HashMap},
6 error::Error as StdError,
7 fmt::{Display, Formatter, Result as FmtResult},
8 iter::{repeat, Peekable},
12 use crate::endian::{Endian, ToBytes};
/// Result type used throughout this module; the error type defaults to this
/// module's `Error` but can be overridden through the second parameter.
14 pub type Result<T, F = Error> = std::result::Result<T, F>;
/// Name of the input the error refers to, if the caller supplied one.
18 pub file_name: Option<String>,
/// Line number the error refers to, if one is known.
19 pub line_number: Option<usize>,
/// Builds an `Error` from an optional file name, an optional line number and
/// a message.
24 fn new(file_name: Option<&str>, line_number: Option<usize>, message: String) -> Error {
// Copy the borrowed name into an owned `String` so the error does not keep
// a borrow of the caller's data.
26 file_name: file_name.map(String::from),
// Opts `Error` into the standard error trait; the empty body means only the
// trait's default methods are used.
33 impl StdError for Error {}
// Formats the error as an optional `file_name:` prefix, an optional
// `line_number: ` prefix, and then the message.
35 impl Display for Error {
36 fn fmt(&self, f: &mut Formatter) -> FmtResult {
// File name first, immediately followed by a colon.
37 if let Some(ref file_name) = self.file_name {
38 write!(f, "{file_name}:")?;
// Special case for "file name but no line number".
// NOTE(review): the body of this branch is not visible in this listing;
// presumably it writes a separator before the message - confirm.
39 if self.line_number.is_none() {
// Line number, followed by a colon and a space.
43 if let Some(line_number) = self.line_number {
44 write!(f, "{line_number}: ")?;
// The message always comes last; its result is the result of `fmt`.
46 write!(f, "{}", self.message)
/// Translates the textual description in `input` into bytes, using `endian`
/// as the byte order for multi-byte values. `input_file_name` is used only
/// to label error messages.
///
/// The input is processed once with an empty symbol table. If that pass
/// recorded any labels, the input is processed a second time with the
/// populated table and the second result is used.
///
/// # Errors
///
/// Returns an `Error` if either pass fails, or with the message
/// "label ... used but never defined" for a label found in the table.
/// NOTE(review): the condition guarding that message is not visible in this
/// listing; presumably it fires for entries whose value is still `None`.
50 pub fn sack(input: &str, input_file_name: Option<&str>, endian: Endian) -> Result<Vec<u8>> {
// Maps label name -> offset; filled in as a side effect of the first pass.
51 let mut symbol_table = HashMap::new();
// First pass.
52 let output = _sack(input, input_file_name, endian, &mut symbol_table)?;
// A non-empty table means labels were used, so a second pass is needed.
53 let output = if !symbol_table.is_empty() {
// Inspect every label recorded by the first pass before re-running.
54 for (k, v) in symbol_table.iter() {
59 format!("label {k} used but never defined"),
// Second pass, run against the table produced by the first pass.
63 _sack(input, input_file_name, endian, &mut symbol_table)?
// Single pass over the input: creates a lexer, then parses data items into a
// fresh output buffer until `parse_data_item` stops returning `true`.
// `symbol_table` is shared with the caller so labels persist across passes.
72 input_file_name: Option<&str>,
74 symbol_table: &mut HashMap<String, Option<u32>>,
75 ) -> Result<Vec<u8>> {
// Constructing the lexer can itself fail.
76 let mut lexer = Lexer::new(input, input_file_name, endian)?;
77 let mut output = Vec::new();
// The loop body is empty on purpose: all the work is done by the call in the
// condition, and `?` aborts the pass on the first error.
78 while parse_data_item(&mut lexer, &mut output, symbol_table)? {}
// Parses one data item from the lexer and appends the bytes it produces to
// `output`. `symbol_table` maps label names to offsets; a `None` value marks
// a label that has been referenced but whose offset is not known yet.
// NOTE(review): several lines of this function (including its header) are not
// visible in this listing; the comments below describe only what the visible
// lines establish.
85 symbol_table: &mut HashMap<String, Option<u32>>,
// No current token means the input is exhausted.
87 if lexer.token.is_none() {
// Remember where this item's bytes start so a trailing `* count` can
// replicate exactly the bytes produced by this item.
91 let initial_len = output.len();
// Bare integer and float literals are written in the lexer's byte order.
93 Token::Integer(integer) => output.extend_from_slice(&lexer.endian.to_bytes(integer)),
94 Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
// Fixed 8-byte pattern, written as-is regardless of byte order.
// NOTE(review): the match arm header is not visible; presumably `PcSysmis`.
96 output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
// Sized integer lists. Note that `i8` and `i16` are range-checked as the
// unsigned types `u8` and `u16`, so e.g. `i8 244` is accepted and negative
// values are rejected.
98 Token::I8 => put_integers::<u8, 1>(lexer, "i8", output)?,
99 Token::I16 => put_integers::<u16, 2>(lexer, "i16", output)?,
100 Token::I64 => put_integers::<i64, 8>(lexer, "i64", output)?,
// A plain string literal contributes its bytes unchanged.
101 Token::String(string) => output.extend_from_slice(string.as_bytes()),
// Padded string (`s<size> "text"`): the next token must be a string.
103 let Some(Token::String(ref string)) = lexer.token else {
104 Err(lexer.error(format!("string expected after 's{size}'")))?
// Length in bytes, not characters.
106 let len = string.len();
108 Err(lexer.error(format!(
109 "{len}-byte string is longer than pad length {size}"
// Emit the string, then pad with spaces up to `size` bytes.
112 output.extend_from_slice(string.as_bytes());
113 output.extend(repeat(b' ').take(size - len));
// Parenthesized group: parse nested items until the closing parenthesis.
117 while lexer.token != Some(Token::RParen) {
118 parse_data_item(lexer, output, symbol_table)?;
// Counted groups: a 4-byte (`COUNT`) or 1-byte (`COUNT8`) count field followed
// by the group's contents.
122 Token::Count => put_counted_items::<u32, 4>(lexer, "COUNT", output, symbol_table)?,
123 Token::Count8 => put_counted_items::<u8, 1>(lexer, "COUNT8", output, symbol_table)?,
// `hex "..."`: the next token must be a string of hex digit pairs.
125 let Some(Token::String(ref string)) = lexer.token else {
126 Err(lexer.error(String::from("string expected after 'hex'")))?
128 let mut i = string.chars();
// NOTE(review): when the digits run out this returns from the whole function.
// `string` still borrows `lexer.token` here, so the string token cannot have
// been consumed yet, and the `*` / `;` handling further down is bypassed.
// This looks like it should leave the digit loop instead - confirm.
130 let Some(c0) = i.next() else { return Ok(true) };
131 let Some(c1) = i.next() else {
132 Err(lexer.error(String::from("hex string has odd number of characters")))?
// Both characters of the pair must be valid base-16 digits.
134 let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else {
135 Err(lexer.error(String::from("invalid digit in hex string")))?
// High nibble first; the result is at most 255, so the cast is lossless.
137 let byte = digit0 * 16 + digit1;
138 output.push(byte as u8);
// Label definition: the label's value is the current output offset.
141 Token::Label(name) => {
// NOTE(review): `as u32` silently truncates if the output exceeds u32::MAX bytes.
142 let value = output.len() as u32;
143 match symbol_table.entry(name) {
// First time this name is seen: record its offset.
144 Entry::Vacant(v) => {
145 v.insert(Some(value));
// Name already present (from an earlier pass, an earlier reference, or a
// duplicate definition).
// NOTE(review): `o` is bound immutably, so an entry holding `None` (a label
// referenced before its definition) cannot be filled in through it - verify
// that forward references are resolved somewhere.
147 Entry::Occupied(o) => {
148 if let Some(v) = o.get() {
// NOTE(review): the guarding condition is not visible; presumably the error is
// raised when the recorded offset differs from `value`. The message is generic.
150 Err(lexer.error(String::from("syntax error")))?
// Label reference: start from the label's known offset, or 0 if it is not
// known yet; an unseen label is registered with `None`.
157 let mut value = symbol_table.entry(name).or_insert(None).unwrap_or(0);
// Optional arithmetic: each `+` or `-` is followed by an operand.
160 let plus = match lexer.token {
161 Some(Token::Plus) => true,
162 Some(Token::Minus) => false,
// The operand is either another label reference or an integer literal.
167 let operand = match lexer.token {
168 Some(Token::At(ref name)) => if let Some(value) = symbol_table.get(name) {
// Unknown label: register it so it is tracked as referenced.
171 symbol_table.insert(name.clone(), None);
175 Some(Token::Integer(integer)) => integer
177 .map_err(|msg| lexer.error(format!("bad offset literal ({msg})")))?,
178 _ => Err(lexer.error(String::from("expecting @label or integer literal")))?,
// Checked arithmetic so overflow is reported as an error instead of wrapping.
183 value.checked_add(operand)
185 value.checked_sub(operand)
187 .ok_or_else(|| lexer.error(String::from("overflow in offset arithmetic")))?;
// Emit the computed offset in the lexer's byte order.
189 output.extend_from_slice(&lexer.endian.to_bytes(value));
// Optional repetition suffix: `* count`.
193 if lexer.token == Some(Token::Asterisk) {
195 let Token::Integer(count) = lexer.take()? else {
196 Err(lexer.error(String::from("positive integer expected after '*'")))?
// NOTE(review): the condition guarding this second error is not visible;
// presumably it rejects counts below 1.
199 Err(lexer.error(String::from("positive integer expected after '*'")))?
// The bytes `initial_len..final_len` are exactly what this item produced.
201 let final_len = output.len();
// Append another copy of this item's bytes (the enclosing loop is not visible).
203 output.extend_from_within(initial_len..final_len);
// Item terminator: `;` ends the item; `)` is left in place for the enclosing
// group to handle; anything else is an error.
207 Some(Token::Semicolon) => {
210 Some(Token::RParen) => (),
211 _ => Err(lexer.error(String::from("';' expected")))?,
/// Emits a parenthesized group of data items preceded by an `N`-byte count
/// field of type `T`, written in the lexer's byte order. `name` is the
/// keyword being parsed and is used only in error messages.
///
/// The count field is written as zero first and patched once the group has
/// been parsed and its size is known.
216 fn put_counted_items<T, const N: usize>(
219 output: &mut Vec<u8>,
220 symbol_table: &mut HashMap<String, Option<u32>>,
// `Zero` supplies the placeholder; `TryFrom<usize>` range-checks the final count.
223 T: Zero + TryFrom<usize>,
224 Endian: ToBytes<T, N>,
// Offset of the count field, recorded before the placeholder is appended.
226 let old_size = output.len();
// Placeholder count, overwritten below.
227 output.extend_from_slice(&lexer.endian.to_bytes(T::zero()));
228 if lexer.token != Some(Token::LParen) {
229 Err(lexer.error(format!("'(' expected after '{name}'")))?
// Parse nested items until the closing parenthesis.
232 while lexer.token != Some(Token::RParen) {
233 parse_data_item(lexer, output, symbol_table)?;
// NOTE(review): `old_size` was taken before the placeholder was appended, so
// as far as the visible lines show this count includes the N bytes of the
// count field itself - confirm that is intended.
236 let delta = output.len() - old_size;
// Narrow to the count type; inside the `else` block `delta` still refers to
// the original `usize` value, which is what the message prints.
237 let Ok(delta): Result<T, _> = delta.try_into() else {
238 Err(lexer.error(format!("{delta} bytes is too much for '{name}'")))?
// Back-patch the placeholder with the real count.
240 let dest = &mut output[old_size..old_size + N];
241 dest.copy_from_slice(&lexer.endian.to_bytes(delta));
/// Consumes consecutive integer tokens and appends each one to `output` as an
/// `N`-byte value of type `T` in the lexer's byte order. `name` is the
/// keyword being parsed and is used only in error messages.
///
/// # Errors
///
/// Fails if an integer does not fit in `T`, or with "integer expected after
/// ..." (the guarding condition is not visible in this listing; presumably it
/// fires when no integer followed the keyword).
245 fn put_integers<T, const N: usize>(
248 output: &mut Vec<u8>,
// `Bounded` and `Display` are needed to print the valid range in the error;
// `TryFrom<i64>` performs the range check.
251 T: Bounded + Display + TryFrom<i64> + Copy,
252 Endian: ToBytes<T, N>,
// Keep taking tokens for as long as the current one is an integer.
255 while let Some(integer) = lexer.take_if(|t| match t {
256 Token::Integer(integer) => Some(*integer),
// Range-check against the target type; inside the `else` block `integer`
// still refers to the unconverted value, which is what the message prints.
259 let Ok(integer) = integer.try_into() else {
260 Err(lexer.error(format!(
261 "{integer} is not in the valid range [{},{}]",
266 output.extend_from_slice(&lexer.endian.to_bytes(integer));
270 Err(lexer.error(format!("integer expected after '{name}'")))?
// Token type produced by the lexer (the enum header and most variants are
// not visible in this listing).
275 #[derive(PartialEq, Eq, Clone, Debug)]
// Floating-point literal. `f64` does not implement `Eq`, so the value is
// wrapped in `OrderedFloat` to allow the `Eq` derive above.
278 Float(OrderedFloat<f64>),
/// Character stream over the input, with one character of lookahead.
299 iter: Peekable<Chars<'a>>,
/// The current token; `None` once the input is exhausted.
300 token: Option<Token>,
/// Input name used to label error messages, if the caller supplied one.
301 input_file_name: Option<&'a str>,
/// Creates a lexer over `input` and reads the first token into `token`.
///
/// # Errors
///
/// Fails if the first token cannot be scanned.
307 fn new(input: &'a str, input_file_name: Option<&'a str>, endian: Endian) -> Result<Lexer<'a>> {
308 let mut lexer = Lexer {
309 iter: input.chars().peekable(),
// Prime the lookahead so `token` holds the first token from the start.
315 lexer.token = lexer.next()?;
/// Builds an `Error` carrying `message`, tagged with this lexer's input file
/// name and its current line number.
318 fn error(&self, message: String) -> Error {
319 Error::new(self.input_file_name, Some(self.line_number), message)
/// Takes ownership of the current token and reads the next one into its
/// place.
///
/// # Errors
///
/// Fails with "unexpected end of input" if there is no current token, or if
/// scanning the next token fails.
321 fn take(&mut self) -> Result<Token> {
// `Option::take` moves the token out and leaves `None` behind until refilled.
322 let Some(token) = self.token.take() else {
323 Err(self.error(String::from("unexpected end of input")))?
325 self.token = self.next()?;
/// Applies `condition` to the current token, if there is one. The visible
/// lines show the lexer advancing to the next token in one arm of the match
/// on the result.
/// NOTE(review): the match patterns are not visible in this listing;
/// presumably the lexer advances only when `condition` returns `Some`, and
/// that value is what gets returned - confirm.
328 fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
330 F: FnOnce(&Token) -> Option<T>,
// Borrow rather than move, so the token stays in place if it is not consumed.
332 let Some(ref token) = self.token else {
335 match condition(token) {
337 self.token = self.next()?;
/// Discards the current token, reads the next one, and returns a reference
/// to it (`None` if the input is now exhausted).
///
/// # Errors
///
/// Fails with "unexpected end of input" if there was no current token to
/// discard, or if scanning the next token fails.
343 fn get(&mut self) -> Result<Option<&Token>> {
344 if self.token.is_none() {
345 Err(self.error(String::from("unexpected end of input")))?
347 self.token = self.next()?;
// Converts `&Option<Token>` into `Option<&Token>`.
348 Ok((&self.token).into())
/// Scans the next token from the character stream. Returns `Ok(None)` at end
/// of input.
///
/// NOTE(review): many lines of this function are not visible in this
/// listing; the comments below describe only what the visible lines show.
352 fn next(&mut self) -> Result<Option<Token>> {
353 // Get the first character of the token, skipping past white space and
356 let Some(c) = self.iter.next() else {
// `#` starts a comment.
359 let c = if c == '#' {
361 match self.iter.next() {
// Input ended inside the comment: there are no more tokens.
362 None => return Ok(None),
// Line numbers are tracked for error reporting.
372 self.line_number += 1
// A character starts a token only if it is not white space and is neither
// `<` nor `>`.
373 } else if !c.is_whitespace() && c != '<' && c != '>' {
// Numeric literal: starts with a digit or a minus sign.
380 c if c.is_ascii_digit() || c == '-' => {
381 let mut s = String::from(c);
// Absorb digits, letters and dots; validation is left to `parse` below.
382 while let Some(c) = self
384 .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
// Without a '.', the literal is parsed as an integer; otherwise as a float.
391 } else if !s.contains('.') {
392 Token::Integer(s.parse().map_err(|msg| {
393 self.error(format!("bad integer literal '{s}' ({msg})"))
396 Token::Float(s.parse().map_err(|msg| {
397 self.error(format!("bad float literal '{s}' ({msg})"))
// String literal: accumulate characters; end of input or a new-line inside
// the string is an error.
402 let mut s = String::new();
404 match self.iter.next() {
405 None => Err(self.error(String::from("end-of-file inside string")))?,
406 Some('\n') => Err(self.error(String::from("new-line inside string")))?,
408 Some(c) => s.push(c),
// Single-character punctuation tokens.
413 ';' => Token::Semicolon,
414 '*' => Token::Asterisk,
416 '(' => Token::LParen,
417 ')' => Token::RParen,
// Identifier-like tokens: start with a letter, '@' or '_'.
418 c if c.is_alphabetic() || c == '@' || c == '_' => {
419 let mut s = String::from(c);
// Continue over digits, letters, '.' and '_'.
420 while let Some(c) = self.iter.next_if(|&c| {
421 c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_'
// A ':' immediately after the identifier is consumed here.
425 if self.iter.next_if_eq(&':').is_some() {
// A leading '@' is checked next.
427 } else if s.starts_with('@') {
// Any remaining identifier that starts with a lower-case 's' is treated as a
// padded-string size: the text after the 's' must parse as the size.
429 } else if let Some(count) = s.strip_prefix('s') {
430 Token::S(count.parse().map_err(|msg| {
431 self.error(format!("bad counted string '{s}' ({msg})"))
// Keywords. SYSMIS is the most negative finite f64, LOWEST is the next
// representable value above it, and HIGHEST is the largest finite f64.
438 "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
439 "PCSYSMIS" => Token::PcSysmis,
440 "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
441 "HIGHEST" => Token::Float(f64::MAX.into()),
// Integer encoding of the byte order: 1 for big-endian, 2 otherwise.
// NOTE(review): the keyword for this arm is not visible in this listing.
443 Token::Integer(if self.endian == Endian::Big { 1 } else { 2 })
445 "COUNT" => Token::Count,
446 "COUNT8" => Token::Count8,
448 _ => Err(self.error(format!("invalid token '{s}'")))?,
// Anything else cannot start a token. The message says "byte" although `c`
// is a `char`.
452 _ => Err(self.error(format!("invalid input byte '{c}'")))?,
460 use crate::endian::Endian;
461 use crate::sack::sack;
463 use hexplay::HexView;
/// Smoke test: runs `sack` over a small embedded input in big-endian mode and
/// prints a hex dump of the result. The visible lines check only that `sack`
/// and the dump succeed; no expected bytes are compared.
466 fn basic_sack() -> Result<()> {
468 "$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
470 28; # Nominal case size
475 "01 Jan 11"; "20:53:52";
476 "PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
479 let output = sack(input, None, Endian::Big)?;
// Dump the produced bytes for manual inspection.
480 HexView::new(&output).print()?;