-use anyhow::{anyhow, Result};
use float_next_after::NextAfter;
use num::{Bounded, Zero};
use ordered_float::OrderedFloat;
use std::{
collections::{hash_map::Entry, HashMap},
- fmt::Display,
+ error::Error as StdError,
+ fmt::{Display, Formatter, Result as FmtResult},
iter::{repeat, Peekable},
str::Chars,
};
use crate::endian::{Endian, ToBytes};
-pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
- let mut lexer = Lexer::new(input, endian)?;
- while let Some(ref token) = lexer.token {
- println!("{token:?}");
- lexer.get()?;
+/// Result type used throughout this module; the error type defaults to this module's [`Error`].
+pub type Result<T, F = Error> = std::result::Result<T, F>;
+
+/// Error raised by this module, optionally tagged with the place it occurred.
+///
+/// `Display` renders it as `file:line: message`, leaving out whichever of the
+/// file name and line number is `None`.
+#[derive(Debug)]
+pub struct Error {
+ /// Name of the input file, if one was supplied.
+ pub file_name: Option<String>,
+ /// Line number to report, if one is known.
+ pub line_number: Option<usize>,
+ /// Description of what went wrong.
+ pub message: String,
+}
+
+impl Error {
+ /// Builds an error, copying the borrowed file name into an owned `String`.
+ fn new(file_name: Option<&str>, line_number: Option<usize>, message: String) -> Self {
+ let file_name = file_name.map(str::to_owned);
+ Self {
+ file_name,
+ line_number,
+ message,
+ }
+ }
+}
+
+// No methods are overridden; the trait's defaults are used as-is.
+impl StdError for Error {}
+
+impl Display for Error {
+ /// Writes the optional `file:line: ` prefix followed by the message.
+ fn fmt(&self, f: &mut Formatter) -> FmtResult {
+ match (self.file_name.as_deref(), self.line_number) {
+ (Some(file), Some(line)) => write!(f, "{file}:{line}: ")?,
+ (Some(file), None) => write!(f, "{file}: ")?,
+ (None, Some(line)) => write!(f, "{line}: ")?,
+ (None, None) => (),
+ }
+ write!(f, "{}", self.message)
}
+}
+pub fn sack(input: &str, input_file_name: Option<&str>, endian: Endian) -> Result<Vec<u8>> {
let mut symbol_table = HashMap::new();
- let output = _sack(input, endian, &mut symbol_table)?;
+ let output = _sack(input, input_file_name, endian, &mut symbol_table)?;
let output = if !symbol_table.is_empty() {
for (k, v) in symbol_table.iter() {
if v.is_none() {
- return Err(anyhow!("label {k} used but never defined"));
+ Err(Error::new(
+ input_file_name,
+ None,
+ format!("label {k} used but never defined"),
+ ))?
}
}
- _sack(input, endian, &mut symbol_table)?
+ _sack(input, input_file_name, endian, &mut symbol_table)?
} else {
output
};
fn _sack(
input: &str,
+ input_file_name: Option<&str>,
endian: Endian,
symbol_table: &mut HashMap<String, Option<u32>>,
) -> Result<Vec<u8>> {
- let mut lexer = Lexer::new(input, endian)?;
+ let mut lexer = Lexer::new(input, input_file_name, endian)?;
let mut output = Vec::new();
while parse_data_item(&mut lexer, &mut output, symbol_table)? {}
Ok(output)
Token::String(string) => output.extend_from_slice(string.as_bytes()),
Token::S(size) => {
let Some(Token::String(ref string)) = lexer.token else {
- return Err(anyhow!("string expected after 's{size}'"));
+ Err(lexer.error(format!("string expected after 's{size}'")))?
};
let len = string.len();
if len > size {
- return Err(anyhow!(
+ Err(lexer.error(format!(
"{len}-byte string is longer than pad length {size}"
- ));
+ )))?
}
output.extend_from_slice(string.as_bytes());
output.extend(repeat(b' ').take(size - len));
Token::Count8 => put_counted_items::<u8, 1>(lexer, "COUNT8", output, symbol_table)?,
Token::Hex => {
let Some(Token::String(ref string)) = lexer.token else {
- return Err(anyhow!("string expected after 'hex'"));
+ Err(lexer.error(String::from("string expected after 'hex'")))?
};
let mut i = string.chars();
loop {
let Some(c0) = i.next() else { return Ok(true) };
let Some(c1) = i.next() else {
- return Err(anyhow!("hex string has odd number of characters"));
+ Err(lexer.error(String::from("hex string has odd number of characters")))?
};
let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else {
- return Err(anyhow!("invalid digit in hex string"));
+ Err(lexer.error(String::from("invalid digit in hex string")))?
};
let byte = digit0 * 16 + digit1;
output.push(byte as u8);
Entry::Occupied(o) => {
if let Some(v) = o.get() {
if *v != value {
- return Err(anyhow!("syntax error"));
+ Err(lexer.error(String::from("syntax error")))?
}
}
}
.unwrap_or(0),
Some(Token::Integer(integer)) => integer
.try_into()
- .map_err(|msg| anyhow!("bad offset literal ({msg})"))?,
- _ => return Err(anyhow!("expecting @label or integer literal")),
+ .map_err(|msg| lexer.error(format!("bad offset literal ({msg})")))?,
+ _ => Err(lexer.error(String::from("expecting @label or integer literal")))?,
};
lexer.get()?;
} else {
value.checked_sub(operand)
}
- .ok_or_else(|| anyhow!("overflow in offset arithmetic"))?;
+ .ok_or_else(|| lexer.error(String::from("overflow in offset arithmetic")))?;
}
output.extend_from_slice(&lexer.endian.to_bytes(value));
}
if lexer.token == Some(Token::Asterisk) {
lexer.get()?;
let Token::Integer(count) = lexer.take()? else {
- return Err(anyhow!("positive integer expected after '*'"));
+ Err(lexer.error(String::from("positive integer expected after '*'")))?
};
if count < 1 {
- return Err(anyhow!("positive integer expected after '*'"));
+ Err(lexer.error(String::from("positive integer expected after '*'")))?
};
let final_len = output.len();
for _ in 1..count {
lexer.get()?;
}
Some(Token::RParen) => (),
- _ => return Err(anyhow!("';' expected")),
+ _ => Err(lexer.error(String::from("';' expected")))?,
}
Ok(true)
}
let old_size = output.len();
output.extend_from_slice(&lexer.endian.to_bytes(T::zero()));
if lexer.token != Some(Token::LParen) {
- return Err(anyhow!("'(' expected after '{name}'"));
+ Err(lexer.error(format!("'(' expected after '{name}'")))?
}
lexer.get()?;
while lexer.token != Some(Token::RParen) {
lexer.get()?;
let delta = output.len() - old_size;
let Ok(delta): Result<T, _> = delta.try_into() else {
- return Err(anyhow!("{delta} bytes is too much for '{name}'"));
+ Err(lexer.error(format!("{delta} bytes is too much for '{name}'")))?
};
let dest = &mut output[old_size..old_size + N];
dest.copy_from_slice(&lexer.endian.to_bytes(delta));
_ => None,
})? {
let Ok(integer) = integer.try_into() else {
- return Err(anyhow!(
+ Err(lexer.error(format!(
"{integer} is not in the valid range [{},{}]",
T::min_value(),
T::max_value()
- ));
+ )))?
};
output.extend_from_slice(&lexer.endian.to_bytes(integer));
n += 1;
}
if n == 0 {
- return Err(anyhow!("integer expected after '{name}'"));
+ Err(lexer.error(format!("integer expected after '{name}'")))?
}
Ok(())
}
+/// Tokenizer state: the current token plus what is needed to report errors.
struct Lexer<'a> {
+ /// Remaining input characters, wrapped so the next one can be peeked.
iter: Peekable<Chars<'a>>,
+ /// Current token; `take` and `get` report `None` as "unexpected end of input".
token: Option<Token>,
+ /// Input file name copied into errors built by `error`, if any.
+ input_file_name: Option<&'a str>,
+ /// Line number attached to errors built by `error`; initialized to 1 by `new`.
line_number: usize,
+ /// Byte order passed to `to_bytes` when encoding values; also selects the value of `ENDIAN`.
endian: Endian,
}
impl<'a> Lexer<'a> {
- fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
+ /// Creates a lexer over `input` and reads the first token into `token`.
+ ///
+ /// `input_file_name` is stored only so that errors can mention it.
+ /// Fails if reading the first token fails.
+ fn new(input: &'a str, input_file_name: Option<&'a str>, endian: Endian) -> Result<Lexer<'a>> {
let mut lexer = Lexer {
iter: input.chars().peekable(),
token: None,
+ input_file_name,
line_number: 1,
endian,
};
lexer.token = lexer.next()?;
Ok(lexer)
}
+ /// Wraps `message` in an `Error` carrying this lexer's file name and line number.
+ fn error(&self, message: String) -> Error {
+ let (file_name, line_number) = (self.input_file_name, Some(self.line_number));
+ Error::new(file_name, line_number, message)
+ }
+ /// Returns the current token by value and advances to the next one.
+ ///
+ /// Fails with "unexpected end of input" when there is no current token, or
+ /// with whatever error reading the next token produces.
fn take(&mut self) -> Result<Token> {
let Some(token) = self.token.take() else {
- return Err(anyhow!("unexpected end of input"));
+ Err(self.error(String::from("unexpected end of input")))?
};
self.token = self.next()?;
Ok(token)
}
fn get(&mut self) -> Result<Option<&Token>> {
if self.token.is_none() {
- Err(anyhow!("unexpected end of input"))
+ Err(self.error(String::from("unexpected end of input")))?
} else {
self.token = self.next()?;
Ok((&self.token).into())
}
};
- let token = match c {
- c if c.is_ascii_digit() || c == '-' => {
- let mut s = String::from(c);
- while let Some(c) = self
- .iter
- .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
- {
- s.push(c);
- }
+ let token =
+ match c {
+ c if c.is_ascii_digit() || c == '-' => {
+ let mut s = String::from(c);
+ while let Some(c) = self
+ .iter
+ .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
+ {
+ s.push(c);
+ }
- if s == "-" {
- Token::Minus
- } else if !s.contains('.') {
- Token::Integer(
- s.parse()
- .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
- )
- } else {
- Token::Float(
- s.parse()
- .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
- )
- }
- }
- '"' => {
- let mut s = String::new();
- loop {
- match self.iter.next() {
- None => return Err(anyhow!("end-of-file inside string")),
- Some('\n') => return Err(anyhow!("new-line inside string")),
- Some('"') => break,
- Some(c) => s.push(c),
+ if s == "-" {
+ Token::Minus
+ } else if !s.contains('.') {
+ Token::Integer(s.parse().map_err(|msg| {
+ self.error(format!("bad integer literal '{s}' ({msg})"))
+ })?)
+ } else {
+ Token::Float(s.parse().map_err(|msg| {
+ self.error(format!("bad float literal '{s}' ({msg})"))
+ })?)
}
}
- Token::String(s)
- }
- ';' => Token::Semicolon,
- '*' => Token::Asterisk,
- '+' => Token::Plus,
- '(' => Token::LParen,
- ')' => Token::RParen,
- c if c.is_alphabetic() || c == '@' || c == '_' => {
- let mut s = String::from(c);
- while let Some(c) = self
- .iter
- .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
- {
- s.push(c);
+ '"' => {
+ let mut s = String::new();
+ loop {
+ match self.iter.next() {
+ None => Err(self.error(String::from("end-of-file inside string")))?,
+ Some('\n') => Err(self.error(String::from("new-line inside string")))?,
+ Some('"') => break,
+ Some(c) => s.push(c),
+ }
+ }
+ Token::String(s)
}
- if self.iter.next_if_eq(&':').is_some() {
- Token::Label(s)
- } else if s.starts_with('@') {
- Token::At(s)
- } else if let Some(count) = s.strip_prefix('s') {
- Token::S(
- count
- .parse()
- .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
- )
- } else {
- match &s[..] {
- "i8" => Token::I8,
- "i16" => Token::I16,
- "i64" => Token::I64,
- "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
- "PCSYSMIS" => Token::PcSysmis,
- "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
- "HIGHEST" => Token::Float(f64::MAX.into()),
- "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
- "COUNT" => Token::Count,
- "COUNT8" => Token::Count8,
- "hex" => Token::Hex,
- _ => return Err(anyhow!("invalid token '{s}'")),
+ ';' => Token::Semicolon,
+ '*' => Token::Asterisk,
+ '+' => Token::Plus,
+ '(' => Token::LParen,
+ ')' => Token::RParen,
+ c if c.is_alphabetic() || c == '@' || c == '_' => {
+ let mut s = String::from(c);
+ while let Some(c) = self.iter.next_if(|&c| {
+ c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_'
+ }) {
+ s.push(c);
+ }
+ if self.iter.next_if_eq(&':').is_some() {
+ Token::Label(s)
+ } else if s.starts_with('@') {
+ Token::At(s)
+ } else if let Some(count) = s.strip_prefix('s') {
+ Token::S(count.parse().map_err(|msg| {
+ self.error(format!("bad counted string '{s}' ({msg})"))
+ })?)
+ } else {
+ match &s[..] {
+ "i8" => Token::I8,
+ "i16" => Token::I16,
+ "i64" => Token::I64,
+ "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
+ "PCSYSMIS" => Token::PcSysmis,
+ "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
+ "HIGHEST" => Token::Float(f64::MAX.into()),
+ "ENDIAN" => {
+ Token::Integer(if self.endian == Endian::Big { 1 } else { 2 })
+ }
+ "COUNT" => Token::Count,
+ "COUNT8" => Token::Count8,
+ "hex" => Token::Hex,
+ _ => Err(self.error(format!("invalid token '{s}'")))?,
+ }
}
}
- }
- _ => return Err(anyhow!("invalid input byte '{c}'")),
- };
+ _ => Err(self.error(format!("invalid input byte '{c}'")))?,
+ };
Ok(Some(token))
}
}
"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
i8 0 *3;
"#;
- let output = sack(input, Endian::Big)?;
+ let output = sack(input, None, Endian::Big)?;
HexView::new(&output).print()?;
Ok(())
}