Add file names and line numbers to error messages.
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 30 Jul 2023 15:27:30 +0000 (08:27 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 30 Jul 2023 15:27:30 +0000 (08:27 -0700)
rust/src/sack.rs
rust/tests/sack.rs

index 1b711544441df9b7e180d0adc81d441d10241485..1e3b4fff1189e5bd96526c9442c21ddf8571f9ea 100644 (file)
@@ -1,32 +1,66 @@
-use anyhow::{anyhow, Result};
 use float_next_after::NextAfter;
 use num::{Bounded, Zero};
 use ordered_float::OrderedFloat;
 use std::{
     collections::{hash_map::Entry, HashMap},
-    fmt::Display,
+    error::Error as StdError,
+    fmt::{Display, Formatter, Result as FmtResult},
     iter::{repeat, Peekable},
     str::Chars,
 };
 
 use crate::endian::{Endian, ToBytes};
 
-pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
-    let mut lexer = Lexer::new(input, endian)?;
-    while let Some(ref token) = lexer.token {
-        println!("{token:?}");
-        lexer.get()?;
+pub type Result<T, F = Error> = std::result::Result<T, F>;
+
+#[derive(Debug)]
+pub struct Error {
+    pub file_name: Option<String>,
+    pub line_number: Option<usize>,
+    pub message: String,
+}
+
+impl Error {
+    fn new(file_name: Option<&str>, line_number: Option<usize>, message: String) -> Error {
+        Error {
+            file_name: file_name.map(String::from),
+            line_number,
+            message,
+        }
+    }
+}
+
+impl StdError for Error {}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        if let Some(ref file_name) = self.file_name {
+            write!(f, "{file_name}:")?;
+            if self.line_number.is_none() {
+                write!(f, " ")?;
+            }
+        }
+        if let Some(line_number) = self.line_number {
+            write!(f, "{line_number}: ")?;
+        }
+        write!(f, "{}", self.message)
     }
+}
 
+pub fn sack(input: &str, input_file_name: Option<&str>, endian: Endian) -> Result<Vec<u8>> {
     let mut symbol_table = HashMap::new();
-    let output = _sack(input, endian, &mut symbol_table)?;
+    let output = _sack(input, input_file_name, endian, &mut symbol_table)?;
     let output = if !symbol_table.is_empty() {
         for (k, v) in symbol_table.iter() {
             if v.is_none() {
-                return Err(anyhow!("label {k} used but never defined"));
+                Err(Error::new(
+                    input_file_name,
+                    None,
+                    format!("label {k} used but never defined"),
+                ))?
             }
         }
-        _sack(input, endian, &mut symbol_table)?
+        _sack(input, input_file_name, endian, &mut symbol_table)?
     } else {
         output
     };
@@ -35,10 +69,11 @@ pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
 
 fn _sack(
     input: &str,
+    input_file_name: Option<&str>,
     endian: Endian,
     symbol_table: &mut HashMap<String, Option<u32>>,
 ) -> Result<Vec<u8>> {
-    let mut lexer = Lexer::new(input, endian)?;
+    let mut lexer = Lexer::new(input, input_file_name, endian)?;
     let mut output = Vec::new();
     while parse_data_item(&mut lexer, &mut output, symbol_table)? {}
     Ok(output)
@@ -66,13 +101,13 @@ fn parse_data_item(
         Token::String(string) => output.extend_from_slice(string.as_bytes()),
         Token::S(size) => {
             let Some(Token::String(ref string)) = lexer.token else {
-                return Err(anyhow!("string expected after 's{size}'"));
+                Err(lexer.error(format!("string expected after 's{size}'")))?
             };
             let len = string.len();
             if len > size {
-                return Err(anyhow!(
+                Err(lexer.error(format!(
                     "{len}-byte string is longer than pad length {size}"
-                ));
+                )))?
             }
             output.extend_from_slice(string.as_bytes());
             output.extend(repeat(b' ').take(size - len));
@@ -88,16 +123,16 @@ fn parse_data_item(
         Token::Count8 => put_counted_items::<u8, 1>(lexer, "COUNT8", output, symbol_table)?,
         Token::Hex => {
             let Some(Token::String(ref string)) = lexer.token else {
-                return Err(anyhow!("string expected after 'hex'"));
+                Err(lexer.error(String::from("string expected after 'hex'")))?
             };
             let mut i = string.chars();
             loop {
                 let Some(c0) = i.next() else { return Ok(true) };
                 let Some(c1) = i.next() else {
-                    return Err(anyhow!("hex string has odd number of characters"));
+                    Err(lexer.error(String::from("hex string has odd number of characters")))?
                 };
                 let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else {
-                    return Err(anyhow!("invalid digit in hex string"));
+                    Err(lexer.error(String::from("invalid digit in hex string")))?
                 };
                 let byte = digit0 * 16 + digit1;
                 output.push(byte as u8);
@@ -112,7 +147,7 @@ fn parse_data_item(
                 Entry::Occupied(o) => {
                     if let Some(v) = o.get() {
                         if *v != value {
-                            return Err(anyhow!("syntax error"));
+                            Err(lexer.error(String::from("syntax error")))?
                         }
                     }
                 }
@@ -139,8 +174,8 @@ fn parse_data_item(
                     .unwrap_or(0),
                     Some(Token::Integer(integer)) => integer
                         .try_into()
-                        .map_err(|msg| anyhow!("bad offset literal ({msg})"))?,
-                    _ => return Err(anyhow!("expecting @label or integer literal")),
+                        .map_err(|msg| lexer.error(format!("bad offset literal ({msg})")))?,
+                    _ => Err(lexer.error(String::from("expecting @label or integer literal")))?,
                 };
                 lexer.get()?;
 
@@ -149,7 +184,7 @@ fn parse_data_item(
                 } else {
                     value.checked_sub(operand)
                 }
-                .ok_or_else(|| anyhow!("overflow in offset arithmetic"))?;
+                .ok_or_else(|| lexer.error(String::from("overflow in offset arithmetic")))?;
             }
             output.extend_from_slice(&lexer.endian.to_bytes(value));
         }
@@ -158,10 +193,10 @@ fn parse_data_item(
     if lexer.token == Some(Token::Asterisk) {
         lexer.get()?;
         let Token::Integer(count) = lexer.take()? else {
-            return Err(anyhow!("positive integer expected after '*'"));
+            Err(lexer.error(String::from("positive integer expected after '*'")))?
         };
         if count < 1 {
-            return Err(anyhow!("positive integer expected after '*'"));
+            Err(lexer.error(String::from("positive integer expected after '*'")))?
         };
         let final_len = output.len();
         for _ in 1..count {
@@ -173,7 +208,7 @@ fn parse_data_item(
             lexer.get()?;
         }
         Some(Token::RParen) => (),
-        _ => return Err(anyhow!("';' expected")),
+        _ => Err(lexer.error(String::from("';' expected")))?,
     }
     Ok(true)
 }
@@ -191,7 +226,7 @@ where
     let old_size = output.len();
     output.extend_from_slice(&lexer.endian.to_bytes(T::zero()));
     if lexer.token != Some(Token::LParen) {
-        return Err(anyhow!("'(' expected after '{name}'"));
+        Err(lexer.error(format!("'(' expected after '{name}'")))?
     }
     lexer.get()?;
     while lexer.token != Some(Token::RParen) {
@@ -200,7 +235,7 @@ where
     lexer.get()?;
     let delta = output.len() - old_size;
     let Ok(delta): Result<T, _> = delta.try_into() else {
-        return Err(anyhow!("{delta} bytes is too much for '{name}'"));
+        Err(lexer.error(format!("{delta} bytes is too much for '{name}'")))?
     };
     let dest = &mut output[old_size..old_size + N];
     dest.copy_from_slice(&lexer.endian.to_bytes(delta));
@@ -222,17 +257,17 @@ where
         _ => None,
     })? {
         let Ok(integer) = integer.try_into() else {
-            return Err(anyhow!(
+            Err(lexer.error(format!(
                 "{integer} is not in the valid range [{},{}]",
                 T::min_value(),
                 T::max_value()
-            ));
+            )))?
         };
         output.extend_from_slice(&lexer.endian.to_bytes(integer));
         n += 1;
     }
     if n == 0 {
-        return Err(anyhow!("integer expected after '{name}'"));
+        Err(lexer.error(format!("integer expected after '{name}'")))?
     }
     Ok(())
 }
@@ -263,24 +298,29 @@ enum Token {
 struct Lexer<'a> {
     iter: Peekable<Chars<'a>>,
     token: Option<Token>,
+    input_file_name: Option<&'a str>,
     line_number: usize,
     endian: Endian,
 }
 
 impl<'a> Lexer<'a> {
-    fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
+    fn new(input: &'a str, input_file_name: Option<&'a str>, endian: Endian) -> Result<Lexer<'a>> {
         let mut lexer = Lexer {
             iter: input.chars().peekable(),
             token: None,
+            input_file_name,
             line_number: 1,
             endian,
         };
         lexer.token = lexer.next()?;
         Ok(lexer)
     }
+    fn error(&self, message: String) -> Error {
+        Error::new(self.input_file_name, Some(self.line_number), message)
+    }
     fn take(&mut self) -> Result<Token> {
         let Some(token) = self.token.take() else {
-            return Err(anyhow!("unexpected end of input"));
+            Err(self.error(String::from("unexpected end of input")))?
         };
         self.token = self.next()?;
         Ok(token)
@@ -302,7 +342,7 @@ impl<'a> Lexer<'a> {
     }
     fn get(&mut self) -> Result<Option<&Token>> {
         if self.token.is_none() {
-            Err(anyhow!("unexpected end of input"))
+            Err(self.error(String::from("unexpected end of input")))?
         } else {
             self.token = self.next()?;
             Ok((&self.token).into())
@@ -335,84 +375,82 @@ impl<'a> Lexer<'a> {
             }
         };
 
-        let token = match c {
-            c if c.is_ascii_digit() || c == '-' => {
-                let mut s = String::from(c);
-                while let Some(c) = self
-                    .iter
-                    .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
-                {
-                    s.push(c);
-                }
+        let token =
+            match c {
+                c if c.is_ascii_digit() || c == '-' => {
+                    let mut s = String::from(c);
+                    while let Some(c) = self
+                        .iter
+                        .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
+                    {
+                        s.push(c);
+                    }
 
-                if s == "-" {
-                    Token::Minus
-                } else if !s.contains('.') {
-                    Token::Integer(
-                        s.parse()
-                            .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
-                    )
-                } else {
-                    Token::Float(
-                        s.parse()
-                            .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
-                    )
-                }
-            }
-            '"' => {
-                let mut s = String::new();
-                loop {
-                    match self.iter.next() {
-                        None => return Err(anyhow!("end-of-file inside string")),
-                        Some('\n') => return Err(anyhow!("new-line inside string")),
-                        Some('"') => break,
-                        Some(c) => s.push(c),
+                    if s == "-" {
+                        Token::Minus
+                    } else if !s.contains('.') {
+                        Token::Integer(s.parse().map_err(|msg| {
+                            self.error(format!("bad integer literal '{s}' ({msg})"))
+                        })?)
+                    } else {
+                        Token::Float(s.parse().map_err(|msg| {
+                            self.error(format!("bad float literal '{s}' ({msg})"))
+                        })?)
                     }
                 }
-                Token::String(s)
-            }
-            ';' => Token::Semicolon,
-            '*' => Token::Asterisk,
-            '+' => Token::Plus,
-            '(' => Token::LParen,
-            ')' => Token::RParen,
-            c if c.is_alphabetic() || c == '@' || c == '_' => {
-                let mut s = String::from(c);
-                while let Some(c) = self
-                    .iter
-                    .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
-                {
-                    s.push(c);
+                '"' => {
+                    let mut s = String::new();
+                    loop {
+                        match self.iter.next() {
+                            None => Err(self.error(String::from("end-of-file inside string")))?,
+                            Some('\n') => Err(self.error(String::from("new-line inside string")))?,
+                            Some('"') => break,
+                            Some(c) => s.push(c),
+                        }
+                    }
+                    Token::String(s)
                 }
-                if self.iter.next_if_eq(&':').is_some() {
-                    Token::Label(s)
-                } else if s.starts_with('@') {
-                    Token::At(s)
-                } else if let Some(count) = s.strip_prefix('s') {
-                    Token::S(
-                        count
-                            .parse()
-                            .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
-                    )
-                } else {
-                    match &s[..] {
-                        "i8" => Token::I8,
-                        "i16" => Token::I16,
-                        "i64" => Token::I64,
-                        "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
-                        "PCSYSMIS" => Token::PcSysmis,
-                        "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
-                        "HIGHEST" => Token::Float(f64::MAX.into()),
-                        "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
-                        "COUNT" => Token::Count,
-                        "COUNT8" => Token::Count8,
-                        "hex" => Token::Hex,
-                        _ => return Err(anyhow!("invalid token '{s}'")),
+                ';' => Token::Semicolon,
+                '*' => Token::Asterisk,
+                '+' => Token::Plus,
+                '(' => Token::LParen,
+                ')' => Token::RParen,
+                c if c.is_alphabetic() || c == '@' || c == '_' => {
+                    let mut s = String::from(c);
+                    while let Some(c) = self.iter.next_if(|&c| {
+                        c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_'
+                    }) {
+                        s.push(c);
+                    }
+                    if self.iter.next_if_eq(&':').is_some() {
+                        Token::Label(s)
+                    } else if s.starts_with('@') {
+                        Token::At(s)
+                    } else if let Some(count) = s.strip_prefix('s') {
+                        Token::S(count.parse().map_err(|msg| {
+                            self.error(format!("bad counted string '{s}' ({msg})"))
+                        })?)
+                    } else {
+                        match &s[..] {
+                            "i8" => Token::I8,
+                            "i16" => Token::I16,
+                            "i64" => Token::I64,
+                            "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
+                            "PCSYSMIS" => Token::PcSysmis,
+                            "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
+                            "HIGHEST" => Token::Float(f64::MAX.into()),
+                            "ENDIAN" => {
+                                Token::Integer(if self.endian == Endian::Big { 1 } else { 2 })
+                            }
+                            "COUNT" => Token::Count,
+                            "COUNT8" => Token::Count8,
+                            "hex" => Token::Hex,
+                            _ => Err(self.error(format!("invalid token '{s}'")))?,
+                        }
                     }
                 }
-            }
-            _ => return Err(anyhow!("invalid input byte '{c}'")),
-        };
+                _ => Err(self.error(format!("invalid input byte '{c}'")))?,
+            };
         Ok(Some(token))
     }
 }
@@ -438,7 +476,7 @@ mod test {
 "PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
 i8 0 *3;
 "#;
-        let output = sack(input, Endian::Big)?;
+        let output = sack(input, None, Endian::Big)?;
         HexView::new(&output).print()?;
         Ok(())
     }
index 407f2ec9a1a149d996a65dc9d0c6bf19438a9d33..4f6e9680a069dfd7ff74e59430d73dac1c616189 100644 (file)
@@ -59,12 +59,12 @@ struct Args {
     le: bool,
 
     /// Input file.
-    #[arg(required = true)]
-    input: PathBuf,
+    #[arg(required = true, name = "input")]
+    input_file_name: PathBuf,
 }
 
 fn main() -> Result<()> {
-    let Args { be, le, input } = Args::parse();
+    let Args { be, le, input_file_name } = Args::parse();
     if stdout().is_terminal() {
         return Err(anyhow!(
             "not writing binary data to a terminal; redirect to a file"
@@ -75,8 +75,9 @@ fn main() -> Result<()> {
         (false, true) => Endian::Little,
         (true, true) => return Err(anyhow!("can't use both `--be` and `--le`")),
     };
-    let input = read_to_string(&input)?;
-    let output = sack(&input, endian)?;
-    stdout().write(&output)?;
+    let input = read_to_string(&input_file_name)?;
+    let input_file_name = input_file_name.to_string_lossy();
+    let output = sack(&input, Some(&input_file_name), endian)?;
+    stdout().write_all(&output)?;
     Ok(())
 }