-#![allow(unused_variables)]
use endian::{Endian, Parse, ToBytes};
use flate2::read::ZlibDecoder;
use num::Integer;
use thiserror::Error;
pub mod endian;
+pub mod sack;
#[derive(Error, Debug)]
pub enum Error {
}
trait State { // One stage of reading; each stage hands over to the next via `read`.
+ #[allow(clippy::type_complexity)] // The nested return type of `read` would otherwise trigger this lint.
fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>; // Consumes this state: `Ok(Some((record, next)))` yields a record plus the state to use next, `Ok(None)` yields nothing further, `Err` reports a failure.
}
R: Read + Seek,
{
fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> { // Forwards the seek to whatever `self.reader`'s `get_mut()` returns.
- unimplemented!();
+ self.reader.as_mut().unwrap().get_mut().seek(pos) // NOTE(review): `unwrap()` panics if `self.reader` holds no value; confirm it is always set when seeking.
}
}
match self.state.take()?.read() {
Ok(Some((record, next_state))) => {
self.state = Some(next_state);
- return Some(Ok(record));
+ Some(Ok(record))
}
- Ok(None) => return None,
- Err(error) => return Some(Err(error)),
+ Ok(None) => None,
+ Err(error) => Some(Err(error)),
}
}
}
--- /dev/null
+use anyhow::{anyhow, Result};
+use float_next_after::NextAfter;
+use std::{iter::Peekable, str::Chars};
+
+use crate::endian::Endian;
+
+/// Entry point that turns the text in `input` into a byte vector.
+///
+/// This is currently a stub: it only constructs a [`Lexer`] over `input`
+/// (which scans the first token) and then returns an empty vector.
+///
+/// # Errors
+///
+/// Returns whatever error [`Lexer::new`] reports, i.e. when the first token
+/// of `input` cannot be lexed.
+pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
+    // The lexer is not consumed yet; the leading underscore keeps the build
+    // free of unused-variable warnings until the parser is written.
+    let _lexer = Lexer::new(input, endian)?;
+    Ok(Vec::new())
+}
+
+enum Token { // One lexical token produced by `Lexer::next`.
+ Integer(i64), // Numeric literal without a '.', or the `ENDIAN` keyword.
+ Float(f64), // Numeric literal containing a '.', or `SYSMIS` / `LOWEST` / `HIGHEST`.
+ PcSysmis, // The `PCSYSMIS` keyword.
+ String(String), // Double-quoted string literal.
+ Semicolon, // ';'
+ Asterisk, // '*'
+ LParen, // '('
+ RParen, // ')'
+ I8, // The `i8` keyword.
+ I16, // The `i16` keyword.
+ I64, // The `i64` keyword.
+ S(usize), // `s` immediately followed by a decimal count, e.g. `s8`.
+ Count, // The `COUNT` keyword.
+ Count8, // The `COUNT8` keyword.
+ Hex, // The `hex` keyword.
+ Label(String), // Identifier immediately followed by ':' (the colon is not stored).
+ At(String), // Identifier starting with '@' (the '@' is kept in the string).
+ Minus, // A lone '-'.
+ Plus, // '+'
+}
+
+struct Lexer<'a> { // Tokenizer state over a borrowed input string.
+ iter: Peekable<Chars<'a>>, // Remaining input characters, with one character of lookahead.
+ token: Option<Token>, // Current token slot; `get` treats `None` as end of input.
+ line_number: usize, // Starts at 1; incremented for each '\n' skipped between tokens.
+ endian: Endian, // Decides the value the `ENDIAN` keyword lexes to (1 for big, otherwise 2).
+}
+
+impl<'a> Lexer<'a> {
+    /// Creates a lexer over `input` and makes its first token current.
+    ///
+    /// `endian` selects the value that the `ENDIAN` keyword lexes to.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the first token in `input` is malformed.
+    fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
+        let mut lexer = Lexer {
+            iter: input.chars().peekable(),
+            token: None,
+            line_number: 1,
+            endian,
+        };
+        // Keep the first token as the current one.  Dropping it would leave
+        // `token` as `None`, which `get` reports as end of input.
+        lexer.token = lexer.next()?;
+        Ok(lexer)
+    }
+
+    /// Advances to the next token and returns a reference to it, or `None`
+    /// if the input is now exhausted.
+    ///
+    /// The receiver is borrowed only for as long as the returned reference is
+    /// alive (not for all of `'a`), so the lexer can be advanced repeatedly.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the lexer was already at end of input before the call, or if
+    /// the next token is malformed.
+    fn get(&mut self) -> Result<Option<&Token>> {
+        if self.token.is_none() {
+            return Err(anyhow!("unexpected end of input"));
+        }
+        self.token = self.next()?;
+        Ok(self.token.as_ref())
+    }
+
+    /// Scans and returns the next token from the input, or `None` at end of
+    /// input.
+    ///
+    /// White space, the characters `<` and `>`, and `#` comments (which run
+    /// to the end of the line) are skipped between tokens.  Every new-line
+    /// skipped this way increments `line_number`.
+    ///
+    /// # Errors
+    ///
+    /// Fails on a malformed numeric literal or counted-string width, an
+    /// unterminated string, an unknown keyword, or a character that cannot
+    /// start a token.
+    fn next(&mut self) -> Result<Option<Token>> {
+        // Get the first character of the token, skipping past white space and
+        // comments.
+        let c = loop {
+            let Some(c) = self.iter.next() else {
+                return Ok(None);
+            };
+            let c = if c == '#' {
+                // Discard the rest of the comment line, then act as if the
+                // terminating new-line itself had been read.
+                loop {
+                    match self.iter.next() {
+                        None => return Ok(None),
+                        Some('\n') => break,
+                        _ => (),
+                    }
+                }
+                '\n'
+            } else {
+                c
+            };
+            if c == '\n' {
+                self.line_number += 1
+            } else if !c.is_whitespace() && c != '<' && c != '>' {
+                break c;
+            }
+        };
+
+        let token = match c {
+            // Numeric literal, or a lone minus sign.
+            c if c.is_ascii_digit() || c == '-' => {
+                let mut s = String::from(c);
+                while let Some(c) = self
+                    .iter
+                    .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
+                {
+                    s.push(c);
+                }
+
+                if s == "-" {
+                    Token::Minus
+                } else if !s.contains('.') {
+                    // No decimal point: integer literal.
+                    Token::Integer(
+                        s.parse()
+                            .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
+                    )
+                } else {
+                    Token::Float(
+                        s.parse()
+                            .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
+                    )
+                }
+            }
+            // Double-quoted string; it may not span lines.
+            '"' => {
+                // Start empty: the opening quote delimits the string and is
+                // not part of its contents (nor is the closing quote).
+                let mut s = String::new();
+                loop {
+                    match self.iter.next() {
+                        None => return Err(anyhow!("end-of-file inside string")),
+                        Some('\n') => return Err(anyhow!("new-line inside string")),
+                        Some('"') => break,
+                        Some(c) => s.push(c),
+                    }
+                }
+                Token::String(s)
+            }
+            ';' => Token::Semicolon,
+            '*' => Token::Asterisk,
+            '+' => Token::Plus,
+            '(' => Token::LParen,
+            ')' => Token::RParen,
+            // Identifier: label, `@` name, counted-string width or keyword.
+            c if c.is_alphabetic() || c == '@' || c == '_' => {
+                let mut s = String::from(c);
+                while let Some(c) = self
+                    .iter
+                    .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
+                {
+                    s.push(c);
+                }
+                if self.iter.next_if_eq(&':').is_some() {
+                    // A directly following ':' makes this a label; the colon
+                    // is consumed but not stored.
+                    Token::Label(s)
+                } else if s.starts_with('@') {
+                    Token::At(s)
+                } else if let Some(count) = s.strip_prefix('s') {
+                    // `s<N>`: everything after the 's' must be a count.
+                    Token::S(
+                        count
+                            .parse()
+                            .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
+                    )
+                } else {
+                    match &s[..] {
+                        "i8" => Token::I8,
+                        "i16" => Token::I16,
+                        "i64" => Token::I64,
+                        "SYSMIS" => Token::Float(-f64::MAX),
+                        "PCSYSMIS" => Token::PcSysmis,
+                        // The value adjacent to -f64::MAX in the direction of zero.
+                        "LOWEST" => Token::Float((-f64::MAX).next_after(0.0)),
+                        "HIGHEST" => Token::Float(f64::MAX),
+                        "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
+                        "COUNT" => Token::Count,
+                        "COUNT8" => Token::Count8,
+                        "hex" => Token::Hex,
+                        _ => return Err(anyhow!("invalid token '{s}'")),
+                    }
+                }
+            }
+            _ => return Err(anyhow!("invalid input byte '{c}'")),
+        };
+        Ok(Some(token))
+    }
+}