6be81d8aff1c285473660c7ce75745243102e12f
[pspp] / rust / src / sack.rs
1 use anyhow::{anyhow, Result};
2 use float_next_after::NextAfter;
3 use std::{iter::Peekable, str::Chars};
4
5 use crate::endian::Endian;
6
7 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
8     let lexer = Lexer::new(input, endian)?;
9     //let mut output = Vec::new();
10     Ok(Vec::new())
11 }
12
/// A lexical token produced by the lexer.
///
/// `Debug`, `Clone` and `PartialEq` are derived so tokens can be printed in
/// diagnostics and compared in tests. `Eq` is not derivable because of the
/// `f64` payload in [`Token::Float`].
#[derive(Debug, Clone, PartialEq)]
enum Token {
    /// Integer literal, or the value of the `ENDIAN` keyword.
    Integer(i64),
    /// Floating-point literal, or one of the `SYSMIS`, `LOWEST`, `HIGHEST`
    /// keywords.
    Float(f64),
    /// The `PCSYSMIS` keyword.
    PcSysmis,
    /// Double-quoted string literal.
    String(String),
    /// `;`
    Semicolon,
    /// `*`
    Asterisk,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// The `i8` keyword.
    I8,
    /// The `i16` keyword.
    I16,
    /// The `i64` keyword.
    I64,
    /// `s<N>`: the letter `s` followed by a number, which is the payload.
    S(usize),
    /// The `COUNT` keyword.
    Count,
    /// The `COUNT8` keyword.
    Count8,
    /// The `hex` keyword.
    Hex,
    /// An identifier immediately followed by `:`; the payload is the
    /// identifier without the colon.
    Label(String),
    /// An identifier beginning with `@`; the payload includes the `@`.
    At(String),
    /// A lone `-`.
    Minus,
    /// `+`
    Plus,
}
34
35 struct Lexer<'a> {
36     iter: Peekable<Chars<'a>>,
37     token: Option<Token>,
38     line_number: usize,
39     endian: Endian,
40 }
41
42 impl<'a> Lexer<'a> {
43     fn new(input: &'a str, endian: Endian) -> Result<Lexer<'a>> {
44         let mut lexer = Lexer {
45             iter: input.chars().peekable(),
46             token: None,
47             line_number: 1,
48             endian,
49         };
50         lexer.next()?;
51         Ok(lexer)
52     }
53     fn get(&'a mut self) -> Result<Option<&'a Token>> {
54         if self.token.is_none() {
55             Err(anyhow!("unexpected end of input"))
56         } else {
57             self.token = self.next()?;
58             Ok((&self.token).into())
59         }
60     }
61
62     fn next(&mut self) -> Result<Option<Token>> {
63         // Get the first character of the token, skipping past white space and
64         // comments.
65         let c = loop {
66             let Some(c) = self.iter.next() else {
67                 return Ok(None);
68             };
69             let c = if c == '#' {
70                 loop {
71                     match self.iter.next() {
72                         None => return Ok(None),
73                         Some('\n') => break,
74                         _ => (),
75                     }
76                 }
77                 '\n'
78             } else {
79                 c
80             };
81             if c == '\n' {
82                 self.line_number += 1
83             } else if !c.is_whitespace() && c != '<' && c != '>' {
84                 break c;
85             }
86         };
87
88         let token = match c {
89             c if c.is_ascii_digit() || c == '-' => {
90                 let mut s = String::from(c);
91                 while let Some(c) = self
92                     .iter
93                     .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.')
94                 {
95                     s.push(c);
96                 }
97
98                 if s == "-" {
99                     Token::Minus
100                 } else if !s.contains('.') {
101                     Token::Integer(
102                         s.parse()
103                             .map_err(|msg| anyhow!("bad integer literal '{s}' ({msg})"))?,
104                     )
105                 } else {
106                     Token::Float(
107                         s.parse()
108                             .map_err(|msg| anyhow!("bad float literal '{s}' ({msg})"))?,
109                     )
110                 }
111             }
112             '"' => {
113                 let mut s = String::from(c);
114                 loop {
115                     match self.iter.next() {
116                         None => return Err(anyhow!("end-of-file inside string")),
117                         Some('\n') => return Err(anyhow!("new-line inside string")),
118                         Some('"') => break,
119                         Some(c) => s.push(c),
120                     }
121                 }
122                 Token::String(s)
123             }
124             ';' => Token::Semicolon,
125             '*' => Token::Asterisk,
126             '+' => Token::Plus,
127             '(' => Token::LParen,
128             ')' => Token::RParen,
129             c if c.is_alphabetic() || c == '@' || c == '_' => {
130                 let mut s = String::from(c);
131                 while let Some(c) = self
132                     .iter
133                     .next_if(|&c| c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '_')
134                 {
135                     s.push(c);
136                 }
137                 if self.iter.next_if_eq(&':').is_some() {
138                     Token::Label(s)
139                 } else if s.starts_with('@') {
140                     Token::At(s)
141                 } else if let Some(count) = s.strip_prefix('s') {
142                     Token::S(
143                         count
144                             .parse()
145                             .map_err(|msg| anyhow!("bad counted string '{s}' ({msg})"))?,
146                     )
147                 } else {
148                     match &s[..] {
149                         "i8" => Token::I8,
150                         "i16" => Token::I16,
151                         "i64" => Token::I64,
152                         "SYSMIS" => Token::Float(-f64::MAX),
153                         "PCSYSMIS" => Token::PcSysmis,
154                         "LOWEST" => Token::Float((-f64::MAX).next_after(0.0)),
155                         "HIGHEST" => Token::Float(f64::MAX),
156                         "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
157                         "COUNT" => Token::Count,
158                         "COUNT8" => Token::Count8,
159                         "hex" => Token::Hex,
160                         _ => return Err(anyhow!("invalid token '{s}'")),
161                     }
162                 }
163             }
164             _ => return Err(anyhow!("invalid input byte '{c}'")),
165         };
166         Ok(Some(token))
167     }
168 }