use super::{
segment::{Mode, Segment, Segmenter},
- token::{Punct, Token, TokenError},
+ token::{MacroToken, Punct, Token},
};
use std::collections::VecDeque;
+use thiserror::Error as ThisError;
-/// Attempts to merge a sequence of tokens together into a single token. The
-/// tokens are taken from the beginning of `input`. If successful, removes one
-/// or more token from the beginning of `input` and returnss the merged
-/// token. More input tokens might be needed; if so, leaves `input` alone and
-/// returns `None`. In the latter case, the caller should add more tokens to the
-/// input ([Token::End] or [Token::Punct(Punct::EndCmd)] is always sufficient).
-///
-/// This performs two different kinds of token merging:
-///
-/// - String concatenation, where syntax like `"a" + "b"` is converted into a
-/// single string token. This is definitely needed because the parser relies
-/// on it.
-///
-/// - Negative number merging, where syntax like `-5` is converted from a pair
-/// of tokens (a dash and a positive number) into a single token (a negative
-/// number). This might not be needed anymore because the segmenter
-/// directly treats a dash followed by a number, with optional intervening
-/// white space, as a negative number. It's only needed if we want
-/// intervening comments to be allowed or for part of the negative number
-/// token to be produced by macro expansion.
-pub fn merge_tokens(input: &mut VecDeque<Token>) -> Option<Token> {
- match input.get(0)? {
- Token::Punct(Punct::Dash) => match input.get(1)? {
- Token::Number(number) if number.is_sign_positive() => {
- let number = *number;
- input.pop_front().unwrap();
- input.pop_front().unwrap();
- return Some(Token::Number(-number));
+#[derive(ThisError, Clone, Debug, PartialEq, Eq)]
+pub enum ScanError {
+ /// Unterminated string constant.
+ #[error("Unterminated string constant.")]
+ ExpectedQuote,
+
+ /// Missing exponent.
+ #[error("Missing exponent following `{0}`")]
+ ExpectedExponent(String),
+
+ /// Odd length hex string.
+ #[error("String of hex digits has {0} characters, which is not a multiple of 2.")]
+ OddLengthHexString(usize),
+
+ /// Invalid hex digit.
+ #[error("Invalid hex digit {0:?}.")]
+ BadHexDigit(char),
+
+ /// Invalid length Unicode string.
+ #[error("Unicode string contains {0} bytes, which is not in the valid range of 1 to 8 bytes.")]
+ BadLengthUnicodeString(usize),
+
+ /// Invalid code point.
+ #[error("U+{0:04X} is not a valid Unicode code point.")]
+ BadCodePoint(u32),
+
+ /// Expected hexadecimal Unicode code point.
+ #[error("Expected hexadecimal Unicode code point.")]
+ ExpectedCodePoint,
+
+ /// `DO REPEAT` nested too deeply.
+ #[error("`DO REPEAT` nested too deeply.")]
+ DoRepeatOverflow,
+
+ /// Unexpected character.
+ #[error("Unexpected character {0:?} in input.")]
+ UnexpectedChar(char),
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum ScanToken {
+ Token(Token),
+ Error(ScanError),
+}
+
+impl ScanToken {
+ pub fn from_segment(s: &str, segment: Segment) -> Option<Self> {
+ match segment {
+ Segment::Number => Some(Self::Token(Token::Number(s.parse().unwrap()))),
+ Segment::QuotedString => {
+ // Trim quote mark from front and back.
+ let mut chars = s.chars();
+ let quote = chars.next().unwrap();
+ let s = chars.as_str().strip_suffix(quote).unwrap();
+
+ // Replace doubled quotes by single ones.
+ let (single_quote, double_quote) = match quote {
+ '\'' => ("'", "''"),
+ '"' => ("\"", "\"\""),
+ _ => unreachable!(),
+ };
+ Some(Self::Token(Token::String(
+ s.replace(double_quote, single_quote),
+ )))
}
- _ => Some(input.pop_front().unwrap()),
- },
- Token::String(_) => {
- let mut i = 0;
- while matches!(input.get(i * 2 + 1)?, Token::Punct(Punct::Plus))
- && matches!(input.get(i * 2 + 2)?, Token::String(_))
- {
- i += 1;
+ Segment::HexString => {
+ // Strip `X"` prefix and `"` suffix (or variations).
+ let s = &s[2..s.len() - 1];
+ for c in s.chars() {
+ if !c.is_ascii_hexdigit() {
+ return Some(Self::Error(ScanError::BadHexDigit(c)));
+ }
+ }
+ if s.len() % 2 != 0 {
+ return Some(Self::Error(ScanError::OddLengthHexString(s.len())));
+ }
+ let mut out = String::with_capacity(s.len());
+ for pair in s.as_bytes().chunks_exact(2) {
+ let hi = char::from(pair[0]).to_digit(16).unwrap() as u8;
+ let lo = char::from(pair[1]).to_digit(16).unwrap() as u8;
+ out.push(char::from(hi * 16 + lo));
+ }
+ Some(Self::Token(Token::String(out)))
+ }
+ Segment::UnicodeString => {
+ // Strip `U"` prefix and `"` suffix (or variations).
+ let s = &s[2..s.len() - 1];
+ if !(1..=8).contains(&s.len()) {
+ return Some(Self::Error(ScanError::BadLengthUnicodeString(s.len())));
+ }
+ let Ok(code_point) = u32::from_str_radix(s, 16) else {
+ return Some(Self::Error(ScanError::ExpectedCodePoint));
+ };
+ let Some(c) = char::from_u32(code_point) else {
+ return Some(Self::Error(ScanError::BadCodePoint(code_point)));
+ };
+ Some(Self::Token(Token::String(String::from(c))))
}
- if i == 0 {
- Some(input.pop_front().unwrap())
- } else {
- let mut output = String::new();
- for i in 0..=i {
- let Token::String(s) = &input[i * 2] else {
- unreachable!()
- };
- output.push_str(&s);
+
+ Segment::UnquotedString
+ | Segment::DoRepeatCommand
+ | Segment::InlineData
+ | Segment::Document
+ | Segment::MacroBody
+ | Segment::MacroName => Some(Self::Token(Token::String(String::from(s)))),
+
+ Segment::ReservedWord => {
+ let c0 = s.as_bytes()[0].to_ascii_uppercase();
+ let c1 = s.as_bytes()[1].to_ascii_uppercase();
+ match (c0, c1) {
+ (b'B', _) => Some(Self::Token(Token::Punct(Punct::By))),
+ (b'E', _) => Some(Self::Token(Token::Punct(Punct::Eq))),
+ (b'G', b'T') => Some(Self::Token(Token::Punct(Punct::Gt))),
+ (b'G', _) => Some(Self::Token(Token::Punct(Punct::Ge))),
+ (b'L', b'T') => Some(Self::Token(Token::Punct(Punct::Lt))),
+ (b'L', _) => Some(Self::Token(Token::Punct(Punct::Le))),
+ (b'N', b'E') => Some(Self::Token(Token::Punct(Punct::Ne))),
+ (b'N', _) => Some(Self::Token(Token::Punct(Punct::Not))),
+ (b'O', _) => Some(Self::Token(Token::Punct(Punct::Or))),
+ (b'T', _) => Some(Self::Token(Token::Punct(Punct::To))),
+ (b'A', b'L') => Some(Self::Token(Token::Punct(Punct::All))),
+ (b'A', _) => Some(Self::Token(Token::Punct(Punct::And))),
+ (b'W', _) => Some(Self::Token(Token::Punct(Punct::With))),
+ _ => unreachable!(),
}
- for _ in 0..i * 2 + 1 {
+ }
+ Segment::Identifier => Some(Self::Token(Token::Id(String::from(s)))),
+ Segment::Punct => match s {
+ "(" => Some(Self::Token(Token::Punct(Punct::LParen))),
+ ")" => Some(Self::Token(Token::Punct(Punct::RParen))),
+ "[" => Some(Self::Token(Token::Punct(Punct::LSquare))),
+ "]" => Some(Self::Token(Token::Punct(Punct::RSquare))),
+ "{" => Some(Self::Token(Token::Punct(Punct::LCurly))),
+ "}" => Some(Self::Token(Token::Punct(Punct::RCurly))),
+ "," => Some(Self::Token(Token::Punct(Punct::Comma))),
+ "=" => Some(Self::Token(Token::Punct(Punct::Equals))),
+ "-" => Some(Self::Token(Token::Punct(Punct::Dash))),
+ "&" => Some(Self::Token(Token::Punct(Punct::And))),
+ "|" => Some(Self::Token(Token::Punct(Punct::Or))),
+ "+" => Some(Self::Token(Token::Punct(Punct::Plus))),
+ "/" => Some(Self::Token(Token::Punct(Punct::Slash))),
+ "*" => Some(Self::Token(Token::Punct(Punct::Asterisk))),
+ "<" => Some(Self::Token(Token::Punct(Punct::Lt))),
+ ">" => Some(Self::Token(Token::Punct(Punct::Gt))),
+ "~" => Some(Self::Token(Token::Punct(Punct::Not))),
+ ":" => Some(Self::Token(Token::Punct(Punct::Colon))),
+ ";" => Some(Self::Token(Token::Punct(Punct::Semicolon))),
+ "**" => Some(Self::Token(Token::Punct(Punct::Exp))),
+ "<=" => Some(Self::Token(Token::Punct(Punct::Le))),
+ "<>" => Some(Self::Token(Token::Punct(Punct::Ne))),
+ "~=" => Some(Self::Token(Token::Punct(Punct::Ne))),
+ ">=" => Some(Self::Token(Token::Punct(Punct::Ge))),
+ "!" => Some(Self::Token(Token::MacroToken(MacroToken::Bang))),
+ "%" => Some(Self::Token(Token::MacroToken(MacroToken::Percent))),
+ "?" => Some(Self::Token(Token::MacroToken(MacroToken::Question))),
+ "`" => Some(Self::Token(Token::MacroToken(MacroToken::Backtick))),
+ "_" => Some(Self::Token(Token::MacroToken(MacroToken::Underscore))),
+ "." => Some(Self::Token(Token::MacroToken(MacroToken::Dot))),
+ _ => unreachable!("bad punctuator {s:?}"),
+ },
+ Segment::Shbang
+ | Segment::Spaces
+ | Segment::Comment
+ | Segment::Newline
+ | Segment::CommentCommand => None,
+ Segment::DoRepeatOverflow => Some(Self::Error(ScanError::DoRepeatOverflow)),
+ Segment::MacroId => Some(Self::Token(Token::MacroToken(MacroToken::MacroId(
+ String::from(s),
+ )))),
+ Segment::StartDocument => Some(Self::Token(Token::Id(String::from("DOCUMENT")))),
+ Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => {
+ Some(Self::Token(Token::EndCommand))
+ }
+ Segment::End => Some(Self::Token(Token::End)),
+ Segment::ExpectedQuote => Some(Self::Error(ScanError::ExpectedQuote)),
+ Segment::ExpectedExponent => {
+ Some(Self::Error(ScanError::ExpectedExponent(String::from(s))))
+ }
+ Segment::UnexpectedChar => Some(Self::Error(ScanError::UnexpectedChar(
+ s.chars().next().unwrap(),
+ ))),
+ }
+ }
+
+ /// Attempts to merge a sequence of tokens together into a single token. The
+ /// tokens are taken from the beginning of `input`. If successful, removes one
+ /// or more token from the beginning of `input` and returns the merged
+ /// token. More input tokens might be needed; if so, leaves `input` alone and
+ /// returns `None`. In the latter case, the caller should add more tokens to the
+ /// input ([Token::End] or [Token::EndCommand] is always sufficient).
+ ///
+ /// This performs two different kinds of token merging:
+ ///
+ /// - String concatenation, where syntax like `"a" + "b"` is converted into a
+ /// single string token. This is definitely needed because the parser relies
+ /// on it.
+ ///
+ /// - Negative number merging, where syntax like `-5` is converted from a pair
+ /// of tokens (a dash and a positive number) into a single token (a negative
+ /// number). This might not be needed anymore because the segmenter
+ /// directly treats a dash followed by a number, with optional intervening
+ /// white space, as a negative number. It's only needed if we want
+ /// intervening comments to be allowed or for part of the negative number
+ /// token to be produced by macro expansion.
+ pub fn merge(input: &mut VecDeque<ScanToken>) -> Option<ScanToken> {
+ match input.get(0)? {
+ ScanToken::Token(Token::Punct(Punct::Dash)) => match input.get(1)? {
+ ScanToken::Token(Token::Number(number)) if number.is_sign_positive() => {
+ let number = *number;
+ input.pop_front().unwrap();
input.pop_front().unwrap();
+ return Some(ScanToken::Token(Token::Number(-number)));
+ }
+ _ => Some(input.pop_front().unwrap()),
+ },
+ ScanToken::Token(Token::String(_)) => {
+ let mut i = 0;
+ while matches!(
+ input.get(i * 2 + 1)?,
+ ScanToken::Token(Token::Punct(Punct::Plus))
+ ) && matches!(input.get(i * 2 + 2)?, ScanToken::Token(Token::String(_)))
+ {
+ i += 1;
+ }
+ if i == 0 {
+ Some(input.pop_front().unwrap())
+ } else {
+ let mut output = String::new();
+ for i in 0..=i {
+ let ScanToken::Token(Token::String(s)) = &input[i * 2] else {
+ unreachable!()
+ };
+ output.push_str(&s);
+ }
+ for _ in 0..i * 2 + 1 {
+ input.pop_front().unwrap();
+ }
+ Some(ScanToken::Token(Token::String(output)))
}
- Some(Token::String(output))
}
+ _ => Some(input.pop_front().unwrap()),
}
- _ => Some(input.pop_front().unwrap()),
}
}
pub struct StringLexer<'a> {
input: &'a str,
segmenter: Segmenter,
- tokens: VecDeque<Token>,
+ tokens: VecDeque<ScanToken>,
}
impl<'a> StringLexer<'a> {
}
impl<'a> Iterator for StringLexer<'a> {
- type Item = Result<Token, TokenError>;
+ type Item = ScanToken;
fn next(&mut self) -> Option<Self::Item> {
- if let Some(token) = merge_tokens(&mut self.tokens) {
- return Some(Ok(token));
+ if let Some(token) = ScanToken::merge(&mut self.tokens) {
+ return Some(token);
}
loop {
let (rest, segment) = self.segmenter.push(self.input, true).unwrap();
}
let s = &self.input[..self.input.len() - rest.len()];
self.input = rest;
- match Token::try_from_segment(s, segment) {
- Err(error) => {
- println!("{:?}", &self.tokens);
- return Some(Err(error));
- }
- Ok(Some(token)) => {
- self.tokens.push_back(token);
- if let Some(token) = merge_tokens(&mut self.tokens) {
- return Some(Ok(token));
- }
+
+ if let Some(token) = ScanToken::from_segment(s, segment) {
+ self.tokens.push_back(token);
+ if let Some(token) = ScanToken::merge(&mut self.tokens) {
+ return Some(token);
}
- Ok(None) => (),
- };
+ }
}
}
}
use crate::lex::{
segment::Mode,
- token::{MacroToken, Punct, Token, TokenError},
+ token::{MacroToken, Punct, Token},
};
-use super::StringLexer;
+use super::{ScanError, ScanToken, StringLexer};
fn print_token(token: &Token) {
match token {
Token::String(s) => print!("Token::String(String::from({s:?}))"),
Token::EndCommand => print!("Token::EndCommand"),
Token::Punct(punct) => print!("Token::Punct(Punct::{punct:?})"),
+ Token::MacroToken(MacroToken::MacroId(id)) => {
+ print!("Token::MacroToken(MacroToken::MacroId(String::from({id:?})))")
+ }
Token::MacroToken(m) => print!("Token::MacroToken(MacroToken::{m:?})"),
}
}
-fn check_scan(input: &str, expected: &[Result<Token, TokenError>]) {
- let tokens = StringLexer::new(input, Mode::Auto, false).collect::<Vec<_>>();
+fn check_scan(input: &str, mode: Mode, expected: &[ScanToken]) {
+ let tokens = StringLexer::new(input, mode, false).collect::<Vec<_>>();
if &tokens != expected {
for token in &tokens {
match token {
- Ok(token) => {
- print!("Ok(");
+ ScanToken::Token(token) => {
+ print!("ScanToken::Token(");
print_token(token);
print!(")");
}
- Err(error) => print!("Err(TokenError::{error:?})"),
+ ScanToken::Error(error) => print!("ScanToken::Error(ScanError::{error:?})"),
}
println!(",");
}
WXYZ. /* unterminated end of line comment
�. /* U+FFFD is not valid in an identifier
"#,
+ Mode::Auto,
&[
- Ok(Token::Id(String::from("a"))),
- Ok(Token::Id(String::from("aB"))),
- Ok(Token::Id(String::from("i5"))),
- Ok(Token::Id(String::from("$x"))),
- Ok(Token::Id(String::from("@efg"))),
- Ok(Token::Id(String::from("@@."))),
- Ok(Token::MacroToken(MacroToken::MacroId(String::from(
+ ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Id(String::from("aB"))),
+ ScanToken::Token(Token::Id(String::from("i5"))),
+ ScanToken::Token(Token::Id(String::from("$x"))),
+ ScanToken::Token(Token::Id(String::from("@efg"))),
+ ScanToken::Token(Token::Id(String::from("@@."))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
"!abcd",
)))),
- Ok(Token::MacroToken(MacroToken::MacroId(String::from("!*")))),
- Ok(Token::MacroToken(MacroToken::MacroId(String::from("!*")))),
- Ok(Token::Id(String::from("a"))),
- Ok(Token::Id(String::from("#.#"))),
- Ok(Token::MacroToken(MacroToken::Dot)),
- Ok(Token::Id(String::from("x"))),
- Ok(Token::MacroToken(MacroToken::Underscore)),
- Ok(Token::Id(String::from("z"))),
- Ok(Token::EndCommand),
- Ok(Token::Id(String::from("abcd."))),
- Ok(Token::Id(String::from("abcd"))),
- Ok(Token::EndCommand),
- Ok(Token::Id(String::from("QRSTUV"))),
- Ok(Token::EndCommand),
- Ok(Token::Id(String::from("QrStUv"))),
- Ok(Token::EndCommand),
- Ok(Token::Id(String::from("WXYZ"))),
- Ok(Token::EndCommand),
- Err(TokenError::UnexpectedChar('�')),
- Ok(Token::EndCommand),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from("!*")))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from("!*")))),
+ ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Id(String::from("#.#"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::Dot)),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::Underscore)),
+ ScanToken::Token(Token::Id(String::from("z"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("abcd."))),
+ ScanToken::Token(Token::Id(String::from("abcd"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("QRSTUV"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("QrStUv"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("WXYZ"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Error(ScanError::UnexpectedChar('�')),
+ ScanToken::Token(Token::EndCommand),
],
);
}
andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
and. with.
"#,
+ Mode::Auto,
&[
- Ok(Token::Punct(Punct::And)),
- Ok(Token::Punct(Punct::Or)),
- Ok(Token::Punct(Punct::Not)),
- Ok(Token::Punct(Punct::Eq)),
- Ok(Token::Punct(Punct::Ge)),
- Ok(Token::Punct(Punct::Gt)),
- Ok(Token::Punct(Punct::Le)),
- Ok(Token::Punct(Punct::Lt)),
- Ok(Token::Punct(Punct::Ne)),
- Ok(Token::Punct(Punct::All)),
- Ok(Token::Punct(Punct::By)),
- Ok(Token::Punct(Punct::To)),
- Ok(Token::Punct(Punct::With)),
- Ok(Token::Punct(Punct::And)),
- Ok(Token::Punct(Punct::Or)),
- Ok(Token::Punct(Punct::Not)),
- Ok(Token::Punct(Punct::Eq)),
- Ok(Token::Punct(Punct::Ge)),
- Ok(Token::Punct(Punct::Gt)),
- Ok(Token::Punct(Punct::Le)),
- Ok(Token::Punct(Punct::Lt)),
- Ok(Token::Punct(Punct::Ne)),
- Ok(Token::Punct(Punct::All)),
- Ok(Token::Punct(Punct::By)),
- Ok(Token::Punct(Punct::To)),
- Ok(Token::Punct(Punct::With)),
- Ok(Token::Id(String::from("andx"))),
- Ok(Token::Id(String::from("orx"))),
- Ok(Token::Id(String::from("notx"))),
- Ok(Token::Id(String::from("eqx"))),
- Ok(Token::Id(String::from("gex"))),
- Ok(Token::Id(String::from("gtx"))),
- Ok(Token::Id(String::from("lex"))),
- Ok(Token::Id(String::from("ltx"))),
- Ok(Token::Id(String::from("nex"))),
- Ok(Token::Id(String::from("allx"))),
- Ok(Token::Id(String::from("byx"))),
- Ok(Token::Id(String::from("tox"))),
- Ok(Token::Id(String::from("withx"))),
- Ok(Token::Id(String::from("and."))),
- Ok(Token::Punct(Punct::With)),
- Ok(Token::EndCommand),
+ ScanToken::Token(Token::Punct(Punct::And)),
+ ScanToken::Token(Token::Punct(Punct::Or)),
+ ScanToken::Token(Token::Punct(Punct::Not)),
+ ScanToken::Token(Token::Punct(Punct::Eq)),
+ ScanToken::Token(Token::Punct(Punct::Ge)),
+ ScanToken::Token(Token::Punct(Punct::Gt)),
+ ScanToken::Token(Token::Punct(Punct::Le)),
+ ScanToken::Token(Token::Punct(Punct::Lt)),
+ ScanToken::Token(Token::Punct(Punct::Ne)),
+ ScanToken::Token(Token::Punct(Punct::All)),
+ ScanToken::Token(Token::Punct(Punct::By)),
+ ScanToken::Token(Token::Punct(Punct::To)),
+ ScanToken::Token(Token::Punct(Punct::With)),
+ ScanToken::Token(Token::Punct(Punct::And)),
+ ScanToken::Token(Token::Punct(Punct::Or)),
+ ScanToken::Token(Token::Punct(Punct::Not)),
+ ScanToken::Token(Token::Punct(Punct::Eq)),
+ ScanToken::Token(Token::Punct(Punct::Ge)),
+ ScanToken::Token(Token::Punct(Punct::Gt)),
+ ScanToken::Token(Token::Punct(Punct::Le)),
+ ScanToken::Token(Token::Punct(Punct::Lt)),
+ ScanToken::Token(Token::Punct(Punct::Ne)),
+ ScanToken::Token(Token::Punct(Punct::All)),
+ ScanToken::Token(Token::Punct(Punct::By)),
+ ScanToken::Token(Token::Punct(Punct::To)),
+ ScanToken::Token(Token::Punct(Punct::With)),
+ ScanToken::Token(Token::Id(String::from("andx"))),
+ ScanToken::Token(Token::Id(String::from("orx"))),
+ ScanToken::Token(Token::Id(String::from("notx"))),
+ ScanToken::Token(Token::Id(String::from("eqx"))),
+ ScanToken::Token(Token::Id(String::from("gex"))),
+ ScanToken::Token(Token::Id(String::from("gtx"))),
+ ScanToken::Token(Token::Id(String::from("lex"))),
+ ScanToken::Token(Token::Id(String::from("ltx"))),
+ ScanToken::Token(Token::Id(String::from("nex"))),
+ ScanToken::Token(Token::Id(String::from("allx"))),
+ ScanToken::Token(Token::Id(String::from("byx"))),
+ ScanToken::Token(Token::Id(String::from("tox"))),
+ ScanToken::Token(Token::Id(String::from("withx"))),
+ ScanToken::Token(Token::Id(String::from("and."))),
+ ScanToken::Token(Token::Punct(Punct::With)),
+ ScanToken::Token(Token::EndCommand),
],
);
}
~&|=>=><=<~=<>(),-+*/[]**
% : ; ? _ ` { } ~
"#,
+ Mode::Auto,
&[
- Ok(Token::Punct(Punct::Not)),
- Ok(Token::Punct(Punct::And)),
- Ok(Token::Punct(Punct::Or)),
- Ok(Token::Punct(Punct::Equals)),
- Ok(Token::Punct(Punct::Ge)),
- Ok(Token::Punct(Punct::Gt)),
- Ok(Token::Punct(Punct::Le)),
- Ok(Token::Punct(Punct::Lt)),
- Ok(Token::Punct(Punct::Ne)),
- Ok(Token::Punct(Punct::Ne)),
- Ok(Token::Punct(Punct::LParen)),
- Ok(Token::Punct(Punct::RParen)),
- Ok(Token::Punct(Punct::Comma)),
- Ok(Token::Punct(Punct::Dash)),
- Ok(Token::Punct(Punct::Plus)),
- Ok(Token::Punct(Punct::Asterisk)),
- Ok(Token::Punct(Punct::Slash)),
- Ok(Token::Punct(Punct::LSquare)),
- Ok(Token::Punct(Punct::RSquare)),
- Ok(Token::Punct(Punct::Exp)),
- Ok(Token::Punct(Punct::Not)),
- Ok(Token::Punct(Punct::And)),
- Ok(Token::Punct(Punct::Or)),
- Ok(Token::Punct(Punct::Equals)),
- Ok(Token::Punct(Punct::Ge)),
- Ok(Token::Punct(Punct::Gt)),
- Ok(Token::Punct(Punct::Le)),
- Ok(Token::Punct(Punct::Lt)),
- Ok(Token::Punct(Punct::Ne)),
- Ok(Token::Punct(Punct::Ne)),
- Ok(Token::Punct(Punct::LParen)),
- Ok(Token::Punct(Punct::RParen)),
- Ok(Token::Punct(Punct::Comma)),
- Ok(Token::Punct(Punct::Dash)),
- Ok(Token::Punct(Punct::Plus)),
- Ok(Token::Punct(Punct::Asterisk)),
- Ok(Token::Punct(Punct::Slash)),
- Ok(Token::Punct(Punct::LSquare)),
- Ok(Token::Punct(Punct::RSquare)),
- Ok(Token::Punct(Punct::Exp)),
- Ok(Token::MacroToken(MacroToken::Percent)),
- Ok(Token::Punct(Punct::Colon)),
- Ok(Token::Punct(Punct::Semicolon)),
- Ok(Token::MacroToken(MacroToken::Question)),
- Ok(Token::MacroToken(MacroToken::Underscore)),
- Ok(Token::MacroToken(MacroToken::Backtick)),
- Ok(Token::Punct(Punct::LCurly)),
- Ok(Token::Punct(Punct::RCurly)),
- Ok(Token::Punct(Punct::Not)),
+ ScanToken::Token(Token::Punct(Punct::Not)),
+ ScanToken::Token(Token::Punct(Punct::And)),
+ ScanToken::Token(Token::Punct(Punct::Or)),
+ ScanToken::Token(Token::Punct(Punct::Equals)),
+ ScanToken::Token(Token::Punct(Punct::Ge)),
+ ScanToken::Token(Token::Punct(Punct::Gt)),
+ ScanToken::Token(Token::Punct(Punct::Le)),
+ ScanToken::Token(Token::Punct(Punct::Lt)),
+ ScanToken::Token(Token::Punct(Punct::Ne)),
+ ScanToken::Token(Token::Punct(Punct::Ne)),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Punct(Punct::Dash)),
+ ScanToken::Token(Token::Punct(Punct::Plus)),
+ ScanToken::Token(Token::Punct(Punct::Asterisk)),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Punct(Punct::LSquare)),
+ ScanToken::Token(Token::Punct(Punct::RSquare)),
+ ScanToken::Token(Token::Punct(Punct::Exp)),
+ ScanToken::Token(Token::Punct(Punct::Not)),
+ ScanToken::Token(Token::Punct(Punct::And)),
+ ScanToken::Token(Token::Punct(Punct::Or)),
+ ScanToken::Token(Token::Punct(Punct::Equals)),
+ ScanToken::Token(Token::Punct(Punct::Ge)),
+ ScanToken::Token(Token::Punct(Punct::Gt)),
+ ScanToken::Token(Token::Punct(Punct::Le)),
+ ScanToken::Token(Token::Punct(Punct::Lt)),
+ ScanToken::Token(Token::Punct(Punct::Ne)),
+ ScanToken::Token(Token::Punct(Punct::Ne)),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Punct(Punct::Dash)),
+ ScanToken::Token(Token::Punct(Punct::Plus)),
+ ScanToken::Token(Token::Punct(Punct::Asterisk)),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Punct(Punct::LSquare)),
+ ScanToken::Token(Token::Punct(Punct::RSquare)),
+ ScanToken::Token(Token::Punct(Punct::Exp)),
+ ScanToken::Token(Token::MacroToken(MacroToken::Percent)),
+ ScanToken::Token(Token::Punct(Punct::Colon)),
+ ScanToken::Token(Token::Punct(Punct::Semicolon)),
+ ScanToken::Token(Token::MacroToken(MacroToken::Question)),
+ ScanToken::Token(Token::MacroToken(MacroToken::Underscore)),
+ ScanToken::Token(Token::MacroToken(MacroToken::Backtick)),
+ ScanToken::Token(Token::Punct(Punct::LCurly)),
+ ScanToken::Token(Token::Punct(Punct::RCurly)),
+ ScanToken::Token(Token::Punct(Punct::Not)),
],
);
}
1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
. 1e e1 1e+ 1e-
"#,
+ Mode::Auto,
&[
- Ok(Token::Number(0.0)),
- Ok(Token::Number(1.0)),
- Ok(Token::Number(1.0)),
- Ok(Token::Number(1.0)),
- Ok(Token::Number(1.0)),
- Ok(Token::EndCommand),
- Ok(Token::Number(123.0)),
- Ok(Token::EndCommand),
- Ok(Token::EndCommand),
- Ok(Token::Number(1.0)),
- Ok(Token::Number(0.1)),
- Ok(Token::Number(0.1)),
- Ok(Token::Number(0.1)),
- Ok(Token::Number(50.0)),
- Ok(Token::Number(0.6)),
- Ok(Token::Number(70.0)),
- Ok(Token::Number(60.0)),
- Ok(Token::Number(0.006)),
- Ok(Token::EndCommand),
- Ok(Token::Number(30.0)),
- Ok(Token::Number(0.04)),
- Ok(Token::Number(5.0)),
- Ok(Token::Number(6.0)),
- Ok(Token::Number(0.0007)),
- Ok(Token::Number(12.3)),
- Ok(Token::Number(4.56)),
- Ok(Token::Number(789.0)),
- Ok(Token::Number(999.0)),
- Ok(Token::Number(0.0112)),
- Ok(Token::EndCommand),
- Err(TokenError::ExpectedExponent(String::from("1e"))),
- Ok(Token::Id(String::from("e1"))),
- Err(TokenError::ExpectedExponent(String::from("1e+"))),
- Err(TokenError::ExpectedExponent(String::from("1e-"))),
+ ScanToken::Token(Token::Number(0.0)),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Number(123.0)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::Number(0.1)),
+ ScanToken::Token(Token::Number(0.1)),
+ ScanToken::Token(Token::Number(0.1)),
+ ScanToken::Token(Token::Number(50.0)),
+ ScanToken::Token(Token::Number(0.6)),
+ ScanToken::Token(Token::Number(70.0)),
+ ScanToken::Token(Token::Number(60.0)),
+ ScanToken::Token(Token::Number(0.006)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Number(30.0)),
+ ScanToken::Token(Token::Number(0.04)),
+ ScanToken::Token(Token::Number(5.0)),
+ ScanToken::Token(Token::Number(6.0)),
+ ScanToken::Token(Token::Number(0.0007)),
+ ScanToken::Token(Token::Number(12.3)),
+ ScanToken::Token(Token::Number(4.56)),
+ ScanToken::Token(Token::Number(789.0)),
+ ScanToken::Token(Token::Number(999.0)),
+ ScanToken::Token(Token::Number(0.0112)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Error(ScanError::ExpectedExponent(String::from("1e"))),
+ ScanToken::Token(Token::Id(String::from("e1"))),
+ ScanToken::Error(ScanError::ExpectedExponent(String::from("1e+"))),
+ ScanToken::Error(ScanError::ExpectedExponent(String::from("1e-"))),
],
);
}
-/**/1
-. -1e -e1 -1e+ -1e- -1.
"#,
+ Mode::Auto,
&[
- Ok(Token::Number(-0.0)),
- Ok(Token::Number(-1.0)),
- Ok(Token::Number(-1.0)),
- Ok(Token::Number(-1.0)),
- Ok(Token::Number(-1.0)),
- Ok(Token::EndCommand),
- Ok(Token::Number(-123.0)),
- Ok(Token::EndCommand),
- Ok(Token::Number(-0.1)),
- Ok(Token::Number(-0.1)),
- Ok(Token::Number(-0.1)),
- Ok(Token::Number(-0.1)),
- Ok(Token::Number(-50.0)),
- Ok(Token::Number(-0.6)),
- Ok(Token::Number(-70.0)),
- Ok(Token::Number(-60.0)),
- Ok(Token::Number(-0.006)),
- Ok(Token::Number(-3.0)),
- Ok(Token::Number(-0.04)),
- Ok(Token::Number(-5.0)),
- Ok(Token::Number(-6.0)),
- Ok(Token::Number(-0.0007)),
- Ok(Token::Number(-12.3)),
- Ok(Token::Number(-4.56)),
- Ok(Token::Number(-789.0)),
- Ok(Token::Number(-999.0)),
- Ok(Token::Number(-0.0112)),
- Ok(Token::Number(-1.0)),
- Ok(Token::Punct(Punct::Dash)),
- Ok(Token::MacroToken(MacroToken::Dot)),
- Err(TokenError::ExpectedExponent(String::from("-1e"))),
- Ok(Token::Punct(Punct::Dash)),
- Ok(Token::Id(String::from("e1"))),
- Err(TokenError::ExpectedExponent(String::from("-1e+"))),
- Err(TokenError::ExpectedExponent(String::from("-1e-"))),
- Ok(Token::Number(-1.0)),
- Ok(Token::EndCommand),
+ ScanToken::Token(Token::Number(-0.0)),
+ ScanToken::Token(Token::Number(-1.0)),
+ ScanToken::Token(Token::Number(-1.0)),
+ ScanToken::Token(Token::Number(-1.0)),
+ ScanToken::Token(Token::Number(-1.0)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Number(-123.0)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Number(-0.1)),
+ ScanToken::Token(Token::Number(-0.1)),
+ ScanToken::Token(Token::Number(-0.1)),
+ ScanToken::Token(Token::Number(-0.1)),
+ ScanToken::Token(Token::Number(-50.0)),
+ ScanToken::Token(Token::Number(-0.6)),
+ ScanToken::Token(Token::Number(-70.0)),
+ ScanToken::Token(Token::Number(-60.0)),
+ ScanToken::Token(Token::Number(-0.006)),
+ ScanToken::Token(Token::Number(-3.0)),
+ ScanToken::Token(Token::Number(-0.04)),
+ ScanToken::Token(Token::Number(-5.0)),
+ ScanToken::Token(Token::Number(-6.0)),
+ ScanToken::Token(Token::Number(-0.0007)),
+ ScanToken::Token(Token::Number(-12.3)),
+ ScanToken::Token(Token::Number(-4.56)),
+ ScanToken::Token(Token::Number(-789.0)),
+ ScanToken::Token(Token::Number(-999.0)),
+ ScanToken::Token(Token::Number(-0.0112)),
+ ScanToken::Token(Token::Number(-1.0)),
+ ScanToken::Token(Token::Punct(Punct::Dash)),
+ ScanToken::Token(Token::MacroToken(MacroToken::Dot)),
+ ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e"))),
+ ScanToken::Token(Token::Punct(Punct::Dash)),
+ ScanToken::Token(Token::Id(String::from("e1"))),
+ ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e+"))),
+ ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e-"))),
+ ScanToken::Token(Token::Number(-1.0)),
+ ScanToken::Token(Token::EndCommand),
],
);
}
-
#[test]
fn test_strings() {
+ // Quoted-string scanning: single vs. double quotes, doubled-quote
+ // escapes ('' -> ', "" -> "), adjacent/`+`-joined concatenation,
+ // u/U hex-coded string syntax, and unterminated strings, which now
+ // surface as ScanToken::Error(ScanError::ExpectedQuote).
+ //
+ // NOTE(review): several expected tokens ("xyzabcde", "foobar",
+ // "AB5152", "4142QR") have no matching input line visible in this
+ // hunk -- confirm the full input text once the patch is applied.
- check_scan(r#"'x' "y" 'abc'
+ check_scan(
+ r#"'x' "y" 'abc'
'Don''t' "Can't" 'Won''t'
"""quoted""" '"quoted"'
'' "" '''' """"
+u'304a'
"�あいうえお"
"abc"+U"FFFD"+u'3048'+"xyz"
-"#, &[]);
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::String(String::from("x"))),
+ ScanToken::Token(Token::String(String::from("y"))),
+ ScanToken::Token(Token::String(String::from("abc"))),
+ ScanToken::Token(Token::String(String::from("Don't"))),
+ ScanToken::Token(Token::String(String::from("Can't"))),
+ ScanToken::Token(Token::String(String::from("Won't"))),
+ ScanToken::Token(Token::String(String::from("\"quoted\""))),
+ ScanToken::Token(Token::String(String::from("\"quoted\""))),
+ ScanToken::Token(Token::String(String::from(""))),
+ ScanToken::Token(Token::String(String::from(""))),
+ ScanToken::Token(Token::String(String::from("'"))),
+ ScanToken::Token(Token::String(String::from("\""))),
+ ScanToken::Error(ScanError::ExpectedQuote),
+ ScanToken::Error(ScanError::ExpectedQuote),
+ ScanToken::Token(Token::String(String::from("xyzabcde"))),
+ ScanToken::Token(Token::String(String::from("foobar"))),
+ ScanToken::Token(Token::String(String::from("foobar"))),
+ ScanToken::Token(Token::String(String::from("foo"))),
+ ScanToken::Token(Token::Punct(Punct::Plus)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::String(String::from("bar"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Punct(Punct::Plus)),
+ ScanToken::Token(Token::String(String::from("AB5152"))),
+ ScanToken::Token(Token::String(String::from("4142QR"))),
+ ScanToken::Token(Token::String(String::from("ABお"))),
+ ScanToken::Token(Token::String(String::from("�あいうえお"))),
+ ScanToken::Token(Token::String(String::from("abc�えxyz"))),
+ ScanToken::Token(Token::End),
+ ],
+ );
+}
+
+#[test]
+fn test_shbang() {
+ // The input has two identical `#!` lines but only one run of tokens is
+ // expected -- presumably the segmenter skips the very first line of the
+ // input as a shebang and only the second is tokenized, as `#` (id),
+ // `!` (macro id), and slash-separated ids. TODO(review): confirm
+ // against the segmenter's shbang handling.
+ check_scan(
+ r#"#! /usr/bin/pspp
+#! /usr/bin/pspp
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("#"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from("!")))),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Id(String::from("usr"))),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Id(String::from("bin"))),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Id(String::from("pspp"))),
+ ],
+ );
+}
+
+#[test]
+fn test_comments() {
+ // Comment commands (`*`, `**...`, COMMENT, COMM) swallow their text,
+ // each contributing only EndCommand tokens to the output. `com` is
+ // expected NOT to be treated as a comment command (ambiguous with
+ // COMPUTE), so its line is tokenized normally.
+ check_scan(
+ r#"* Comment commands "don't
+have to contain valid tokens.
+
+** Check ambiguity with ** token.
+****************.
+
+comment keyword works too.
+COMM also.
+com is ambiguous with COMPUTE.
+
+ * Comment need not start at left margin.
+
+* Comment ends with blank line
+
+next command.
+
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("com"))),
+ ScanToken::Token(Token::Id(String::from("is"))),
+ ScanToken::Token(Token::Id(String::from("ambiguous"))),
+ ScanToken::Token(Token::Punct(Punct::With)),
+ ScanToken::Token(Token::Id(String::from("COMPUTE"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("next"))),
+ ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+}
+
+#[test]
+fn test_document() {
+ // DOCUMENT (and abbreviations DOC, docu) emit a synthesized
+ // `DOCUMENT` id token, then each body line verbatim as a String token
+ // (including the command line itself), until the terminating `.`.
+ check_scan(
+ r#"DOCUMENT one line.
+DOC more
+ than
+ one
+ line.
+docu
+first.paragraph
+isn't parsed as tokens
+
+second paragraph.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("DOCUMENT"))),
+ ScanToken::Token(Token::String(String::from("DOCUMENT one line."))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("DOCUMENT"))),
+ ScanToken::Token(Token::String(String::from("DOC more"))),
+ ScanToken::Token(Token::String(String::from(" than"))),
+ ScanToken::Token(Token::String(String::from(" one"))),
+ ScanToken::Token(Token::String(String::from(" line."))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("DOCUMENT"))),
+ ScanToken::Token(Token::String(String::from("docu"))),
+ ScanToken::Token(Token::String(String::from("first.paragraph"))),
+ ScanToken::Token(Token::String(String::from("isn't parsed as tokens"))),
+ ScanToken::Token(Token::String(String::from(""))),
+ ScanToken::Token(Token::String(String::from("second paragraph."))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+}
+
+#[test]
+fn test_file_label() {
+ // FILE LABEL: the label text after `lab...` is delivered as a single
+ // String token whether or not it is quoted; `/* ... */` comments before
+ // the label are skipped.
+ check_scan(
+ r#"FIL label isn't quoted.
+FILE
+ lab 'is quoted'.
+FILE /*
+/**/ lab not quoted here either
+
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("FIL"))),
+ ScanToken::Token(Token::Id(String::from("label"))),
+ ScanToken::Token(Token::String(String::from("isn't quoted"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("FILE"))),
+ ScanToken::Token(Token::Id(String::from("lab"))),
+ ScanToken::Token(Token::String(String::from("is quoted"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("FILE"))),
+ ScanToken::Token(Token::Id(String::from("lab"))),
+ ScanToken::Token(Token::String(String::from("not quoted here either"))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+}
+
+#[test]
+fn test_begin_data() {
+ // BEGIN DATA...END DATA: the data lines (here "123", "xxx", and the
+ // lines after BEG DAT) pass through verbatim as String tokens -- even
+ // text that looks like a comment ("5 6 7 /* x") or like "end data"
+ // without the terminating `.` on the same line.
+ check_scan(
+ r#"begin data.
+123
+xxx
+end data.
+
+BEG /**/ DAT /*
+5 6 7 /* x
+
+end data
+end data
+.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("begin"))),
+ ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::String(String::from("123"))),
+ ScanToken::Token(Token::String(String::from("xxx"))),
+ ScanToken::Token(Token::Id(String::from("end"))),
+ ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("BEG"))),
+ ScanToken::Token(Token::Id(String::from("DAT"))),
+ ScanToken::Token(Token::String(String::from("5 6 7 /* x"))),
+ ScanToken::Token(Token::String(String::from(""))),
+ ScanToken::Token(Token::String(String::from("end data"))),
+ ScanToken::Token(Token::Id(String::from("end"))),
+ ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+}
+
+#[test]
+fn test_do_repeat() {
+ // DO REPEAT (interactive/auto mode): the header is tokenized normally;
+ // every body line -- including a nested `do repeat` -- is passed through
+ // as a verbatim String token until the matching `end ... repeat`, which
+ // may be split across lines and interleaved with /* */ comments.
+ check_scan(
+ r#"do repeat x=a b c
+ y=d e f.
+ do repeat a=1 thru 5.
+another command.
+second command
++ third command.
+end /* x */ /* y */ repeat print.
+end
+ repeat.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("do"))),
+ ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Punct(Punct::Equals)),
+ ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Id(String::from("b"))),
+ ScanToken::Token(Token::Id(String::from("c"))),
+ ScanToken::Token(Token::Id(String::from("y"))),
+ ScanToken::Token(Token::Punct(Punct::Equals)),
+ ScanToken::Token(Token::Id(String::from("d"))),
+ ScanToken::Token(Token::Id(String::from("e"))),
+ ScanToken::Token(Token::Id(String::from("f"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::String(String::from(" do repeat a=1 thru 5."))),
+ ScanToken::Token(Token::String(String::from("another command."))),
+ ScanToken::Token(Token::String(String::from("second command"))),
+ ScanToken::Token(Token::String(String::from("+ third command."))),
+ ScanToken::Token(Token::String(String::from(
+ "end /* x */ /* y */ repeat print.",
+ ))),
+ ScanToken::Token(Token::Id(String::from("end"))),
+ ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+}
+
+#[test]
+fn test_do_repeat_batch() {
+ // Same as test_do_repeat but in batch mode, where commands need no `.`
+ // terminator. Also exercises a second DO REPEAT whose keyword is split
+ // across lines and whose body line keeps its leading indentation.
+ check_scan(
+ r#"do repeat x=a b c
+ y=d e f
+do repeat a=1 thru 5
+another command
+second command
++ third command
+end /* x */ /* y */ repeat print
+end
+ repeat
+do
+ repeat #a=1
+
+ inner command
+end repeat
+"#,
+ Mode::Batch,
+ &[
+ ScanToken::Token(Token::Id(String::from("do"))),
+ ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Punct(Punct::Equals)),
+ ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Id(String::from("b"))),
+ ScanToken::Token(Token::Id(String::from("c"))),
+ ScanToken::Token(Token::Id(String::from("y"))),
+ ScanToken::Token(Token::Punct(Punct::Equals)),
+ ScanToken::Token(Token::Id(String::from("d"))),
+ ScanToken::Token(Token::Id(String::from("e"))),
+ ScanToken::Token(Token::Id(String::from("f"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::String(String::from("do repeat a=1 thru 5"))),
+ ScanToken::Token(Token::String(String::from("another command"))),
+ ScanToken::Token(Token::String(String::from("second command"))),
+ ScanToken::Token(Token::String(String::from("+ third command"))),
+ ScanToken::Token(Token::String(String::from(
+ "end /* x */ /* y */ repeat print",
+ ))),
+ ScanToken::Token(Token::Id(String::from("end"))),
+ ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("do"))),
+ ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::Id(String::from("#a"))),
+ ScanToken::Token(Token::Punct(Punct::Equals)),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::String(String::from(" inner command"))),
+ ScanToken::Token(Token::Id(String::from("end"))),
+ ScanToken::Token(Token::Id(String::from("repeat"))),
+ ],
+ );
}
+
#[test]
-fn test_strings2() {
- check_scan(r#"""""
-'error
-'b'
-"#, &[]);
+fn test_batch_mode() {
+ // Batch mode: a command ends at the next line that starts in column 1
+ // (continuation lines are indented or start with `+`), and an explicit
+ // `.` terminator still works.
+ check_scan(
+ r#"first command
+ another line of first command
++ second command
+third command
+
+fourth command.
+ fifth command.
+"#,
+ Mode::Batch,
+ &[
+ ScanToken::Token(Token::Id(String::from("first"))),
+ ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::Id(String::from("another"))),
+ ScanToken::Token(Token::Id(String::from("line"))),
+ ScanToken::Token(Token::Id(String::from("of"))),
+ ScanToken::Token(Token::Id(String::from("first"))),
+ ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("second"))),
+ ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("third"))),
+ ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("fourth"))),
+ ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("fifth"))),
+ ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+}
+
+mod define {
+ // Scanning of DEFINE ... !ENDDEFINE macro definitions: the macro name
+ // arrives as a String token, the parenthesized argument list as ordinary
+ // tokens, each body line as a verbatim String token, and `!enddefine`
+ // as a MacroId token. The early-end tests check recovery when a `.`
+ // terminates the DEFINE command before the body.
+ use crate::lex::{
+ scan::ScanToken,
+ segment::Mode,
+ token::{MacroToken, Punct, Token},
+ };
+
+ use super::check_scan;
+
+ #[test]
+ fn test_simple() {
+ check_scan(
+ r#"define !macro1()
+var1 var2 var3
+!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_no_newline_after_parentheses() {
+ // The body starts right after `()`: leading spaces on the same line
+ // are preserved in the body String token.
+ check_scan(
+ r#"define !macro1() var1 var2 var3
+!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::String(String::from(" var1 var2 var3"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_no_newline_before_enddefine() {
+ check_scan(
+ r#"define !macro1()
+var1 var2 var3!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_all_on_one_line() {
+ check_scan(
+ r#"define !macro1()var1 var2 var3!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_empty() {
+ check_scan(
+ r#"define !macro1()
+!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_blank_lines() {
+ // Blank body lines become empty String tokens rather than being
+ // dropped.
+ check_scan(
+ r#"define !macro1()
+
+
+!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::String(String::from(""))),
+ ScanToken::Token(Token::String(String::from(""))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_arguments() {
+ check_scan(
+ r#"define !macro1(a(), b(), c())
+!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Id(String::from("b"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Id(String::from("c"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_multiline_arguments() {
+ check_scan(
+ r#"define !macro1(
+ a(), b(
+ ),
+ c()
+)
+!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Id(String::from("b"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Id(String::from("c"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_arguments_start_on_second_line() {
+ check_scan(
+ r#"define !macro1
+(x,y,z
+)
+content 1
+content 2
+!enddefine.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Id(String::from("y"))),
+ ScanToken::Token(Token::Punct(Punct::Comma)),
+ ScanToken::Token(Token::Id(String::from("z"))),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::String(String::from("content 1"))),
+ ScanToken::Token(Token::String(String::from("content 2"))),
+ ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
+ "!enddefine",
+ )))),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_early_end_of_command_1() {
+ check_scan(
+ r#"define !macro1.
+data list /x 1.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_early_end_of_command_2() {
+ check_scan(
+ r#"define !macro1
+x.
+data list /x 1.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_early_end_of_command_3() {
+ check_scan(
+ r#"define !macro1(.
+x.
+data list /x 1.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_early_end_of_command_4() {
+ // Notice the command terminator at the end of the DEFINE command,
+ // which should not be there and ends it early.
+ //
+ // NOTE(review): input and expectations are identical to
+ // test_early_end_of_command_1 -- possibly one of the two was meant
+ // to place the `.` differently; confirm against the original test
+ // suite this was ported from.
+ check_scan(
+ r#"define !macro1.
+data list /x 1.
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::EndCommand),
+ ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Punct(Punct::Slash)),
+ ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Number(1.0)),
+ ScanToken::Token(Token::EndCommand),
+ ],
+ );
+ }
+
+ #[test]
+ fn test_missing_enddefine() {
+ // With no !ENDDEFINE, the body runs to end of input and the stream
+ // finishes with Token::End instead of EndCommand.
+ check_scan(
+ r#"define !macro1()
+content line 1
+content line 2
+"#,
+ Mode::Auto,
+ &[
+ ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::String(String::from("!macro1"))),
+ ScanToken::Token(Token::Punct(Punct::LParen)),
+ ScanToken::Token(Token::Punct(Punct::RParen)),
+ ScanToken::Token(Token::String(String::from("content line 1"))),
+ ScanToken::Token(Token::String(String::from("content line 2"))),
+ ScanToken::Token(Token::End),
+ ],
+ );
+ }
}