-use crate::lex::{
+use crate::{identifier::Identifier, lex::{
segment::Mode,
token::{Punct, Token},
-};
+}};
-use super::{ScanError, ScanToken, StringLexer};
+use super::{ScanError, ScanToken, StringScanner};
fn print_token(token: &Token) {
match token {
}
fn check_scan(input: &str, mode: Mode, expected: &[ScanToken]) {
- let tokens = StringLexer::new(input, mode, false).collect::<Vec<_>>();
+ let tokens = StringScanner::new(input, mode, false).collect::<Vec<_>>();
if &tokens != expected {
for token in &tokens {
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("a"))),
- ScanToken::Token(Token::Id(String::from("aB"))),
- ScanToken::Token(Token::Id(String::from("i5"))),
- ScanToken::Token(Token::Id(String::from("$x"))),
- ScanToken::Token(Token::Id(String::from("@efg"))),
- ScanToken::Token(Token::Id(String::from("@@."))),
- ScanToken::Token(Token::Id(String::from("!abcd"))),
+ ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("aB").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("i5").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("$x").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("@efg").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("@@.").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("!abcd").unwrap())),
ScanToken::Token(Token::Punct(Punct::BangAsterisk)),
ScanToken::Token(Token::Punct(Punct::BangAsterisk)),
- ScanToken::Token(Token::Id(String::from("a"))),
- ScanToken::Token(Token::Id(String::from("#.#"))),
+ ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("#.#").unwrap())),
ScanToken::Token(Token::Punct(Punct::Dot)),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Punct(Punct::Underscore)),
- ScanToken::Token(Token::Id(String::from("z"))),
+ ScanToken::Token(Token::Id(Identifier::new("z").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("abcd."))),
- ScanToken::Token(Token::Id(String::from("abcd"))),
+ ScanToken::Token(Token::Id(Identifier::new("abcd.").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("abcd").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("QRSTUV"))),
+ ScanToken::Token(Token::Id(Identifier::new("QRSTUV").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("QrStUv"))),
+ ScanToken::Token(Token::Id(Identifier::new("QrStUv").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("WXYZ"))),
+ ScanToken::Token(Token::Id(Identifier::new("WXYZ").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Error(ScanError::UnexpectedChar('�')),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::Punct(Punct::By)),
ScanToken::Token(Token::Punct(Punct::To)),
ScanToken::Token(Token::Punct(Punct::With)),
- ScanToken::Token(Token::Id(String::from("andx"))),
- ScanToken::Token(Token::Id(String::from("orx"))),
- ScanToken::Token(Token::Id(String::from("notx"))),
- ScanToken::Token(Token::Id(String::from("eqx"))),
- ScanToken::Token(Token::Id(String::from("gex"))),
- ScanToken::Token(Token::Id(String::from("gtx"))),
- ScanToken::Token(Token::Id(String::from("lex"))),
- ScanToken::Token(Token::Id(String::from("ltx"))),
- ScanToken::Token(Token::Id(String::from("nex"))),
- ScanToken::Token(Token::Id(String::from("allx"))),
- ScanToken::Token(Token::Id(String::from("byx"))),
- ScanToken::Token(Token::Id(String::from("tox"))),
- ScanToken::Token(Token::Id(String::from("withx"))),
- ScanToken::Token(Token::Id(String::from("and."))),
+ ScanToken::Token(Token::Id(Identifier::new("andx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("orx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("notx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("eqx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("gex").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("gtx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("lex").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("ltx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("nex").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("allx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("byx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("tox").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("withx").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("and.").unwrap())),
ScanToken::Token(Token::Punct(Punct::With)),
ScanToken::Token(Token::EndCommand),
],
ScanToken::Token(Token::Number(0.0112)),
ScanToken::Token(Token::EndCommand),
ScanToken::Error(ScanError::ExpectedExponent(String::from("1e"))),
- ScanToken::Token(Token::Id(String::from("e1"))),
+ ScanToken::Token(Token::Id(Identifier::new("e1").unwrap())),
ScanToken::Error(ScanError::ExpectedExponent(String::from("1e+"))),
ScanToken::Error(ScanError::ExpectedExponent(String::from("1e-"))),
],
ScanToken::Token(Token::Punct(Punct::Dot)),
ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e"))),
ScanToken::Token(Token::Punct(Punct::Dash)),
- ScanToken::Token(Token::Id(String::from("e1"))),
+ ScanToken::Token(Token::Id(Identifier::new("e1").unwrap())),
ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e+"))),
ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e-"))),
ScanToken::Token(Token::Number(-1.0)),
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("#"))),
+ ScanToken::Token(Token::Id(Identifier::new("#").unwrap())),
ScanToken::Token(Token::Punct(Punct::Bang)),
ScanToken::Token(Token::Punct(Punct::Slash)),
- ScanToken::Token(Token::Id(String::from("usr"))),
+ ScanToken::Token(Token::Id(Identifier::new("usr").unwrap())),
ScanToken::Token(Token::Punct(Punct::Slash)),
- ScanToken::Token(Token::Id(String::from("bin"))),
+ ScanToken::Token(Token::Id(Identifier::new("bin").unwrap())),
ScanToken::Token(Token::Punct(Punct::Slash)),
- ScanToken::Token(Token::Id(String::from("pspp"))),
+ ScanToken::Token(Token::Id(Identifier::new("pspp").unwrap())),
],
);
}
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("com"))),
- ScanToken::Token(Token::Id(String::from("is"))),
- ScanToken::Token(Token::Id(String::from("ambiguous"))),
+ ScanToken::Token(Token::Id(Identifier::new("com").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("is").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("ambiguous").unwrap())),
ScanToken::Token(Token::Punct(Punct::With)),
- ScanToken::Token(Token::Id(String::from("COMPUTE"))),
+ ScanToken::Token(Token::Id(Identifier::new("COMPUTE").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("next"))),
- ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::Id(Identifier::new("next").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
],
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("DOCUMENT"))),
+ ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())),
ScanToken::Token(Token::String(String::from("DOCUMENT one line."))),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("DOCUMENT"))),
+ ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())),
ScanToken::Token(Token::String(String::from("DOC more"))),
ScanToken::Token(Token::String(String::from(" than"))),
ScanToken::Token(Token::String(String::from(" one"))),
ScanToken::Token(Token::String(String::from(" line."))),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("DOCUMENT"))),
+ ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())),
ScanToken::Token(Token::String(String::from("docu"))),
ScanToken::Token(Token::String(String::from("first.paragraph"))),
ScanToken::Token(Token::String(String::from("isn't parsed as tokens"))),
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("FIL"))),
- ScanToken::Token(Token::Id(String::from("label"))),
+ ScanToken::Token(Token::Id(Identifier::new("FIL").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("label").unwrap())),
ScanToken::Token(Token::String(String::from("isn't quoted"))),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("FILE"))),
- ScanToken::Token(Token::Id(String::from("lab"))),
+ ScanToken::Token(Token::Id(Identifier::new("FILE").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("lab").unwrap())),
ScanToken::Token(Token::String(String::from("is quoted"))),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("FILE"))),
- ScanToken::Token(Token::Id(String::from("lab"))),
+ ScanToken::Token(Token::Id(Identifier::new("FILE").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("lab").unwrap())),
ScanToken::Token(Token::String(String::from("not quoted here either"))),
ScanToken::Token(Token::EndCommand),
],
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("begin"))),
- ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::Id(Identifier::new("begin").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::String(String::from("123"))),
ScanToken::Token(Token::String(String::from("xxx"))),
- ScanToken::Token(Token::Id(String::from("end"))),
- ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("BEG"))),
- ScanToken::Token(Token::Id(String::from("DAT"))),
+ ScanToken::Token(Token::Id(Identifier::new("BEG").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("DAT").unwrap())),
ScanToken::Token(Token::String(String::from("5 6 7 /* x"))),
ScanToken::Token(Token::String(String::from(""))),
ScanToken::Token(Token::String(String::from("end data"))),
- ScanToken::Token(Token::Id(String::from("end"))),
- ScanToken::Token(Token::Id(String::from("data"))),
+ ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("do"))),
- ScanToken::Token(Token::Id(String::from("repeat"))),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("do").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Punct(Punct::Equals)),
- ScanToken::Token(Token::Id(String::from("a"))),
- ScanToken::Token(Token::Id(String::from("b"))),
- ScanToken::Token(Token::Id(String::from("c"))),
- ScanToken::Token(Token::Id(String::from("y"))),
+ ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("b").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("c").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("y").unwrap())),
ScanToken::Token(Token::Punct(Punct::Equals)),
- ScanToken::Token(Token::Id(String::from("d"))),
- ScanToken::Token(Token::Id(String::from("e"))),
- ScanToken::Token(Token::Id(String::from("f"))),
+ ScanToken::Token(Token::Id(Identifier::new("d").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("e").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("f").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::String(String::from(" do repeat a=1 thru 5."))),
ScanToken::Token(Token::String(String::from("another command."))),
ScanToken::Token(Token::String(String::from(
"end /* x */ /* y */ repeat print.",
))),
- ScanToken::Token(Token::Id(String::from("end"))),
- ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Batch,
&[
- ScanToken::Token(Token::Id(String::from("do"))),
- ScanToken::Token(Token::Id(String::from("repeat"))),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("do").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Punct(Punct::Equals)),
- ScanToken::Token(Token::Id(String::from("a"))),
- ScanToken::Token(Token::Id(String::from("b"))),
- ScanToken::Token(Token::Id(String::from("c"))),
- ScanToken::Token(Token::Id(String::from("y"))),
+ ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("b").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("c").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("y").unwrap())),
ScanToken::Token(Token::Punct(Punct::Equals)),
- ScanToken::Token(Token::Id(String::from("d"))),
- ScanToken::Token(Token::Id(String::from("e"))),
- ScanToken::Token(Token::Id(String::from("f"))),
+ ScanToken::Token(Token::Id(Identifier::new("d").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("e").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("f").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::String(String::from("do repeat a=1 thru 5"))),
ScanToken::Token(Token::String(String::from("another command"))),
ScanToken::Token(Token::String(String::from(
"end /* x */ /* y */ repeat print",
))),
- ScanToken::Token(Token::Id(String::from("end"))),
- ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("do"))),
- ScanToken::Token(Token::Id(String::from("repeat"))),
- ScanToken::Token(Token::Id(String::from("#a"))),
+ ScanToken::Token(Token::Id(Identifier::new("do").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("#a").unwrap())),
ScanToken::Token(Token::Punct(Punct::Equals)),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::String(String::from(" inner command"))),
- ScanToken::Token(Token::Id(String::from("end"))),
- ScanToken::Token(Token::Id(String::from("repeat"))),
+ ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
],
);
}
"#,
Mode::Batch,
&[
- ScanToken::Token(Token::Id(String::from("first"))),
- ScanToken::Token(Token::Id(String::from("command"))),
- ScanToken::Token(Token::Id(String::from("another"))),
- ScanToken::Token(Token::Id(String::from("line"))),
- ScanToken::Token(Token::Id(String::from("of"))),
- ScanToken::Token(Token::Id(String::from("first"))),
- ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::Id(Identifier::new("first").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("another").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("line").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("of").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("first").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("second"))),
- ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::Id(Identifier::new("second").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("third"))),
- ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::Id(Identifier::new("third").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("fourth"))),
- ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::Id(Identifier::new("fourth").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("fifth"))),
- ScanToken::Token(Token::Id(String::from("command"))),
+ ScanToken::Token(Token::Id(Identifier::new("fifth").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
}
mod define {
- use crate::lex::{
+ use crate::{identifier::Identifier, lex::{
scan::ScanToken,
segment::Mode,
token::{Punct, Token},
- };
+ }};
use super::check_scan;
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::String(String::from(" var1 var2 var3"))),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::String(String::from(""))),
ScanToken::Token(Token::String(String::from(""))),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
- ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Punct(Punct::Comma)),
- ScanToken::Token(Token::Id(String::from("b"))),
+ ScanToken::Token(Token::Id(Identifier::new("b").unwrap())),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Punct(Punct::Comma)),
- ScanToken::Token(Token::Id(String::from("c"))),
+ ScanToken::Token(Token::Id(Identifier::new("c").unwrap())),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
- ScanToken::Token(Token::Id(String::from("a"))),
+ ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Punct(Punct::Comma)),
- ScanToken::Token(Token::Id(String::from("b"))),
+ ScanToken::Token(Token::Id(Identifier::new("b").unwrap())),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Punct(Punct::Comma)),
- ScanToken::Token(Token::Id(String::from("c"))),
+ ScanToken::Token(Token::Id(Identifier::new("c").unwrap())),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Punct(Punct::Comma)),
- ScanToken::Token(Token::Id(String::from("y"))),
+ ScanToken::Token(Token::Id(Identifier::new("y").unwrap())),
ScanToken::Token(Token::Punct(Punct::Comma)),
- ScanToken::Token(Token::Id(String::from("z"))),
+ ScanToken::Token(Token::Id(Identifier::new("z").unwrap())),
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::String(String::from("content 1"))),
ScanToken::Token(Token::String(String::from("content 2"))),
- ScanToken::Token(Token::Id(String::from("!enddefine"))),
+ ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
],
);
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("data"))),
- ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("list").unwrap())),
ScanToken::Token(Token::Punct(Punct::Slash)),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
],
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("data"))),
- ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("list").unwrap())),
ScanToken::Token(Token::Punct(Punct::Slash)),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
],
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("data"))),
- ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("list").unwrap())),
ScanToken::Token(Token::Punct(Punct::Slash)),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
],
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::Id(String::from("data"))),
- ScanToken::Token(Token::Id(String::from("list"))),
+ ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
+ ScanToken::Token(Token::Id(Identifier::new("list").unwrap())),
ScanToken::Token(Token::Punct(Punct::Slash)),
- ScanToken::Token(Token::Id(String::from("x"))),
+ ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
],
"#,
Mode::Auto,
&[
- ScanToken::Token(Token::Id(String::from("define"))),
+ ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
ScanToken::Token(Token::Punct(Punct::LParen)),
ScanToken::Token(Token::Punct(Punct::RParen)),
-use std::collections::HashMap;
+use std::{
+ cmp::Ordering,
+ collections::{BTreeMap, HashMap},
+ mem::take,
+};
+use num::Integer;
use thiserror::Error as ThisError;
use unicase::UniCase;
-use crate::{lex::token::Token, message::Location};
+use crate::{
+ identifier::Identifier,
+ lex::{
+ scan::{ScanError, ScanToken, StringScanner, StringSegmenter},
+ segment::Mode,
+ token::{Punct, Token},
+ },
+ message::Location,
+};
#[derive(Clone, Debug, ThisError)]
pub enum MacroError {
)]
ExpectedMoreTokens {
n: usize,
- arg: String,
- macro_: String,
+ arg: Identifier,
+ macro_: Identifier,
},
/// Expected a particular token at end of command.
#[error("Reached end of command expecting {token:?} in argument {arg} to macro {macro_}.")]
ExpectedToken {
token: String,
- arg: String,
- macro_: String,
+ arg: Identifier,
+ macro_: Identifier,
},
/// Expected a particular token, got a different one.
UnexpectedToken {
actual: String,
expected: String,
- arg: String,
- macro_: String,
+ arg: Identifier,
+ macro_: Identifier,
},
+
+    /// Argument specified multiple times.
+ #[error("Argument {arg} specified multiple times in call to macro {macro_}.")]
+ DuplicateArg { arg: Identifier, macro_: Identifier },
+
+ /// Maximum nesting limit exceeded.
+ #[error("Maximum nesting level {limit} exceeded. (Use `SET MNEST` to change the limit.)")]
+ TooDeep { limit: usize },
+
+ /// Invalid `!*`.
+ #[error("`!*` may only be used within the expansion of a macro.")]
+ InvalidBangAsterisk,
+
+ /// Error tokenizing during expansion.
+ #[error(transparent)]
+ ScanError(ScanError),
+
+ /// Expecting `)` in macro expression.
+ #[error("Expecting `)` in macro expression.")]
+ ExpectingRParen,
+
+ /// Expecting literal.
+ #[error("Expecting literal or function invocation in macro expression.")]
+ ExpectingLiteral,
}
/// A PSPP macro as defined with `!DEFINE`.
pub struct Macro {
/// The macro's name. This is an ordinary identifier except that it is
/// allowed (but not required) to begin with `!`.
- pub name: String,
+ pub name: Identifier,
/// Source code location of macro definition, for error reporting.
pub location: Location,
parameters: Vec<Parameter>,
/// Body.
- body: Vec<BodyToken>,
+ body: Vec<MacroToken>,
}
impl Macro {
- fn initial_state(&self) -> MacroCallState {
+ fn initial_state(&self) -> ParserState {
if self.parameters.is_empty() {
- MacroCallState::Finished
+ ParserState::Finished
} else if self.parameters[0].is_positional() {
- MacroCallState::Keyword
+ ParserState::Keyword
} else if let ValueType::Enclose(_, _) = self.parameters[0].arg {
- MacroCallState::Enclose
+ ParserState::Enclose
} else {
- MacroCallState::Arg
+ ParserState::Arg
}
}
+
+ fn find_parameter(&self, name: &Identifier) -> Option<usize> {
+ self.parameters.iter().position(|param| ¶m.name == name)
+ }
}
struct Parameter {
- /// `name` or `1`.
- name: UniCase<String>,
+ /// `!name` or `!1`.
+ name: Identifier,
/// Default value.
///
/// The tokens don't include white space, etc. between them.
- default: Vec<BodyToken>,
+ default: Vec<MacroToken>,
/// Macro-expand the value?
expand_value: bool,
/// Returns true if this is a positional parameter. Positional parameters
/// are expanded by index (position) rather than by name.
fn is_positional(&self) -> bool {
- self.name.as_bytes()[1].is_ascii_digit()
+ self.name.0.as_bytes()[1].is_ascii_digit()
}
}
/// A token and the syntax that was tokenized to produce it. The syntax allows
/// the token to be turned back into syntax accurately.
#[derive(Clone)]
-struct BodyToken {
+pub struct MacroToken {
/// The token.
token: Token,
syntax: String,
}
+fn tokenize_string(s: &str, mode: Mode, output: &mut Vec<MacroToken>, error: &impl Fn(MacroError)) {
+ for (syntax, token) in StringSegmenter::new(s, mode, true) {
+ match token {
+ ScanToken::Token(token) => output.push(MacroToken {
+ token,
+ syntax: String::from(syntax),
+ }),
+ ScanToken::Error(scan_error) => error(MacroError::ScanError(scan_error)),
+ }
+ }
+}
+
+fn unquote_string(input: String, mode: Mode) -> String {
+ let mut scanner = StringScanner::new(&input, mode, true);
+ let Some(ScanToken::Token(Token::String(unquoted))) = scanner.next() else {
+ return input;
+ };
+ let None = scanner.next() else { return input };
+ return unquoted;
+}
+
+struct MacroTokens<'a>(&'a [MacroToken]);
+
+impl<'a> MacroTokens<'a> {
+ fn match_(&mut self, s: &str) -> bool {
+ if let Some((first, rest)) = self.0.split_first() {
+ if first.syntax.eq_ignore_ascii_case(s) {
+ self.0 = rest;
+ return true;
+ }
+ }
+ false
+ }
+ fn take_relop(&mut self) -> Option<RelOp> {
+ if let Some((first, rest)) = self.0.split_first() {
+ if let Ok(relop) = first.syntax.as_str().try_into() {
+ self.0 = rest;
+ return Some(relop);
+ }
+ }
+ None
+ }
+ fn advance(&mut self) -> &MacroToken {
+ let (first, rest) = self.0.split_first().unwrap();
+ self.0 = rest;
+ first
+ }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum TokenClass {
+ /// No space before or after (new-line after).
+ EndCommand,
+
+ /// Space on both sides.
+ BinaryOperator,
+
+ /// Space afterward.
+ Comma,
+
+ /// Don't need spaces except sequentially.
+ Id,
+
+ /// Don't need spaces except sequentially.
+ Punct,
+}
+
+impl TokenClass {
+ fn needs_space(prev: Self, next: Self) -> bool {
+ match (prev, next) {
+ // Don't need a space before or after the end of a command. (A
+ // new-line is needed afterward as a special case.)
+ (Self::EndCommand, _) | (_, Self::EndCommand) => false,
+
+ // Binary operators always have a space on both sides.
+ (Self::BinaryOperator, _) | (_, Self::BinaryOperator) => true,
+
+ // A comma always has a space afterward.
+ (Self::Comma, _) => true,
+
+            // Otherwise, `prev` is `Self::Id` or `Self::Punct`, which only
+            // need a space if there are two of them in a row.
+ _ => prev == next,
+ }
+ }
+}
+
+impl From<&Token> for TokenClass {
+ fn from(source: &Token) -> Self {
+ match source {
+ Token::End => Self::Punct,
+ Token::Id(_) | Token::Number(_) | Token::String(_) => Self::Id,
+ Token::EndCommand => Self::EndCommand,
+ Token::Punct(punct) => match punct {
+ Punct::LParen
+ | Punct::RParen
+ | Punct::LSquare
+ | Punct::RSquare
+ | Punct::LCurly
+ | Punct::RCurly => Self::Punct,
+
+ Punct::Plus
+ | Punct::Dash
+ | Punct::Asterisk
+ | Punct::Slash
+ | Punct::Equals
+ | Punct::Colon
+ | Punct::And
+ | Punct::Or
+ | Punct::Not
+ | Punct::Eq
+ | Punct::Ge
+ | Punct::Gt
+ | Punct::Le
+ | Punct::Lt
+ | Punct::Ne
+ | Punct::All
+ | Punct::By
+ | Punct::To
+ | Punct::With
+ | Punct::Exp
+ | Punct::Bang
+ | Punct::Percent
+ | Punct::Question
+ | Punct::Backtick
+ | Punct::Dot
+ | Punct::Underscore
+ | Punct::BangAsterisk => Self::BinaryOperator,
+
+ Punct::Comma | Punct::Semicolon => Self::Comma,
+ },
+ }
+ }
+}
+
+/// Converts a sequence of macro tokens back into syntax, inserting a space or
+/// new-line between adjacent tokens where one is needed to keep them from
+/// merging when re-tokenized.
+fn macro_tokens_to_syntax(input: &[MacroToken], output: &mut String) {
+    for (i, token) in input.iter().enumerate() {
+        if i > 0 {
+            // Decide the separator by comparing the *previous* token against
+            // the current one.  (The original read `input[i]` here, comparing
+            // the token against itself.)
+            let prev = &input[i - 1].token;
+            let next = &token.token;
+            if let Token::EndCommand = prev {
+                // Command terminators are followed by a new-line as a
+                // special case (see `TokenClass::needs_space`).
+                output.push('\n');
+            } else {
+                let prev_class: TokenClass = prev.into();
+                let next_class: TokenClass = next.into();
+                if TokenClass::needs_space(prev_class, next_class) {
+                    output.push(' ')
+                }
+            }
+        }
+        // Emit every token's syntax, including the first one (the original
+        // had this inside `if i > 0`, dropping the first token entirely).
+        output.push_str(&token.syntax);
+    }
+}
+
+trait MacroId {
+ fn macro_id(&self) -> Option<&Identifier>;
+}
+
+impl MacroId for Token {
+ fn macro_id(&self) -> Option<&Identifier> {
+ let id = self.id()?;
+ id.0.starts_with('!').then_some(id)
+ }
+}
+
+enum RelOp {
+ Eq,
+ Ne,
+ Lt,
+ Gt,
+ Le,
+ Ge,
+}
+
+impl TryFrom<&str> for RelOp {
+    type Error = ();
+
+    /// Parses a relational operator from its syntax, accepting both the
+    /// punctuation forms (`=`, `~=`, `<>`, `<`, `>`, `<=`, `>=`) and the
+    /// keyword forms (`!EQ`, `!NE`, `!LT`, `!LE`, `!GT`, `!GE`,
+    /// case-insensitively).
+    fn try_from(source: &str) -> Result<Self, Self::Error> {
+        match source {
+            "=" => Ok(Self::Eq),
+            "~=" | "<>" => Ok(Self::Ne),
+            "<" => Ok(Self::Lt),
+            ">" => Ok(Self::Gt),
+            "<=" => Ok(Self::Le),
+            ">=" => Ok(Self::Ge),
+            // Keyword form: byte 0 is `!`, so the two keyword letters are in
+            // bytes 1 and 2.  (The original re-read bytes 0 and 1; since
+            // `!` never uppercases to a letter, no keyword ever matched.)
+            _ if source.len() == 3 && source.as_bytes()[0] == b'!' => match (
+                source.as_bytes()[1].to_ascii_uppercase(),
+                source.as_bytes()[2].to_ascii_uppercase(),
+            ) {
+                (b'E', b'Q') => Ok(Self::Eq),
+                (b'N', b'E') => Ok(Self::Ne),
+                (b'L', b'T') => Ok(Self::Lt),
+                (b'L', b'E') => Ok(Self::Le),
+                (b'G', b'T') => Ok(Self::Gt),
+                (b'G', b'E') => Ok(Self::Ge),
+                _ => Err(()),
+            },
+            _ => Err(()),
+        }
+    }
+}
+
+impl RelOp {
+ fn evaluate(&self, cmp: Ordering) -> bool {
+ match self {
+ RelOp::Eq => cmp == Ordering::Equal,
+ RelOp::Ne => cmp != Ordering::Equal,
+ RelOp::Lt => cmp == Ordering::Less,
+ RelOp::Gt => cmp == Ordering::Greater,
+ RelOp::Le => cmp != Ordering::Greater,
+ RelOp::Ge => cmp != Ordering::Less,
+ }
+ }
+}
+
type MacroSet = HashMap<UniCase<String>, Macro>;
-pub enum MacroCallState {
+enum ParserState {
/// Accumulating tokens toward the end of any type of argument.
Arg,
}
/// Macro call parser FSM.
-pub struct MacroCall<'a> {
+pub struct Parser<'a> {
macros: &'a MacroSet,
macro_: &'a Macro,
- state: MacroCallState,
- args: Box<[Option<Vec<BodyToken>>]>,
+ state: ParserState,
+ args: Box<[Option<Vec<MacroToken>>]>,
arg_index: usize,
/// Length of macro call so far.
n_tokens: usize,
}
-impl<'a> MacroCall<'a> {
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum ParseStatus {
+ Complete,
+ Incomplete,
+}
+
+impl<'a> Parser<'a> {
pub fn new(macros: &'a MacroSet, token: &Token) -> Option<Self> {
- if macros.is_empty() {
- return None;
- }
- let macro_name = match token {
- Token::Id(s) => s.clone(),
- _ => return None,
- };
- // XXX Unicase::new() is very expensive. We probably need to define our
- // own Unicase-alike that has a proper Borrow<> implementation.
- let Some(macro_) = macros.get(&UniCase::new(macro_name)) else {
- return None;
- };
+ let macro_ = macros.get(&token.id()?.0)?;
Some(Self {
macros,
macro_,
})
}
- fn finished(&mut self) -> Option<usize> {
- self.state = MacroCallState::Finished;
+ fn finished(&mut self) {
+ self.state = ParserState::Finished;
for (i, arg) in self.args.iter_mut().enumerate() {
if arg.is_none() {
*arg = Some(self.macro_.parameters[i].default.clone());
}
}
- Some(self.n_tokens)
+ self.state = ParserState::Finished;
}
- fn next_arg(&mut self) -> Option<usize> {
+ fn next_arg(&mut self) {
if self.macro_.parameters.is_empty() {
self.finished()
} else {
} else {
let param = &self.macro_.parameters[self.arg_index];
self.state = if !param.is_positional() {
- MacroCallState::Keyword
+ ParserState::Keyword
} else if let ValueType::Enclose(_, _) = param.arg {
- MacroCallState::Enclose
+ ParserState::Enclose
} else {
- MacroCallState::Arg
+ ParserState::Arg
};
- None
}
} else {
if self.args.iter().any(|arg| arg.is_none()) {
- self.state = MacroCallState::Keyword;
- None
+ self.state = ParserState::Keyword;
} else {
- self.finished()
+ self.finished();
}
}
}
}
- fn push_arg(
- &mut self,
- token: &Token,
- syntax: &str,
- error: &impl Fn(MacroError),
- ) -> Option<usize> {
+ fn push_arg(&mut self, token: &Token, syntax: &str, error: &impl Fn(MacroError)) {
let param = &self.macro_.parameters[self.args.len() - 1];
if let Token::EndCommand | Token::End = token {
if let Some(arg) = &self.args[self.arg_index] {
}
}
}
- return self.finished();
+ self.finished();
}
self.n_tokens += 1;
if true
// !macro_expand_arg (&mt->token, mc->me, *argp)
{
- arg.push(BodyToken {
+ arg.push(MacroToken {
token: token.clone(),
syntax: String::from(syntax),
});
}
if next_arg {
self.next_arg()
- } else {
- None
}
}
- fn push_enclose(
- &mut self,
- token: &Token,
- syntax: &str,
- error: &impl Fn(MacroError),
- ) -> Option<usize> {
+ fn push_enclose(&mut self, token: &Token, syntax: &str, error: &impl Fn(MacroError)) {
let param = &self.macro_.parameters[self.arg_index];
let ValueType::Enclose(start, _) = &param.arg else {
unreachable!()
if token == start {
self.n_tokens += 1;
self.args[self.arg_index].get_or_insert(Vec::new());
- self.state = MacroCallState::Arg;
- None
+ self.state = ParserState::Arg;
} else if param.is_positional() && matches!(token, Token::End | Token::EndCommand) {
- self.finished()
+ self.finished();
} else {
error(MacroError::UnexpectedToken {
actual: String::from(syntax),
arg: param.name.clone(),
macro_: self.macro_.name.clone(),
});
- self.finished()
+ self.finished();
}
}
- fn push_keyword(
- &mut self,
- token: &Token,
- syntax: &str,
- error: &impl Fn(MacroError),
- ) -> Option<usize> {
- let Token::Id(id) = token else {
+ fn push_keyword(&mut self, token: &Token, _syntax: &str, error: &impl Fn(MacroError)) {
+ let Some(id) = token.id() else {
return self.finished();
};
- let Some(arg_idx) = self
- .macro_
- .parameters
- .iter()
- .position(|param| param.name == UniCase::new(id))
- else {};
+ let Some(arg_index) = self.macro_.find_parameter(id) else {
+ return self.finished();
+ };
+ self.arg_index = arg_index;
+ if self.args[arg_index].is_some() {
+ error(MacroError::DuplicateArg {
+ arg: id.clone(),
+ macro_: self.macro_.name.clone(),
+ });
+ }
+ self.args[arg_index] = Some(Vec::new());
+ }
+
+ fn push_equals(&mut self, token: &Token, syntax: &str, error: &impl Fn(MacroError)) {
+ let param = &self.macro_.parameters[self.arg_index];
+ if let Token::Punct(Punct::Eq) = token {
+ self.n_tokens += 1;
+ self.state = if let ValueType::Enclose(_, _) = param.arg {
+ ParserState::Enclose
+ } else {
+ ParserState::Arg
+ };
+ } else {
+ error(MacroError::UnexpectedToken {
+ actual: syntax.into(),
+ expected: String::from("="),
+ arg: param.name.clone(),
+ macro_: self.macro_.name.clone(),
+ });
+ self.finished()
+ }
}
/// Adds `token`, which has the given `syntax`, to the collection of tokens
/// in `self` that potentially need to be macro expanded.
///
- /// Returns `None` if the macro expander needs more tokens, for macro
+    /// Returns [`ParseStatus::Incomplete`] if the macro expander needs more tokens, for macro
/// arguments or to decide whether this is actually a macro invocation. The
/// caller should call `push` again with the next token.
- ///
- /// Returns `Some(n)` if the macro was complete with `n` tokens. The caller
+    ///
+    /// Returns [`ParseStatus::Complete`] if the macro invocation was complete. The caller
/// should call [`Self::expand`] to obtain the expansion. (If `n == 0`,
/// then the tokens did not actually invoke a macro at all and the expansion
/// will be empty.)
token: &Token,
syntax: &str,
error: &impl Fn(MacroError),
- ) -> Option<usize> {
+ ) -> ParseStatus {
match self.state {
- MacroCallState::Arg => self.push_arg(token, syntax, error),
- MacroCallState::Enclose => self.push_enclose(token, syntax, error),
- MacroCallState::Keyword => todo!(),
- MacroCallState::Equals => todo!(),
- MacroCallState::Finished => todo!(),
+ ParserState::Arg => self.push_arg(token, syntax, error),
+ ParserState::Enclose => self.push_enclose(token, syntax, error),
+ ParserState::Keyword => self.push_keyword(token, syntax, error),
+ ParserState::Equals => self.push_equals(token, syntax, error),
+ ParserState::Finished => (),
+ }
+ if let ParserState::Finished = self.state {
+ ParseStatus::Complete
+ } else {
+ ParseStatus::Incomplete
}
}
+
+ pub fn finish(self) -> Call<'a> {
+ let ParserState::Finished = self.state else {
+ panic!()
+ };
+ Call(self)
+ }
}
+
+/// Expansion stack entry.
+struct Frame {
+ /// A macro name or `!IF`, `!DO`, etc.
+ name: Option<Identifier>,
+
+ /// Source location, if available.
+ location: Option<Location>,
+}
+
+struct Expander<'a> {
+ /// Macros to expand recursively.
+ macros: &'a MacroSet,
+
+ /// Error reporting callback.
+ error: &'a Box<dyn Fn(MacroError) + 'a>,
+
+ /// Tokenization mode.
+ mode: Mode,
+
+ /// Remaining nesting levels.
+ nesting_countdown: usize,
+
+ /// Stack for error reporting.
+ stack: Vec<Frame>,
+
+ // May macro calls be expanded?
+ expand: Option<&'a bool>,
+
+ /// Variables from `!DO` and `!LET`.
+ vars: &'a BTreeMap<Identifier, String>,
+
+ // Only set if inside a `!DO` loop. If true, break out of the loop.
+ break_: Option<&'a mut bool>,
+
+ /// Only set if expanding a macro (and not, say, a macro argument).
+ macro_: Option<&'a Macro>,
+
+ /// Only set if expanding a macro (and not, say, a macro argument).
+ args: Option<&'a [Option<Vec<MacroToken>>]>,
+}
+
+/// Renders a macro-language Boolean as its canonical string form:
+/// `"1"` for true, `"0"` for false.
+fn bool_to_string(b: bool) -> String {
+    String::from(if b { "1" } else { "0" })
+}
+
+impl<'a> Expander<'a> {
+ fn may_expand(&self) -> bool {
+ self.expand.map(|b| *b).unwrap_or(false)
+ }
+
+ fn should_break(&self) -> bool {
+ self.break_.as_ref().map(|b| **b).unwrap_or(false)
+ }
+
+ fn expand(&mut self, input: &[MacroToken], output: &mut Vec<MacroToken>) {
+ if self.nesting_countdown == 0 {
+ (self.error)(MacroError::TooDeep { limit: MNEST });
+ output.extend(input.iter().cloned());
+ } else {
+ let mut i = 0;
+ while i < input.len() && !self.should_break() {
+ let consumed = self.expand__(&input[i..], output);
+ debug_assert!(consumed > 0);
+ i += consumed;
+ }
+ }
+ }
+
+ fn expand_arg(&mut self, param_idx: usize, output: &mut Vec<MacroToken>) {
+ let param = &self.macro_.unwrap().parameters[param_idx];
+ let arg = &self.args.unwrap()[param_idx].as_ref().unwrap();
+ if self.may_expand() && param.expand_value {
+ let vars = BTreeMap::new();
+ let mut stack = take(&mut self.stack);
+ stack.push(Frame {
+ name: Some(param.name.clone()),
+ location: None,
+ });
+ let mut subexpander = Expander {
+ stack,
+ vars: &vars,
+ break_: None,
+ macro_: None,
+ args: None,
+ ..*self
+ };
+ subexpander.expand(&arg, output);
+ self.stack = subexpander.stack;
+ self.stack.pop();
+ } else {
+ output.extend(arg.iter().cloned());
+ }
+ }
+
+ /// Parses one function argument from `input`. Each argument to a macro
+ /// function is one of:
+ ///
+ /// - A quoted string or other single literal token.
+ ///
+ /// - An argument to the macro being expanded, e.g. `!1` or a named
+ /// argument.
+ ///
+ /// - `!*`.
+ ///
+ /// - A function invocation.
+ ///
+ /// Each function invocation yields a character sequence to be turned into a
+ /// sequence of tokens. The case where that character sequence is a single
+ /// quoted string is an important special case.
+ fn parse_function_arg(&mut self, input: &mut MacroTokens) -> Option<String> {
+ if let Some(macro_) = self.macro_ {
+ match &input.0.get(0)?.token {
+ Token::Id(id) if id.0.starts_with('!') => {
+ if let Some(param_idx) = macro_.find_parameter(id) {
+ let mut s = String::new();
+ macro_tokens_to_syntax(
+ self.args.unwrap()[param_idx].as_ref().unwrap(),
+ &mut s,
+ );
+ input.advance();
+ return Some(s);
+ }
+ if let Some(value) = self.vars.get(id) {
+ return Some(value.clone());
+ }
+
+ todo!() // expand macro function
+ }
+ Token::Punct(Punct::BangAsterisk) => {
+ let mut arg = String::new();
+ for i in 0..macro_.parameters.len() {
+ if !macro_.parameters[i].is_positional() {
+ break;
+ }
+ if i > 0 {
+ arg.push(' ')
+ }
+ macro_tokens_to_syntax(self.args.unwrap()[i].as_ref().unwrap(), &mut arg);
+ }
+ input.advance();
+ return Some(arg);
+ }
+ _ => (),
+ }
+ }
+ Some(input.advance().syntax.clone())
+ }
+
+ fn evaluate_literal(&mut self, input: &mut MacroTokens) -> Option<String> {
+ if input.match_("(") {
+ let value = self.evaluate_or(input)?;
+ if input.match_(")") {
+ Some(value)
+ } else {
+ (self.error)(MacroError::ExpectingRParen);
+ None
+ }
+ } else if input.match_(")") {
+ (self.error)(MacroError::ExpectingLiteral);
+ None
+ } else {
+ Some(unquote_string(self.parse_function_arg(input)?, self.mode))
+ }
+ }
+
+ fn evaluate_relational(&mut self, input: &mut MacroTokens) -> Option<String> {
+ let lhs = self.evaluate_literal(input)?;
+ let Some(relop) = input.take_relop() else {
+ return Some(lhs);
+ };
+ let rhs = self.evaluate_literal(input)?;
+ let cmp = unquote_string(lhs, self.mode).cmp(&unquote_string(rhs, self.mode));
+ Some(bool_to_string(relop.evaluate(cmp)))
+ }
+
+    /// Evaluates a (possibly negated) relational subexpression.  Consumes any
+    /// leading negation operators, then the operand; returns `"1"`/`"0"` if
+    /// any negations were applied, otherwise the operand unchanged.
+    fn evaluate_not(&mut self, input: &mut MacroTokens) -> Option<String> {
+        // Count leading negation operators.  These are `!NOT` and `~`; the
+        // original matched `!AND`/`&` here, which are the conjunction
+        // operators consumed by `evaluate_and`, so negation never applied.
+        let mut negations = 0;
+        while input.match_("!NOT") || input.match_("~") {
+            negations += 1;
+        }
+
+        let operand = self.evaluate_relational(input)?;
+        if negations == 0 {
+            return Some(operand);
+        }
+
+        // An odd number of negations flips the operand's truth value;
+        // an even number leaves it alone (but still normalizes to "1"/"0").
+        let mut b = operand != "0";
+        if negations.is_odd() {
+            b = !b;
+        }
+        Some(bool_to_string(b))
+    }
+
+ fn evaluate_and(&mut self, input: &mut MacroTokens) -> Option<String> {
+ let mut lhs = self.evaluate_not(input)?;
+ while input.match_("!AND") || input.match_("&") {
+ let rhs = self.evaluate_not(input)?;
+ lhs = bool_to_string(lhs != "0" && rhs != "0");
+ }
+ Some(lhs)
+ }
+ fn evaluate_or(&mut self, input: &mut MacroTokens) -> Option<String> {
+ let mut lhs = self.evaluate_and(input)?;
+ while input.match_("!OR") || input.match_("|") {
+ let rhs = self.evaluate_and(input)?;
+ lhs = bool_to_string(lhs != "0" || rhs != "0");
+ }
+ Some(lhs)
+ }
+
+ fn evaluate_expression(&mut self, input: &[MacroToken]) -> Option<String> {
+ let mut tokens = MacroTokens(input);
+ self.evaluate_or(&mut tokens)
+ }
+
+ fn expand_if(&mut self, input: &[MacroToken], output: &mut Vec<MacroToken>) -> usize {
+ self.evaluate_expression(input);
+ todo!()
+ }
+
+ fn expand__(&mut self, input: &[MacroToken], output: &mut Vec<MacroToken>) -> usize {
+ // Recursive macro calls.
+ if self.may_expand() {
+ if let Some(call) = Call::for_tokens(self.macros, input, &self.error) {
+ let vars = BTreeMap::new();
+ let mut stack = take(&mut self.stack);
+ stack.push(Frame {
+ name: Some(call.0.macro_.name.clone()),
+ location: Some(call.0.macro_.location.clone()),
+ });
+ let mut subexpander = Expander {
+ break_: None,
+ vars: &vars,
+ nesting_countdown: self.nesting_countdown.saturating_sub(1),
+ stack,
+ ..*self
+ };
+ subexpander.expand(input, output);
+ self.stack = subexpander.stack;
+ self.stack.pop();
+ return call.0.n_tokens;
+ }
+ }
+
+ // Only identifiers beginning with `!` receive further processing.
+ let id = match &input[0].token {
+ Token::Id(id) if id.0.starts_with('!') => id,
+ Token::Punct(Punct::BangAsterisk) => {
+ if let Some(macro_) = self.macro_ {
+ for i in 0..macro_.parameters.len() {
+ self.expand_arg(i, output);
+ }
+ } else {
+ (self.error)(MacroError::InvalidBangAsterisk);
+ }
+ return 1;
+ }
+ _ => {
+ output.push(input[0].clone());
+ return 1;
+ }
+ };
+
+ // Macro arguments.
+ if let Some(macro_) = self.macro_ {
+ if let Some(param_idx) = macro_.find_parameter(id) {
+ self.expand_arg(param_idx, output);
+ return 1;
+ }
+ }
+
+ // Variables set by `!DO` or `!LET`.
+ if let Some(value) = self.vars.get(id) {
+ tokenize_string(value.as_str(), self.mode, output, &self.error);
+ return 1;
+ }
+
+ // XXX Macro functions.
+ if id == "!IF" {
+ let n = self.expand_if(&input[1..], output);
+ if n > 0 {
+ return n;
+ }
+ }
+
+ todo!()
+ }
+}
+
+pub struct Call<'a>(Parser<'a>);
+
+impl<'a> Call<'a> {
+ pub fn for_tokens<F>(macros: &'a MacroSet, tokens: &[MacroToken], error: &F) -> Option<Self>
+ where
+ F: Fn(MacroError),
+ {
+ let mut parser = Parser::new(macros, &tokens.get(0)?.token)?;
+ for token in tokens[1..].iter().chain(&[MacroToken {
+ token: Token::EndCommand,
+ syntax: String::from(""),
+ }]) {
+ if parser.push(&token.token, &token.syntax, error) == ParseStatus::Complete {
+ return Some(parser.finish());
+ }
+ }
+ return None;
+ }
+
+ pub fn expand<F>(&self, mode: Mode, call_loc: Location, output: &mut Vec<MacroToken>, error: F)
+ where
+ F: Fn(MacroError) + 'a,
+ {
+ let error: Box<dyn Fn(MacroError) + 'a> = Box::new(error);
+ let vars = BTreeMap::new();
+ let mut me = Expander {
+ macros: self.0.macros,
+ error: &error,
+ macro_: Some(self.0.macro_),
+ args: Some(&self.0.args),
+ mode,
+ nesting_countdown: MNEST,
+ stack: vec![
+ Frame {
+ name: None,
+ location: Some(call_loc),
+ },
+ Frame {
+ name: Some(self.0.macro_.name.clone()),
+ location: Some(self.0.macro_.location.clone()),
+ },
+ ],
+ vars: &vars,
+ break_: None,
+ expand: None,
+ };
+ me.expand(&self.0.macro_.body, output);
+ }
+}
+
+const MNEST: usize = 50;