integer::ToInteger,
lex::{
command_name::CommandMatcher,
- lexer::Lexer,
+ lexer::{NewLexer},
token::{Punct, Token},
},
message::Diagnostic,
COMMANDS.get_or_init(|| new_commands()).as_slice()
}
-fn parse_command_word(lexer: &mut Lexer, s: &mut String, n: isize) -> bool {
+fn parse_command_word(lexer: &mut NewLexer, s: &mut String, n: isize) -> bool {
let separator = match s.chars().next_back() {
Some(c) if c != '-' => " ",
_ => "",
}
fn parse_command_name(
- lexer: &mut Lexer,
+ lexer: &mut NewLexer,
error: &Box<dyn Fn(Diagnostic)>,
) -> Result<(&'static Command, isize), ()> {
let mut s = String::new();
pub fn end_of_command(context: &Context) -> Result<Success, ()> {
match context.lexer.token() {
- Token::EndCommand | Token::End => Ok(Success::Success),
+ Token::EndCommand => Ok(Success::Success),
_ => {
context.error(
context
}
}
-fn parse_in_state(lexer: &mut Lexer, error: &Box<dyn Fn(Diagnostic)>, _state: State) {
+fn parse_in_state(mut lexer: NewLexer, error: &Box<dyn Fn(Diagnostic)>, _state: State) {
-    println!("{}:{}", file!(), line!());
match lexer.token() {
- Token::End | Token::EndCommand => (),
+ Token::EndCommand => (),
_ => {
- println!("{}:{}", file!(), line!());
- if let Ok((command, n_tokens)) = parse_command_name(lexer, error) {
+ if let Ok((command, n_tokens)) = parse_command_name(&mut lexer, error) {
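+                // Consume the tokens that formed the command name, so the
+                // command's own parser starts at its first argument token.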
for _ in 0..n_tokens {
lexer.get();
}
- println!("{}:{}", file!(), line!());
let mut context = Context {
error,
lexer,
command_name: Some(command.name),
};
- println!("{}:{}", file!(), line!());
(command.run)(&mut context);
- println!("{}:{}", file!(), line!());
let _ = end_of_command(&context);
- println!("{}:{}", file!(), line!());
}
- println!("{}:{}", file!(), line!());
- lexer.discard_rest_of_command();
- println!("{}:{}", file!(), line!());
}
}
- println!("{}:{}", file!(), line!());
- while let Token::EndCommand = lexer.token() {
- lexer.get();
- }
- println!("{}:{}", file!(), line!());
}
-pub fn parse(lexer: &mut Lexer, error: &Box<dyn Fn(Diagnostic)>) {
+pub fn parse_command(lexer: NewLexer, error: &Box<dyn Fn(Diagnostic)>) {
parse_in_state(lexer, error, State::Initial)
}
pub struct Context<'a> {
error: &'a Box<dyn Fn(Diagnostic)>,
- lexer: &'a mut Lexer,
+ lexer: NewLexer<'a>,
command_name: Option<&'static str>,
}
use crate::{
- command::parse,
+ command::parse_command,
lex::{
- lexer::{Lexer, Source},
- token::Token,
+ lexer::{Lexer, NewLexer, NewSource},
},
+ macros::MacroSet,
message::Diagnostic,
};
lexer: Lexer::new(Box::new(|location, error| println!("{location}: {error}"))),
}
}
- fn run(&mut self, source: Source) {
- self.lexer.append(source);
- self.lexer.get();
- while self.lexer.token() != &Token::End {
- println!("{}:{}", file!(), line!());
+ fn run(&mut self, mut source: NewSource) {
+ let macros = MacroSet::new();
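+        // Parse one command at a time: `read_command` returns the tokens
+        // for a single command, and each batch gets its own `NewLexer`.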
+        while let Some(tokens) = source.read_command(&macros) {
let error: Box<dyn Fn(Diagnostic)> = Box::new(|diagnostic| {
println!("{diagnostic}");
});
- parse(&mut self.lexer, &error);
+ parse_command(NewLexer::new(&tokens), &error);
}
}
}
mod tests {
use encoding_rs::UTF_8;
- use crate::lex::lexer::{Source, SourceFile};
+ use crate::lex::lexer::{NewSource, SourceFile};
use super::Engine;
#[test]
fn test_echo() {
let mut engine = Engine::new();
- engine.run(Source::new_default(SourceFile::for_file_contents(
+ engine.run(NewSource::new_default(SourceFile::for_file_contents(
"ECHO 'hi there'.\nECHO 'bye there'.\n".to_string(),
Some("test.sps".to_string()),
UTF_8,
match scan_token {
None => false,
Some(ScanToken::Token(token)) => {
- let token = if let Token::End = token {
- Token::EndCommand
- } else {
- token
- };
self.pp.push_back(LexToken {
token,
pos,
while index >= self.parse.len() {
if let Some(token) = self.parse.last() {
match token.token {
- Token::End => return &Token::End,
Token::EndCommand => return &Token::EndCommand,
_ => (),
}
};
if !self.source.get_parse(&context) {
if !self.pop_stack() {
- return &Token::End;
+ return &Token::EndCommand;
}
}
}
} else {
self.source = Source::default();
self.source.parse.push(LexToken {
- token: Token::End,
+ token: Token::EndCommand,
pos: 0..0,
macro_rep: None,
});
-    /// Advances past any tokens up to [Token::EndCommand] or [Token::End].
+    /// Advances past any tokens up to [Token::EndCommand].
pub fn discard_rest_of_command(&mut self) {
- while !matches!(self.token(), Token::EndCommand | Token::End) {
+ while !matches!(self.token(), Token::EndCommand) {
self.get();
}
}
    pub fn at_end(&self) -> bool {
-        match self.source.token() {
-            Token::End | Token::EndCommand => true,
-            _ => false,
-        }
+        matches!(self.source.token(), Token::EndCommand)
    }
TokenError(#[from] ScanError),
}
+/*
#[cfg(test)]
mod tests {
use encoding_rs::UTF_8;
use crate::lex::token::Token;
- use super::{Lexer, Source, SourceFile};
+ use super::{Lexer, NewLexer, Source, SourceFile};
#[test]
fn test() {
- let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}")));
+ let mut lexer = NewLexer::new(Box::new(|location, error| println!("{location}: {error}")));
lexer.include(Source::new_default(SourceFile::for_string(
String::from(
r#"#! /usr/local/bin/pspp
),
UTF_8,
)));
+ while let Some(tokens) = lexer.read_command() {
+
loop {
lexer.get();
let token = lexer.token();
}
}
}
-
-struct Tokens {
+*/
+pub struct Tokens {
file: Arc<SourceFile>,
tokens: Vec<LexToken>,
}
}
}
-struct NewLexer<'a> {
+impl Tokens {
+    /// If the tokens in `ofs` contain a macro call, this returns the raw
+    /// syntax for the macro call (not for the expansion) and for any other
+    /// tokens included in that range. The syntax is encoded in UTF-8 and in
+    /// the original form supplied to the lexer, so it may include comments,
+    /// spaces, and newlines if it spans multiple tokens.
+ ///
+ /// Returns `None` if the token range doesn't include a macro call.
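+    ///
+    /// An illustrative (hypothetical) use, assuming `tokens` is a [Tokens]
+    /// produced by the lexer:
+    ///
+    /// ```ignore
+    /// if let Some(call) = tokens.get_macro_call(2..=4) {
+    ///     println!("In syntax expanded from `{call}`");
+    /// }
+    /// ```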
+ fn get_macro_call(&self, ofs: RangeInclusive<usize>) -> Option<&str> {
+ if self
+ .tokens
+ .get(ofs.clone())
+ .unwrap_or_default()
+ .iter()
+ .all(|token| token.macro_rep.is_none())
+ {
+ return None;
+ }
+
+ let token0 = &self.tokens[*ofs.start()];
+ let token1 = &self.tokens[*ofs.end()];
+ Some(&self.file.buffer[token0.pos.start..token1.pos.end])
+ }
+
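+    /// Returns the source [Location] of the token range `range`, or a
+    /// location without a span if `range` is empty or out of bounds.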
+ fn ofs_location(&self, range: RangeInclusive<usize>) -> Location {
+ if *range.start() <= *range.end() && *range.end() < self.tokens.len() {
+ self.file
+ .token_location(&self.tokens[*range.start()]..=&self.tokens[*range.end()])
+ } else {
+ Location {
+ file_name: self.file.file_name.clone(),
+ span: None,
+ omit_underlines: false,
+ }
+ }
+ }
+
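+    /// Constructs a [Diagnostic] of the given `severity` for the token
+    /// range `ofs`, quoting the relevant source lines and noting any macro
+    /// expansion that produced the range.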
+ pub fn diagnostic(
+ &self,
+ severity: Severity,
+ ofs: RangeInclusive<usize>,
+ text: String,
+ ) -> Diagnostic {
+ let mut s = String::new();
+ if let Some(call) = self.get_macro_call(ofs.clone()) {
+ write!(&mut s, "In syntax expanded from `{}`: ", ellipsize(call)).unwrap();
+ }
+
+ if !text.is_empty() {
+ s.push_str(&text);
+ } else {
+ s.push_str("Syntax error.");
+ }
+
+ if !s.ends_with('.') {
+ s.push('.');
+ }
+
+ let location = self.ofs_location(ofs);
+ let mut source = Vec::new();
+ if let Some(Range {
+ start: Point { line: l0, .. },
+ end: Point { line: l1, .. },
+ }) = location.span
+ {
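+            // For spans of five or more lines, quote just the first two
+            // lines and the last one.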
+ let lines = if l1 - l0 > 3 {
+ vec![l0, l0 + 1, l1]
+ } else {
+ (l0..=l1).collect()
+ };
+ for line_number in lines {
+ source.push((line_number, self.file.get_line(line_number).to_string()));
+ }
+ }
+
+ Diagnostic {
+ category: Category::Syntax,
+ severity,
+ location,
+ source,
+ stack: Vec::new(),
+ command_name: None, // XXX
+ text: s,
+ }
+ }
+}
+
+pub struct NewLexer<'a> {
backing: &'a Tokens,
tokens: &'a [LexToken],
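+    /// Offset of `tokens` within `backing.tokens`, used to map positions in
+    /// this sub-slice back to the full token stream.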
+ start: usize,
pos: usize,
}
Self {
backing,
tokens: backing.tokens.as_slice(),
+ start: 0,
pos: 0,
}
}
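+    /// Returns the current token, or [Token::EndCommand] if the lexer is
+    /// past the last token.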
pub fn token(&self) -> &Token {
self.tokens
.get(self.pos)
- .map_or(&Token::End, |token| &token.token)
+ .map_or(&Token::EndCommand, |token| &token.token)
}
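+    /// Returns the token `ofs` positions away from the current one (negative
+    /// `ofs` looks backward), without advancing. Out-of-range positions read
+    /// as [Token::EndCommand].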
pub fn next(&self, ofs: isize) -> &Token {
ofs.checked_add(self.pos as isize)
.and_then(|index| usize::try_from(index).ok())
.and_then(|index| self.tokens.get(index))
- .map_or(&Token::End, |token| &token.token)
+ .map_or(&Token::EndCommand, |token| &token.token)
+ }
+
+    // Unfinished sketch, kept commented out: return the current string
+    // token's value and advance past it; fail otherwise.
+    /*
+    pub fn force_string(&mut self) -> Result<String, ()> {
+        if let Token::String(s) = self.token() {
+            let s = s.clone();
+            self.get();
+            Ok(s)
+        } else {
+            Err(())
+        }
+    }
+    */
+
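+    /// Constructs a syntax-error [Diagnostic] with message `text`, citing
+    /// the current token.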
+ pub fn error<S>(&self, text: S) -> Diagnostic
+ where
+ S: ToString,
+ {
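+        // `pos` is relative to this lexer's slice; adding `start` yields the
+        // token's offset in the backing stream.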
+ let abs_pos = self.start + self.pos;
+ self.backing
+ .diagnostic(Severity::Error, abs_pos..=abs_pos, text.to_string())
}
}
-struct NewSource {
+pub struct NewSource {
file: Arc<SourceFile>,
segmenter: Segmenter,
seg_pos: usize,
use crate::macros::MacroSet;
- use super::{NewLexer, NewSource, Source, SourceFile};
+ use super::{NewSource, SourceFile};
#[test]
fn test() {
}
let Some((seg_len, seg_type)) = self.segmenter.push(self.input, true).unwrap() else {
- if !self.eof {
- self.eof = true;
- self.tokens.push_back(Token::End);
- }
+ self.eof = true;
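+            // End of input: with Token::End gone there is no sentinel to
+            // push; just flush the final merge.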
return self.merge(true).unwrap();
};
let (s, rest) = self.input.split_at(seg_len);
fn print_token(token: &Token) {
match token {
- Token::End => print!("Token::End"),
Token::Id(s) => print!("Token::Id(String::from({s:?}))"),
Token::Number(number) => print!("Token::Number({number:?})"),
Token::String(s) => print!("Token::String(String::from({s:?}))"),
ScanToken::Token(Token::EndCommand),
ScanToken::Error(ScanError::UnexpectedChar('�')),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("and.").unwrap())),
ScanToken::Token(Token::Punct(Punct::With)),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Punct(Punct::LCurly)),
ScanToken::Token(Token::Punct(Punct::RCurly)),
ScanToken::Token(Token::Punct(Punct::Not)),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("e1").unwrap())),
ScanToken::Error(ScanError::ExpectedExponent(String::from("1e+"))),
ScanToken::Error(ScanError::ExpectedExponent(String::from("1e-"))),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e-"))),
ScanToken::Token(Token::Number(-1.0)),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from("ABお"))),
ScanToken::Token(Token::String(String::from("�あいうえお"))),
ScanToken::Token(Token::String(String::from("abc�えxyz"))),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("bin").unwrap())),
ScanToken::Token(Token::Punct(Punct::Slash)),
ScanToken::Token(Token::Id(Identifier::new("pspp").unwrap())),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from("second paragraph."))),
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("lab").unwrap())),
ScanToken::Token(Token::String(String::from("not quoted here either"))),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from(" inner command"))),
ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("fifth").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from(" var1 var2 var3"))),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from(""))),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::String(String::from("content 2"))),
ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
ScanToken::Token(Token::Number(1.0)),
ScanToken::Token(Token::EndCommand),
- ScanToken::Token(Token::End),
],
);
}
ScanToken::Token(Token::Punct(Punct::RParen)),
ScanToken::Token(Token::String(String::from("content line 1"))),
ScanToken::Token(Token::String(String::from("content line 2"))),
- ScanToken::Token(Token::End),
],
);
}
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
- /// End of input.
- End,
-
/// Identifier.
Id(Identifier),
impl Display for Token {
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
match self {
- Token::End => Ok(()),
Token::Id(s) => write!(f, "{s}"),
Token::Number(number) => {
if number.is_sign_negative() {
impl From<&Token> for TokenClass {
fn from(source: &Token) -> Self {
match source {
- Token::End => Self::Punct,
Token::Id(_) | Token::Number(_) | Token::String(_) => Self::Id,
Token::EndCommand => Self::EndCommand,
Token::Punct(punct) => match punct {
fn push_arg(&mut self, token: &Token, syntax: &str, error: &impl Fn(MacroError)) {
let param = &self.macro_.parameters[self.args.len() - 1];
- if let Token::EndCommand | Token::End = token {
+ if let Token::EndCommand = token {
if let Some(arg) = &self.args[self.arg_index] {
let param = &self.macro_.parameters[self.args.len() - 1];
self.n_tokens += 1;
self.args[self.arg_index].get_or_insert(Vec::new());
self.state = ParserState::Arg;
- } else if param.is_positional() && matches!(token, Token::End | Token::EndCommand) {
+ } else if param.is_positional() && matches!(token, Token::EndCommand) {
self.finished();
} else {
error(MacroError::UnexpectedToken {